From 347d3f92da458520b30e91e56bd66e472e084e70 Mon Sep 17 00:00:00 2001 From: Heiner Lohaus Date: Mon, 25 Mar 2024 21:06:51 +0100 Subject: Add .har file support for OpenaiChat Update model list of HuggingChat Update styles of scrollbar in gui Fix image upload in gui --- g4f/Provider/HuggingChat.py | 16 +++-- g4f/Provider/needs_auth/OpenaiChat.py | 72 ++++---------------- g4f/Provider/openai/crypt.py | 66 ++++++++++++++++++ g4f/Provider/openai/har_file.py | 124 ++++++++++++++++++++++++++++++++++ g4f/gui/client/index.html | 4 +- g4f/gui/client/static/css/style.css | 14 ++++ g4f/gui/client/static/js/chat.v1.js | 2 +- g4f/gui/server/api.py | 3 +- 8 files changed, 233 insertions(+), 68 deletions(-) create mode 100644 g4f/Provider/openai/crypt.py create mode 100644 g4f/Provider/openai/har_file.py (limited to 'g4f') diff --git a/g4f/Provider/HuggingChat.py b/g4f/Provider/HuggingChat.py index 52c5ae31..5c95b679 100644 --- a/g4f/Provider/HuggingChat.py +++ b/g4f/Provider/HuggingChat.py @@ -1,7 +1,7 @@ from __future__ import annotations import json - +import requests from aiohttp import ClientSession, BaseConnector from ..typing import AsyncResult, Messages @@ -14,19 +14,27 @@ class HuggingChat(AsyncGeneratorProvider, ProviderModelMixin): working = True default_model = "meta-llama/Llama-2-70b-chat-hf" models = [ - "google/gemma-7b-it", "mistralai/Mixtral-8x7B-Instruct-v0.1", + "google/gemma-7b-it", "meta-llama/Llama-2-70b-chat-hf", "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", "codellama/CodeLlama-34b-Instruct-hf", "mistralai/Mistral-7B-Instruct-v0.2", "openchat/openchat-3.5-0106", - "codellama/CodeLlama-70b-Instruct-hf" ] model_aliases = { - "openchat/openchat_3.5": "openchat/openchat-3.5-1210", + "openchat/openchat_3.5": "openchat/openchat-3.5-0106", } + @classmethod + def get_models(cls): + if not cls.models: + url = f"{cls.url}/__data.json" + data = requests.get(url).json()["nodes"][0]["data"] + models = [data[key]["name"] for key in data[data[0]["models"]]] + cls.models = [data[key] for key in models] + return cls.models + @classmethod async def create_async_generator( cls, diff --git a/g4f/Provider/needs_auth/OpenaiChat.py b/g4f/Provider/needs_auth/OpenaiChat.py index 8a5a03d4..0aff99a7 100644 --- a/g4f/Provider/needs_auth/OpenaiChat.py +++ b/g4f/Provider/needs_auth/OpenaiChat.py @@ -8,12 +8,6 @@ import base64 import time from aiohttp import ClientWebSocketResponse -try: - from py_arkose_generator.arkose import get_values_for_request - has_arkose_generator = True -except ImportError: - has_arkose_generator = False - try: import webview has_webview = True @@ -35,6 +29,7 @@ from ...requests import get_args_from_browser, raise_for_status from ...requests.aiohttp import StreamSession from ...image import to_image, to_bytes, ImageResponse, ImageRequest from ...errors import MissingRequirementsError, MissingAuthError, ProviderNotWorkingError +from ..openai.har_file import getArkoseAndAccessToken from ... import debug class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin): @@ -353,18 +348,6 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin): timeout=timeout ) as session: api_key = kwargs["access_token"] if "access_token" in kwargs else api_key - if cls._headers is None or cls._expires is None or time.time() > cls._expires: - if cls._headers is None: - cookies = get_cookies("chat.openai.com", False) if cookies is None else cookies - api_key = cookies["access_token"] if "access_token" in cookies else api_key - if api_key is None: - try: - await cls.webview_access_token() if has_webview else None - except Exception as e: - if debug.logging: - print(f"Use webview failed: {e}") - else: - api_key = cls._api_key if api_key is None else api_key if api_key is not None: cls._create_request_args(cookies) @@ -380,14 +363,12 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin): if debug.logging: print("OpenaiChat: Load default_model failed") print(f"{e.__class__.__name__}: {e}") + + arkose_token = None if cls.default_model is None: - login_url = os.environ.get("G4F_LOGIN_URL") - if login_url: - yield f"Please login: [ChatGPT]({login_url})\n\n" - try: - cls.browse_access_token(proxy) - except MissingRequirementsError: - raise MissingAuthError(f'Missing "access_token". Add a "api_key" please') + arkose_token, api_key, cookies = await getArkoseAndAccessToken(proxy) + cls._create_request_args(cookies) + cls._set_api_key(api_key) cls.default_model = cls.get_model(await cls.get_default_model(session, cls._headers)) async with session.post( @@ -402,9 +383,10 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin): need_arkose = data["arkose"]["required"] chat_token = data["token"] - if need_arkose and not has_arkose_generator: - raise ProviderNotWorkingError("OpenAI Plus Subscriber are not working") - raise MissingRequirementsError('Install "py-arkose-generator" package') + if need_arkose and arkose_token is None: + arkose_token, api_key, cookies = await getArkoseAndAccessToken(proxy) + cls._create_request_args(cookies) + cls._set_api_key(api_key) try: image_request = await cls.upload_image(session, cls._headers, image, image_name) if image else None @@ -439,8 +421,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin): **cls._headers } if need_arkose: - raise ProviderNotWorkingError("OpenAI Plus Subscriber are not working") - headers["OpenAI-Sentinel-Arkose-Token"] = await cls.get_arkose_token(session, cls._headers, blob) + headers["OpenAI-Sentinel-Arkose-Token"] = arkose_token headers["OpenAI-Sentinel-Chat-Requirements-Token"] = chat_token async with session.post( @@ -491,7 +472,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin): ): yield chunk finally: - await ws.aclose() + await ws.aclose() if hasattr(ws, "aclose") else await ws.close() break async for chunk in cls.iter_messages_line(session, message, fields): if fields.finish_reason is not None: @@ -611,35 +592,6 @@ this.fetch = async (url, options) => { finally: driver.close() - @classmethod - async def get_arkose_token(cls, session: StreamSession, headers: dict, blob: str) -> str: - """ - Obtain an Arkose token for the session. - - Args: - session (StreamSession): The session object. - - Returns: - str: The Arkose token. - - Raises: - RuntimeError: If unable to retrieve the token. - """ - config = { - "pkey": "35536E1E-65B4-4D96-9D97-6ADB7EFF8147", - "surl": "https://tcr9i.chat.openai.com", - "headers": headers, - "site": cls.url, - "data": {"blob": blob} - } - args_for_request = get_values_for_request(config) - async with session.post(**args_for_request) as response: - await raise_for_status(response) - decoded_json = await response.json() - if "token" in decoded_json: - return decoded_json["token"] - raise RuntimeError(f"Response: {decoded_json}") - @classmethod async def fetch_access_token(cls, session: StreamSession, headers: dict): async with session.get( diff --git a/g4f/Provider/openai/crypt.py b/g4f/Provider/openai/crypt.py new file mode 100644 index 00000000..e7f35190 --- /dev/null +++ b/g4f/Provider/openai/crypt.py @@ -0,0 +1,66 @@ +import json +import base64 +import hashlib +import random +from Crypto.Cipher import AES + +def pad(data: str) -> bytes: + # Convert the string to bytes and calculate the number of bytes to pad + data_bytes = data.encode() + padding = 16 - (len(data_bytes) % 16) + # Append the padding bytes with their value + return data_bytes + bytes([padding] * padding) + +def encrypt(data, key): + salt = "" + salted = "" + dx = bytes() + + # Generate salt, as 8 random lowercase letters + salt = "".join(random.choice("abcdefghijklmnopqrstuvwxyz") for _ in range(8)) + + # Our final key and IV come from the key and salt being repeatedly hashed + for x in range(3): + dx = hashlib.md5(dx + key.encode() + salt.encode()).digest() + salted += dx.hex() + + # Pad the data before encryption + data = pad(data) + + aes = AES.new( + bytes.fromhex(salted[:64]), AES.MODE_CBC, bytes.fromhex(salted[64:96]) + ) + + return json.dumps( + { + "ct": base64.b64encode(aes.encrypt(data)).decode(), + "iv": salted[64:96], + "s": salt.encode().hex(), + } + ) + +def unpad(data: bytes) -> bytes: + # Extract the padding value from the last byte and remove padding + padding_value = data[-1] + return data[:-padding_value] + +def decrypt(data: str, key: str): + # Parse JSON data + parsed_data = json.loads(base64.b64decode(data)) + ct = base64.b64decode(parsed_data["ct"]) + iv = bytes.fromhex(parsed_data["iv"]) + salt = bytes.fromhex(parsed_data["s"]) + + salted = '' + dx = b'' + for x in range(3): + dx = hashlib.md5(dx + key.encode() + salt).digest() + salted += dx.hex() + + aes = AES.new( + bytes.fromhex(salted[:64]), AES.MODE_CBC, iv + ) + + data = aes.decrypt(ct) + if data.startswith(b'[{"key":'): + return unpad(data).decode() \ No newline at end of file diff --git a/g4f/Provider/openai/har_file.py b/g4f/Provider/openai/har_file.py new file mode 100644 index 00000000..3e8535ad --- /dev/null +++ b/g4f/Provider/openai/har_file.py @@ -0,0 +1,124 @@ +import base64 +import json +import os +import re +import time +import uuid +import random +from urllib.parse import unquote +from copy import deepcopy + +from .crypt import decrypt, encrypt +from ...requests import StreamSession + +arkPreURL = "https://tcr9i.chat.openai.com/fc/gt2/public_key/35536E1E-65B4-4D96-9D97-6ADB7EFF8147" +sessionUrl = "https://chat.openai.com/api/auth/session" +chatArk = None +accessToken = None + +class arkReq: + def __init__(self, arkURL, arkBx, arkHeader, arkBody, arkCookies, userAgent): + self.arkURL = arkURL + self.arkBx = arkBx + self.arkHeader = arkHeader + self.arkBody = arkBody + self.arkCookies = arkCookies + self.userAgent = userAgent + +def readHAR(): + dirPath = "./" + harPath = [] + chatArks = [] + accessToken = None + for root, dirs, files in os.walk(dirPath): + for file in files: + if file.endswith(".har"): + harPath.append(os.path.join(root, file)) + if not harPath: + raise RuntimeError("No .har file found") + for path in harPath: + with open(path, 'r') as file: + try: + harFile = json.load(file) + except json.JSONDecodeError: + # Error: not a HAR file! + continue + for v in harFile['log']['entries']: + if arkPreURL in v['request']['url']: + chatArks.append(parseHAREntry(v)) + elif v['request']['url'] == sessionUrl: + accessToken = json.loads(v["response"]["content"]["text"]).get("accessToken") + if not chatArks: + RuntimeError("No arkose requests found in .har files") + if not accessToken: + RuntimeError("No accessToken found in .har files") + return chatArks.pop(), accessToken + +def parseHAREntry(entry) -> arkReq: + tmpArk = arkReq( + arkURL=entry['request']['url'], + arkBx="", + arkHeader={h['name'].lower(): h['value'] for h in entry['request']['headers'] if h['name'].lower() not in ['content-length', 'cookie'] and not h['name'].startswith(':')}, + arkBody={p['name']: unquote(p['value']) for p in entry['request']['postData']['params'] if p['name'] not in ['rnd']}, + arkCookies=[{'name': c['name'], 'value': c['value'], 'expires': c['expires']} for c in entry['request']['cookies']], + userAgent="" + ) + tmpArk.userAgent = tmpArk.arkHeader.get('user-agent', '') + bda = tmpArk.arkBody["bda"] + bw = tmpArk.arkHeader['x-ark-esync-value'] + tmpArk.arkBx = decrypt(bda, tmpArk.userAgent + bw) + return tmpArk + +def genArkReq(chatArk: arkReq) -> arkReq: + if not chatArk: + raise RuntimeError("No .har file with arkose found") + + tmpArk: arkReq = deepcopy(chatArk) + if tmpArk is None or not tmpArk.arkBody or not tmpArk.arkHeader: + raise RuntimeError("The .har file is not valid") + bda, bw = getBDA(tmpArk) + + tmpArk.arkBody['bda'] = base64.b64encode(bda.encode()).decode() + tmpArk.arkBody['rnd'] = str(random.random()) + tmpArk.arkHeader['x-ark-esync-value'] = bw + tmpArk.arkCookies = {cookie['name']: cookie['value'] for cookie in tmpArk.arkCookies} + return tmpArk + +async def sendRequest(tmpArk: arkReq, proxy: str = None): + async with StreamSession(headers=tmpArk.arkHeader, cookies=tmpArk.arkCookies, proxies={"https": proxy}) as session: + async with session.post(tmpArk.arkURL, data=tmpArk.arkBody) as response: + arkose = (await response.json()).get("token") + if "sup=1|rid=" not in arkose: + return RuntimeError("No valid arkose token generated") + return arkose + +def getBDA(arkReq: arkReq): + bx = arkReq.arkBx + + bx = re.sub(r'"key":"n","value":"\S*?"', f'"key":"n","value":"{getN()}"', bx) + oldUUID_search = re.search(r'"key":"4b4b269e68","value":"(\S*?)"', bx) + if oldUUID_search: + oldUUID = oldUUID_search.group(1) + newUUID = str(uuid.uuid4()) + bx = bx.replace(oldUUID, newUUID) + + bw = getBw(getBt()) + encrypted_bx = encrypt(bx, arkReq.userAgent + bw) + return encrypted_bx, bw + +def getBt() -> int: + return int(time.time()) + +def getBw(bt: int) -> str: + return str(bt - (bt % 21600)) + +def getN() -> str: + timestamp = str(int(time.time())) + return base64.b64encode(timestamp.encode()).decode() + +async def getArkoseAndAccessToken(proxy: str): + global chatArk, accessToken + if chatArk is None or accessToken is None: + chatArk, accessToken = readHAR() + newReq = genArkReq(chatArk) + return await sendRequest(newReq, proxy), accessToken, newReq.arkCookies \ No newline at end of file diff --git a/g4f/gui/client/index.html b/g4f/gui/client/index.html index 6b9b1ab9..5d40b70e 100644 --- a/g4f/gui/client/index.html +++ b/g4f/gui/client/index.html @@ -133,11 +133,11 @@
-