# Reconstructed from a whitespace-collapsed git patch (cgit v1.2.3 export).
#
#   Commit:  347d3f92da458520b30e91e56bd66e472e084e70
#   From:    Heiner Lohaus
#   Date:    Mon, 25 Mar 2024 21:06:51 +0100
#   Subject: Add .har file support for OpenaiChat
#            Update model list of HuggingChat
#            Update styles of scrollbar in gui
#            Fix image upload in gui
#
# The patch creates two modules under g4f/Provider/openai/:
#   crypt.py     (66 lines)  - AES helpers for the Arkose "bda" payload
#   har_file.py  (124 lines) - .har parsing and Arkose token request
#
# Both are reproduced below as one module, so har_file.py's original
# `from .crypt import decrypt, encrypt` is not needed here.  The third-party
# (`Crypto`) and package-relative (`...requests`) imports are performed inside
# the functions that use them, which keeps the pure-Python helpers importable
# when those dependencies are unavailable.

import base64
import hashlib
import json
import os
import random
import re
import time
import uuid
from copy import deepcopy
from urllib.parse import unquote


# --------------------------------------------------------------------------
# g4f/Provider/openai/crypt.py
# --------------------------------------------------------------------------

def pad(data: str) -> bytes:
    """Encode *data* and pad it to a multiple of 16 bytes (PKCS#7 style).

    Each padding byte holds the padding length; input that is already
    block-aligned receives a full extra block of 16 bytes.
    """
    data_bytes = data.encode()
    padding = 16 - (len(data_bytes) % 16)
    return data_bytes + bytes([padding] * padding)


def unpad(data: bytes) -> bytes:
    """Strip the padding added by :func:`pad`.

    The last byte is taken as the padding length; it is not validated.
    """
    padding_value = data[-1]
    return data[:-padding_value]


def _derive_key_iv(key: str, salt: bytes) -> tuple:
    """Return ``(aes_key, iv)`` derived from *key* and *salt*.

    Three chained MD5 digests of ``previous_digest + key + salt`` yield
    48 bytes: the first 32 are the AES key, the last 16 the IV.
    """
    digest = b""
    material = b""
    for _ in range(3):
        digest = hashlib.md5(digest + key.encode() + salt).digest()
        material += digest
    return material[:32], material[32:48]


def _new_cbc_cipher(key_bytes: bytes, iv: bytes):
    """Create an AES-CBC cipher object (requires the ``Crypto`` package)."""
    from Crypto.Cipher import AES

    return AES.new(key_bytes, AES.MODE_CBC, iv)


def encrypt(data, key):
    """Encrypt the string *data* with a key derived from *key*.

    Returns:
        A JSON string with ``ct`` (base64 ciphertext), ``iv`` (hex) and
        ``s`` (hex of the salt).  Note that the result is *not* base64
        wrapped, whereas :func:`decrypt` expects a base64-wrapped document.
    """
    # The salt is 8 random lowercase letters.
    salt = "".join(random.choice("abcdefghijklmnopqrstuvwxyz") for _ in range(8))
    key_bytes, iv = _derive_key_iv(key, salt.encode())
    aes = _new_cbc_cipher(key_bytes, iv)
    return json.dumps(
        {
            "ct": base64.b64encode(aes.encrypt(pad(data))).decode(),
            "iv": iv.hex(),
            "s": salt.encode().hex(),
        }
    )


def decrypt(data: str, key: str):
    """Decrypt a base64-wrapped JSON document with ``ct``, ``iv`` and ``s``.

    Returns:
        The decrypted text, or ``None`` when the plaintext does not start
        with ``[{"key":`` (i.e. the key did not fit the payload).
    """
    parsed_data = json.loads(base64.b64decode(data))
    ct = base64.b64decode(parsed_data["ct"])
    iv = bytes.fromhex(parsed_data["iv"])
    salt = bytes.fromhex(parsed_data["s"])

    # Only the key is derived; the IV is taken from the document itself.
    key_bytes, _ = _derive_key_iv(key, salt)
    plaintext = _new_cbc_cipher(key_bytes, iv).decrypt(ct)
    if plaintext.startswith(b'[{"key":'):
        return unpad(plaintext).decode()
    return None


# --------------------------------------------------------------------------
# g4f/Provider/openai/har_file.py
# --------------------------------------------------------------------------

arkPreURL = "https://tcr9i.chat.openai.com/fc/gt2/public_key/35536E1E-65B4-4D96-9D97-6ADB7EFF8147"
sessionUrl = "https://chat.openai.com/api/auth/session"
# Module-level cache filled by getArkoseAndAccessToken().
chatArk = None
accessToken = None


class arkReq:
    """Container for one recorded Arkose request taken from a .har file."""

    def __init__(self, arkURL, arkBx, arkHeader, arkBody, arkCookies, userAgent):
        self.arkURL = arkURL          # request URL
        self.arkBx = arkBx            # decrypted "bda" payload
        self.arkHeader = arkHeader    # lower-cased header name -> value
        self.arkBody = arkBody        # POST parameter name -> value
        self.arkCookies = arkCookies  # list of cookie dicts (dict after genArkReq)
        self.userAgent = userAgent    # value of the user-agent header


def readHAR(dirPath: str = "./"):
    """Scan *dirPath* recursively for .har files and extract the session data.

    Args:
        dirPath: Directory to walk; defaults to the current directory.

    Returns:
        ``(arkReq, accessToken)`` - the last recorded Arkose request and the
        access token found in the session response.

    Raises:
        RuntimeError: No .har file exists, or none of them contains an
            Arkose request, or none contains an access token.
    """
    harPath = []
    chatArks = []
    accessToken = None
    for root, _dirs, files in os.walk(dirPath):
        for file in files:
            if file.endswith(".har"):
                harPath.append(os.path.join(root, file))
    if not harPath:
        raise RuntimeError("No .har file found")
    for path in harPath:
        # Binary mode lets the json module detect the encoding itself instead
        # of depending on the platform's default text encoding.
        with open(path, "rb") as file:
            try:
                harFile = json.load(file)
            except (json.JSONDecodeError, UnicodeDecodeError):
                # Error: not a HAR file!
                continue
        for v in harFile['log']['entries']:
            if arkPreURL in v['request']['url']:
                chatArks.append(parseHAREntry(v))
            elif v['request']['url'] == sessionUrl:
                accessToken = json.loads(v["response"]["content"]["text"]).get("accessToken")
    if not chatArks:
        raise RuntimeError("No arkose requests found in .har files")
    if not accessToken:
        raise RuntimeError("No accessToken found in .har files")
    return chatArks.pop(), accessToken


def parseHAREntry(entry) -> arkReq:
    """Build an :class:`arkReq` from one HAR entry of an Arkose request.

    The ``content-length`` and ``cookie`` headers and HTTP/2 pseudo headers
    (names starting with ``:``) are dropped, as is the ``rnd`` POST
    parameter.  The ``bda`` parameter is decrypted with the key
    ``user-agent + x-ark-esync-value``.
    """
    request = entry['request']
    tmpArk = arkReq(
        arkURL=request['url'],
        arkBx="",
        arkHeader={
            h['name'].lower(): h['value']
            for h in request['headers']
            if h['name'].lower() not in ['content-length', 'cookie']
            and not h['name'].startswith(':')
        },
        arkBody={
            p['name']: unquote(p['value'])
            for p in request['postData']['params']
            if p['name'] not in ['rnd']
        },
        arkCookies=[
            {'name': c['name'], 'value': c['value'], 'expires': c['expires']}
            for c in request['cookies']
        ],
        userAgent="",
    )
    tmpArk.userAgent = tmpArk.arkHeader.get('user-agent', '')
    bda = tmpArk.arkBody["bda"]
    bw = tmpArk.arkHeader['x-ark-esync-value']
    tmpArk.arkBx = decrypt(bda, tmpArk.userAgent + bw)
    return tmpArk


def genArkReq(chatArk: arkReq) -> arkReq:
    """Return a fresh copy of *chatArk* ready to be sent.

    The copy gets a newly encrypted ``bda``, a new ``rnd`` value, a current
    ``x-ark-esync-value`` header, and its cookies as a name -> value dict.

    Raises:
        RuntimeError: *chatArk* is missing, or it lacks a body, headers or
            a decrypted payload.
    """
    if not chatArk:
        raise RuntimeError("No .har file with arkose found")

    tmpArk: arkReq = deepcopy(chatArk)
    # arkBx is None when decrypt() could not decode the recorded payload.
    if not tmpArk.arkBody or not tmpArk.arkHeader or not tmpArk.arkBx:
        raise RuntimeError("The .har file is not valid")
    bda, bw = getBDA(tmpArk)

    tmpArk.arkBody['bda'] = base64.b64encode(bda.encode()).decode()
    tmpArk.arkBody['rnd'] = str(random.random())
    tmpArk.arkHeader['x-ark-esync-value'] = bw
    tmpArk.arkCookies = {cookie['name']: cookie['value'] for cookie in tmpArk.arkCookies}
    return tmpArk


async def sendRequest(tmpArk: arkReq, proxy: str = None):
    """POST the prepared request and return the Arkose token.

    Raises:
        RuntimeError: The response carries no token, or the token does not
            contain ``sup=1|rid=``.
    """
    from ...requests import StreamSession

    async with StreamSession(headers=tmpArk.arkHeader, cookies=tmpArk.arkCookies, proxies={"https": proxy}) as session:
        async with session.post(tmpArk.arkURL, data=tmpArk.arkBody) as response:
            arkose = (await response.json()).get("token")
    if not arkose or "sup=1|rid=" not in arkose:
        raise RuntimeError("No valid arkose token generated")
    return arkose


def getBDA(arkReq: arkReq):
    """Refresh the decrypted payload and re-encrypt it.

    The value of key ``n`` is replaced with :func:`getN`, and the value of
    key ``4b4b269e68`` (if present and non-empty) is replaced everywhere by
    a new UUID.

    Returns:
        ``(encrypted_bx, bw)`` - the output of :func:`encrypt` and the
        time-window value that was used in the encryption key.
    """
    bx = arkReq.arkBx

    bx = re.sub(r'"key":"n","value":"\S*?"', f'"key":"n","value":"{getN()}"', bx)
    oldUUID_search = re.search(r'"key":"4b4b269e68","value":"(\S*?)"', bx)
    # An empty capture must be skipped: str.replace("", x) would insert x
    # between every character of the payload.
    if oldUUID_search and oldUUID_search.group(1):
        oldUUID = oldUUID_search.group(1)
        newUUID = str(uuid.uuid4())
        bx = bx.replace(oldUUID, newUUID)

    bw = getBw(getBt())
    encrypted_bx = encrypt(bx, arkReq.userAgent + bw)
    return encrypted_bx, bw


def getBt() -> int:
    """Return the current Unix time in whole seconds."""
    return int(time.time())


def getBw(bt: int) -> str:
    """Round *bt* down to a multiple of 21600 seconds (6 hours), as a string."""
    return str(bt - (bt % 21600))


def getN() -> str:
    """Return the current Unix time (whole seconds) as base64 text."""
    timestamp = str(int(time.time()))
    return base64.b64encode(timestamp.encode()).decode()


async def getArkoseAndAccessToken(proxy: str):
    """Return ``(arkose_token, access_token, cookies)``.

    The .har files are read once; the parsed request and access token are
    cached in the module-level ``chatArk`` and ``accessToken``.
    """
    global chatArk, accessToken
    if chatArk is None or accessToken is None:
        chatArk, accessToken = readHAR()
    newReq = genArkReq(chatArk)
    return await sendRequest(newReq, proxy), accessToken, newReq.arkCookies