From 347d3f92da458520b30e91e56bd66e472e084e70 Mon Sep 17 00:00:00 2001
From: Heiner Lohaus <hlohaus@users.noreply.github.com>
Date: Mon, 25 Mar 2024 21:06:51 +0100
Subject: Add .har file support for OpenaiChat Update model list of HuggingChat
 Update styles of scrollbar in gui Fix image upload in gui

---
 g4f/Provider/openai/crypt.py    |  66 +++++++++++++++++++++
 g4f/Provider/openai/har_file.py | 124 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 190 insertions(+)
 create mode 100644 g4f/Provider/openai/crypt.py
 create mode 100644 g4f/Provider/openai/har_file.py

(limited to 'g4f/Provider/openai')

diff --git a/g4f/Provider/openai/crypt.py b/g4f/Provider/openai/crypt.py
new file mode 100644
index 00000000..e7f35190
--- /dev/null
+++ b/g4f/Provider/openai/crypt.py
@@ -0,0 +1,66 @@
+import json
+import base64
+import hashlib
+import random
+from Crypto.Cipher import AES
+
+def pad(data: str) -> bytes:
+    """Encode *data* and append PKCS#7-style padding up to a 16-byte boundary."""
+    raw = data.encode()
+    # 1..16 bytes are always added, so aligned input gains a whole extra block.
+    fill = 16 - len(raw) % 16
+    return raw + bytes([fill]) * fill
+
+def encrypt(data, key):
+    """Encrypt *data* with AES-CBC under a key and IV derived from *key* and a random salt.
+
+    Three chained MD5 digests of ``previous + key + salt`` form the key material:
+    its first 32 bytes are the AES key and the following 16 bytes the IV.
+
+    Args:
+        data: Text to encrypt; it is padded with ``pad`` first.
+        key: Passphrase string mixed into the key derivation.
+
+    Returns:
+        A JSON string with ``ct`` (base64 ciphertext), ``iv`` (hex) and ``s``
+        (hex of the salt).
+    """
+    # The salt is 8 random lowercase letters.
+    salt = "".join(random.choice("abcdefghijklmnopqrstuvwxyz") for _ in range(8))
+    digest = b""
+    material = b""
+    for _ in range(3):
+        digest = hashlib.md5(digest + key.encode() + salt.encode()).digest()
+        material += digest
+    iv = material[32:48]
+
+    padded = pad(data)
+    cipher = AES.new(material[:32], AES.MODE_CBC, iv)
+    ciphertext = base64.b64encode(cipher.encrypt(padded)).decode()
+    return json.dumps({"ct": ciphertext, "iv": iv.hex(), "s": salt.encode().hex()})
+
+def unpad(data: bytes) -> bytes:
+    """Strip trailing padding whose length is given by the last byte of *data*."""
+    # The final byte is trusted as the pad length; it is not validated here.
+    return data[:-data[-1]]
+
+def decrypt(data: str, key: str):
+    """Decrypt a base64-encoded JSON payload carrying ``ct``, ``iv`` and ``s`` fields.
+
+    Args:
+        data: Base64 text of the JSON object (``ct`` base64, ``iv`` and ``s`` hex).
+        key: Passphrase string used to derive the AES key.
+
+    Returns:
+        The unpadded text, or ``None`` if the plaintext lacks the ``[{"key":`` prefix.
+    """
+    payload = json.loads(base64.b64decode(data))
+    ciphertext = base64.b64decode(payload["ct"])
+    iv = bytes.fromhex(payload["iv"])
+    salt = bytes.fromhex(payload["s"])
+    digest = material = b""
+    for _ in range(3):
+        digest = hashlib.md5(digest + key.encode() + salt).digest()
+        material += digest
+    plain = AES.new(material[:32], AES.MODE_CBC, iv).decrypt(ciphertext)
+    return unpad(plain).decode() if plain.startswith(b'[{"key":') else None
\ No newline at end of file
diff --git a/g4f/Provider/openai/har_file.py b/g4f/Provider/openai/har_file.py
new file mode 100644
index 00000000..3e8535ad
--- /dev/null
+++ b/g4f/Provider/openai/har_file.py
@@ -0,0 +1,124 @@
+import base64
+import json
+import os
+import re
+import time
+import uuid
+import random
+from urllib.parse import unquote
+from copy import deepcopy
+
+from .crypt import decrypt, encrypt
+from ...requests import StreamSession
+
+arkPreURL = "https://tcr9i.chat.openai.com/fc/gt2/public_key/35536E1E-65B4-4D96-9D97-6ADB7EFF8147"  # HAR requests whose URL contains this are parsed as Arkose requests
+sessionUrl = "https://chat.openai.com/api/auth/session"  # the HAR request with exactly this URL has its response read for "accessToken"
+chatArk = None  # cached arkReq template, set by getArkoseAndAccessToken()
+accessToken = None  # cached access token, set by getArkoseAndAccessToken()
+
+class arkReq:
+    """Container for one captured Arkose request and the data needed to replay it."""
+
+    def __init__(self, arkURL, arkBx, arkHeader, arkBody, arkCookies, userAgent):
+        # Values are stored as given; no validation or copying happens here.
+        self.arkURL, self.arkBx = arkURL, arkBx
+        self.arkHeader, self.arkBody = arkHeader, arkBody
+        self.arkCookies, self.userAgent = arkCookies, userAgent
+
+def readHAR():
+    """Load the last Arkose request and the access token from ``.har`` files under ``./``.
+
+    Raises:
+        RuntimeError: No ``.har`` file, Arkose request or access token was found.
+    """
+    harPath = [os.path.join(root, name) for root, _, names in os.walk("./") for name in names if name.endswith(".har")]
+    if not harPath:
+        raise RuntimeError("No .har file found")
+    chatArks = []
+    accessToken = None
+    for path in harPath:
+        with open(path, 'r') as file:
+            try:
+                harFile = json.load(file)
+            except json.JSONDecodeError:
+                continue  # not JSON, so not a HAR file
+            for v in harFile['log']['entries']:
+                if arkPreURL in v['request']['url']:
+                    chatArks.append(parseHAREntry(v))
+                elif v['request']['url'] == sessionUrl:
+                    accessToken = json.loads(v["response"]["content"]["text"]).get("accessToken")
+    # Fail with a clear message rather than an IndexError from pop() or a None token.
+    if not chatArks:
+        raise RuntimeError("No arkose requests found in .har files")
+    if not accessToken:
+        raise RuntimeError("No accessToken found in .har files")
+    return chatArks.pop(), accessToken
+
+def parseHAREntry(entry) -> arkReq:
+    """Build an ``arkReq`` from one HAR entry, decrypting its ``bda`` field into ``arkBx``."""
+    request = entry['request']
+    url = request['url']
+    # Header names are lower-cased; content-length, cookie and ':'-prefixed names are dropped.
+    headers = {h['name'].lower(): h['value'] for h in request['headers'] if h['name'].lower() not in ['content-length', 'cookie'] and not h['name'].startswith(':')}
+    body = {p['name']: unquote(p['value']) for p in request['postData']['params'] if p['name'] not in ['rnd']}
+    cookies = [{'name': c['name'], 'value': c['value'], 'expires': c['expires']} for c in request['cookies']]
+    userAgent = headers.get('user-agent', '')
+    # The passphrase is the user agent followed by the x-ark-esync-value header.
+    bx = decrypt(body["bda"], userAgent + headers['x-ark-esync-value'])
+    return arkReq(
+        arkURL=url, arkBx=bx, arkHeader=headers, arkBody=body, arkCookies=cookies, userAgent=userAgent
+    )
+
+def genArkReq(chatArk: arkReq) -> arkReq:
+    """Return a deep copy of *chatArk* with regenerated ``bda``, ``rnd`` and esync values.
+
+    Raises RuntimeError if *chatArk* is falsy or its copy has an empty body or header."""
+    if not chatArk:
+        raise RuntimeError("No .har file with arkose found")
+    fresh: arkReq = deepcopy(chatArk)
+    if fresh is None or not (fresh.arkBody and fresh.arkHeader):
+        raise RuntimeError("The .har file is not valid")
+    bda, bw = getBDA(fresh)
+    fresh.arkBody.update(bda=base64.b64encode(bda.encode()).decode(), rnd=str(random.random()))
+    fresh.arkHeader['x-ark-esync-value'] = bw
+    fresh.arkCookies = {c['name']: c['value'] for c in fresh.arkCookies}
+    return fresh
+
+async def sendRequest(tmpArk: arkReq, proxy: str = None):  # POST the prepared request and return its Arkose token
+    async with StreamSession(headers=tmpArk.arkHeader, cookies=tmpArk.arkCookies, proxies={"https": proxy}) as session:
+        async with session.post(tmpArk.arkURL, data=tmpArk.arkBody) as response:
+            arkose = (await response.json()).get("token")
+    if not arkose or "sup=1|rid=" not in arkose:  # a missing token (None) is invalid too
+        raise RuntimeError("No valid arkose token generated")  # raise: callers must never receive the exception as a token
+    return arkose
+
+def getBDA(arkReq: arkReq):
+    """Refresh the stored ``bx`` payload and re-encrypt it for the current 6-hour window.
+
+    Returns:
+        ``(encrypted_bx, bw)``: the output of ``encrypt`` and the ``bw`` string used in its key.
+    """
+    payload = arkReq.arkBx
+    payload = re.sub(r'"key":"n","value":"\S*?"', f'"key":"n","value":"{getN()}"', payload)
+    found = re.search(r'"key":"4b4b269e68","value":"(\S*?)"', payload)
+    if found:  # swap every occurrence of the captured value for a random UUID
+        payload = payload.replace(found.group(1), str(uuid.uuid4()))
+    bw = getBw(getBt())
+    return encrypt(payload, arkReq.userAgent + bw), bw
+
+def getBt() -> int:  # Current Unix time, truncated to whole seconds.
+    return int(time.time())
+
+def getBw(bt: int) -> str:  # Floor *bt* to a multiple of 21600 s (6 h) and return it as text.
+    return str(bt // 21600 * 21600)
+
+def getN() -> str:
+    """Return the current Unix time in whole seconds, base64-encoded as text."""
+    return base64.b64encode(str(int(time.time())).encode()).decode()
+
+async def getArkoseAndAccessToken(proxy: str):  # Returns (sendRequest() result, access token, cookies dict).
+    global chatArk, accessToken
+    if chatArk is None or accessToken is None:  # parse the .har files until both values are cached
+        chatArk, accessToken = readHAR()
+    newReq = genArkReq(chatArk)  # works on a deep copy; the cached template is left unmodified
+    return await sendRequest(newReq, proxy), accessToken, newReq.arkCookies
\ No newline at end of file
-- 
cgit v1.2.3