From 5bcf21f9bd2dd9ea581e5301113facda6fc28426 Mon Sep 17 00:00:00 2001 From: Heiner Lohaus Date: Tue, 12 Mar 2024 18:45:22 +0100 Subject: Add count chars to gui, Add retry support to fix rate limit in Bing --- g4f/Provider/Bing.py | 139 +++++++++++++++++++++++--------------- g4f/Provider/bing/conversation.py | 34 ++-------- g4f/Provider/bing/upload_image.py | 12 ++-- g4f/cookies.py | 2 +- g4f/gui/client/js/chat.v1.js | 6 +- g4f/providers/helper.py | 12 ++-- g4f/requests/__init__.py | 26 ++++--- g4f/webdriver.py | 5 +- 8 files changed, 123 insertions(+), 113 deletions(-) (limited to 'g4f') diff --git a/g4f/Provider/Bing.py b/g4f/Provider/Bing.py index 77178686..ca431355 100644 --- a/g4f/Provider/Bing.py +++ b/g4f/Provider/Bing.py @@ -4,16 +4,20 @@ import random import json import uuid import time +import asyncio from urllib import parse -from aiohttp import ClientSession, ClientTimeout, BaseConnector +from datetime import datetime +from aiohttp import ClientSession, ClientTimeout, BaseConnector, WSMsgType from ..typing import AsyncResult, Messages, ImageType, Cookies from ..image import ImageResponse, ImageRequest +from ..errors import ResponseStatusError from .base_provider import AsyncGeneratorProvider -from .helper import get_connector +from .helper import get_connector, get_random_hex from .bing.upload_image import upload_image from .bing.create_images import create_images from .bing.conversation import Conversation, create_conversation, delete_conversation +from .. import debug class Tones: """ @@ -65,8 +69,6 @@ class Bing(AsyncGeneratorProvider): prompt = messages[-1]["content"] context = create_context(messages[:-1]) - cookies = {**get_default_cookies(), **cookies} if cookies else get_default_cookies() - gpt4_turbo = True if model.startswith("gpt-4-turbo") else False return stream_generate(prompt, tone, image, context, cookies, get_connector(connector, proxy, True), web_search, gpt4_turbo, timeout) @@ -86,6 +88,24 @@ def create_context(messages: Messages) -> str: def get_ip_address() -> str: return f"13.{random.randint(104, 107)}.{random.randint(0, 255)}.{random.randint(0, 255)}" +def get_default_cookies(): + return { + 'SRCHD' : 'AF=NOFORM', + 'PPLState' : '1', + 'KievRPSSecAuth': '', + 'SUID' : '', + 'SRCHUSR' : '', + 'SRCHHPGUSR' : f'HV={int(time.time())}', + } + +def create_headers(cookies: Cookies = None) -> dict: + if cookies is None: + cookies = get_default_cookies() + headers = Defaults.headers.copy() + headers["cookie"] = "; ".join(f"{k}={v}" for k, v in cookies.items()) + headers["x-forwarded-for"] = get_ip_address() + return headers + class Defaults: """ Default settings and configurations for the Bing provider. @@ -169,37 +189,26 @@ class Defaults: } # Default headers for requests + home = 'https://www.bing.com/chat?q=Bing+AI&FORM=hpcodx' headers = { - 'accept': '*/*', - 'accept-language': 'en-US,en;q=0.9', - 'cache-control': 'max-age=0', - 'sec-ch-ua': '"Chromium";v="110", "Not A(Brand";v="24", "Microsoft Edge";v="110"', - 'sec-ch-ua-arch': '"x86"', - 'sec-ch-ua-bitness': '"64"', - 'sec-ch-ua-full-version': '"110.0.1587.69"', - 'sec-ch-ua-full-version-list': '"Chromium";v="110.0.5481.192", "Not A(Brand";v="24.0.0.0", "Microsoft Edge";v="110.0.1587.69"', + 'sec-ch-ua': '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"', 'sec-ch-ua-mobile': '?0', + 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36', + 'sec-ch-ua-arch': '"x86"', + 'sec-ch-ua-full-version': '"122.0.6261.69"', + 'accept': 'application/json', + 'sec-ch-ua-platform-version': '"15.0.0"', + "x-ms-client-request-id": str(uuid.uuid4()), + 'sec-ch-ua-full-version-list': '"Chromium";v="122.0.6261.69", "Not(A:Brand";v="24.0.0.0", "Google Chrome";v="122.0.6261.69"', + 'x-ms-useragent': 'azsdk-js-api-client-factory/1.0.0-beta.1 core-rest-pipeline/1.12.3 OS/Windows', 'sec-ch-ua-model': '""', 'sec-ch-ua-platform': '"Windows"', - 'sec-ch-ua-platform-version': '"15.0.0"', - 'sec-fetch-dest': 'document', - 'sec-fetch-mode': 'navigate', - 'sec-fetch-site': 'none', - 'sec-fetch-user': '?1', - 'upgrade-insecure-requests': '1', - 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.69', - 'x-edge-shopping-flag': '1', - 'x-forwarded-for': get_ip_address(), - } - -def get_default_cookies(): - return { - 'SRCHD' : 'AF=NOFORM', - 'PPLState' : '1', - 'KievRPSSecAuth': '', - 'SUID' : '', - 'SRCHUSR' : '', - 'SRCHHPGUSR' : f'HV={int(time.time())}', + 'sec-fetch-site': 'same-origin', + 'sec-fetch-mode': 'cors', + 'sec-fetch-dest': 'empty', + 'referer': home, + 'accept-encoding': 'gzip, deflate, br', + 'accept-language': 'en-US,en;q=0.9', } def format_message(msg: dict) -> str: @@ -234,8 +243,6 @@ def create_message( """ options_sets = [] - if not web_search: - options_sets.append("nosearchall") if gpt4_turbo: options_sets.append("dlgpt4t") @@ -249,7 +256,7 @@ def create_message( "verbosity": "verbose", "scenario": "SERP", "plugins": [{"id": "c310c353-b9f0-4d76-ab0d-1dd5e979cf68", "category": 1}] if web_search else [], - "traceId": str(uuid.uuid4()), + "traceId": get_random_hex(40), "conversationHistoryOptionsSets": ["autosave","savemem","uprofupd","uprofgen"], "gptId": "copilot", "isStartOfSession": True, @@ -257,7 +264,7 @@ def create_message( "message":{ **Defaults.location, "userIpAddress": get_ip_address(), - "timestamp": "2024-03-11T22:40:36+01:00", + "timestamp": datetime.now().isoformat(), "author": "user", "inputMethod": "Keyboard", "text": prompt, @@ -266,6 +273,7 @@ def create_message( "messageId": request_id }, "tone": tone, + "extraExtensionParameters": {"gpt-creator-persona": {"personaId": "copilot"}}, "spokenTextMode": "None", "conversationId": conversation.conversationId, "participant": {"id": conversation.clientId} @@ -301,7 +309,10 @@ async def stream_generate( connector: BaseConnector = None, web_search: bool = False, gpt4_turbo: bool = False, - timeout: int = 900 + timeout: int = 900, + conversation: Conversation = None, + max_retries: int = 5, + sleep_retry: int = 15 ): """ Asynchronously streams generated responses from the Bing API. @@ -316,20 +327,30 @@ async def stream_generate( :param timeout: Timeout for the request. :return: An asynchronous generator yielding responses. """ - headers = Defaults.headers - if cookies: - headers["cookie"] = "; ".join(f"{k}={v}" for k, v in cookies.items()) + headers = create_headers(cookies) async with ClientSession( - headers=headers, cookies=cookies, timeout=ClientTimeout(total=timeout), connector=connector ) as session: - conversation = await create_conversation(session) - image_request = await upload_image(session, image, tone) if image else None - try: + while conversation is None: + do_read = True + try: + conversation = await create_conversation(session, headers) + except ResponseStatusError as e: + max_retries -= 1 + if max_retries < 1: + raise e + if debug.logging: + print(f"Bing: Retry: {e}") + headers = create_headers() + await asyncio.sleep(sleep_retry) + continue + + image_request = await upload_image(session, image, tone, headers) if image else None async with session.ws_connect( 'wss://sydney.bing.com/sydney/ChatHub', autoping=False, - params={'sec_access_token': conversation.conversationSignature} + params={'sec_access_token': conversation.conversationSignature}, + headers=headers ) as wss: await wss.send_str(format_message({'protocol': 'json', 'version': 1})) await wss.send_str(format_message({"type": 6})) @@ -337,11 +358,12 @@ async def stream_generate( await wss.send_str(create_message(conversation, prompt, tone, context, image_request, web_search, gpt4_turbo)) response_txt = '' returned_text = '' - final = False message_id = None - while not final: + while do_read: msg = await wss.receive(timeout=timeout) - if not msg.data: + if msg.type == WSMsgType.CLOSED: + break + if msg.type != WSMsgType.TEXT or not msg.data: continue objects = msg.data.split(Defaults.delimiter) for obj in objects: @@ -350,7 +372,6 @@ async def stream_generate( response = json.loads(obj) if response and response.get('type') == 1 and response['arguments'][0].get('messages'): message = response['arguments'][0]['messages'][0] - # Reset memory, if we have a new message if message_id is not None and message_id != message["messageId"]: returned_text = '' message_id = message["messageId"] @@ -369,7 +390,7 @@ async def stream_generate( image_response = ImageResponse(await create_images(session, prompt), prompt, {"preview": "{image}?w=200&h=200"}) except: response_txt += f"\nhttps://www.bing.com/images/create?q={parse.quote(prompt)}" - final = True + do_read = False if response_txt.startswith(returned_text): new = response_txt[len(returned_text):] if new != "\n": @@ -380,10 +401,18 @@ async def stream_generate( elif response.get('type') == 2: result = response['item']['result'] if result.get('error'): - if result["value"] == "CaptchaChallenge": - raise Exception(f"{result['value']}: Use other cookies or/and ip address") - else: - raise Exception(f"{result['value']}: {result['message']}") + max_retries -= 1 + if max_retries < 1: + if result["value"] == "CaptchaChallenge": + raise RuntimeError(f"{result['value']}: Use other cookies or/and ip address") + else: + raise RuntimeError(f"{result['value']}: {result['message']}") + if debug.logging: + print(f"Bing: Retry: {result['value']}: {result['message']}") + headers = create_headers() + do_read = False + conversation = None + await asyncio.sleep(sleep_retry) + break return - finally: - await delete_conversation(session, conversation) + await delete_conversation(session, headers, conversation) diff --git a/g4f/Provider/bing/conversation.py b/g4f/Provider/bing/conversation.py index 03f17ee7..da842808 100644 --- a/g4f/Provider/bing/conversation.py +++ b/g4f/Provider/bing/conversation.py @@ -1,8 +1,6 @@ from __future__ import annotations -import uuid from aiohttp import ClientSession -from ...errors import ResponseStatusError from ...requests import raise_for_status class Conversation: @@ -22,7 +20,7 @@ class Conversation: self.clientId = clientId self.conversationSignature = conversationSignature -async def create_conversation(session: ClientSession, proxy: str = None) -> Conversation: +async def create_conversation(session: ClientSession, headers: dict) -> Conversation: """ Create a new conversation asynchronously. @@ -33,33 +31,15 @@ async def create_conversation(session: ClientSession, proxy: str = None) -> Conv Returns: Conversation: An instance representing the created conversation. """ - url = 'https://www.bing.com/search?toncp=0&FORM=hpcodx&q=Bing+AI&showconv=1&cc=en' - headers = { - "cookie": "; ".join(f"{c.key}={c.value}" for c in session.cookie_jar) - } + url = "https://www.bing.com/turing/conversation/create?bundleVersion=1.1626.1" async with session.get(url, headers=headers) as response: - await raise_for_status(response) - headers = { - "accept": "application/json", - "sec-fetch-dest": "empty", - "sec-fetch-mode": "cors", - "sec-fetch-site": "same-origin", - "x-ms-client-request-id": str(uuid.uuid4()), - "x-ms-useragent": "azsdk-js-api-client-factory/1.0.0-beta.1 core-rest-pipeline/1.12.3 OS/Windows", - "referer": "https://www.bing.com/search?toncp=0&FORM=hpcodx&q=Bing+AI&showconv=1&cc=en", - "cookie": "; ".join(f"{c.key}={c.value}" for c in session.cookie_jar) - } - url = "https://www.bing.com/turing/conversation/create?bundleVersion=1.1634.0-service-contracts" - async with session.get(url, headers=headers, proxy=proxy) as response: - if response.status == 404: - raise ResponseStatusError(f"Response {response.status}: Can't create a new chat") - await raise_for_status(response) + await raise_for_status(response, "Failed to create conversation") data = await response.json() conversationId = data.get('conversationId') clientId = data.get('clientId') conversationSignature = response.headers.get('X-Sydney-Encryptedconversationsignature') if not conversationId or not clientId or not conversationSignature: - raise Exception('Failed to create conversation.') + raise RuntimeError('Empty fields: Failed to create conversation') return Conversation(conversationId, clientId, conversationSignature) async def list_conversations(session: ClientSession) -> list: @@ -76,8 +56,8 @@ async def list_conversations(session: ClientSession) -> list: async with session.get(url) as response: response = await response.json() return response["chats"] - -async def delete_conversation(session: ClientSession, conversation: Conversation, proxy: str = None) -> bool: + +async def delete_conversation(session: ClientSession, conversation: Conversation, headers: dict) -> bool: """ Delete a conversation asynchronously. @@ -98,7 +78,7 @@ async def delete_conversation(session: ClientSession, conversation: Conversation "optionsSets": ["autosave"] } try: - async with session.post(url, json=json, proxy=proxy) as response: + async with session.post(url, json=json, headers=headers) as response: response = await response.json() return response["result"]["value"] == "Success" except: diff --git a/g4f/Provider/bing/upload_image.py b/g4f/Provider/bing/upload_image.py index 6d51aba0..c517e493 100644 --- a/g4f/Provider/bing/upload_image.py +++ b/g4f/Provider/bing/upload_image.py @@ -9,6 +9,7 @@ from aiohttp import ClientSession, FormData from ...typing import ImageType, Tuple from ...image import to_image, process_image, to_base64_jpg, ImageRequest, Image +from ...requests import raise_for_status IMAGE_CONFIG = { "maxImagePixels": 360000, @@ -20,7 +21,7 @@ async def upload_image( session: ClientSession, image_data: ImageType, tone: str, - proxy: str = None + headers: dict ) -> ImageRequest: """ Uploads an image to Bing's AI service and returns the image response. @@ -43,11 +44,9 @@ async def upload_image( img_binary_data = to_base64_jpg(image, IMAGE_CONFIG['imageCompressionRate']) data = build_image_upload_payload(img_binary_data, tone) - headers = prepare_headers(session) - async with session.post("https://www.bing.com/images/kblob", data=data, headers=headers, proxy=proxy) as response: - if response.status != 200: - raise RuntimeError("Failed to upload image.") + async with session.post("https://www.bing.com/images/kblob", data=data, headers=prepare_headers(headers)) as response: + await raise_for_status(response, "Failed to upload image") return parse_image_response(await response.json()) def calculate_new_dimensions(image: Image) -> Tuple[int, int]: @@ -109,7 +108,7 @@ def build_knowledge_request(tone: str) -> dict: } } -def prepare_headers(session: ClientSession) -> dict: +def prepare_headers(headers: dict) -> dict: """ Prepares the headers for the image upload request. @@ -120,7 +119,6 @@ def prepare_headers(session: ClientSession) -> dict: Returns: dict: The headers for the request. """ - headers = session.headers.copy() headers["Referer"] = 'https://www.bing.com/search?q=Bing+AI&showconv=1&FORM=hpcodx' headers["Origin"] = 'https://www.bing.com' return headers diff --git a/g4f/cookies.py b/g4f/cookies.py index a38488c2..578be8db 100644 --- a/g4f/cookies.py +++ b/g4f/cookies.py @@ -40,7 +40,7 @@ def get_cookies(domain_name: str = '', raise_requirements_error: bool = True, si """ if domain_name in _cookies: return _cookies[domain_name] - + cookies = load_cookies_from_browsers(domain_name, raise_requirements_error, single_browser) _cookies[domain_name] = cookies return cookies diff --git a/g4f/gui/client/js/chat.v1.js b/g4f/gui/client/js/chat.v1.js index 8dd17275..16b4acb9 100644 --- a/g4f/gui/client/js/chat.v1.js +++ b/g4f/gui/client/js/chat.v1.js @@ -751,8 +751,12 @@ function count_words(text) { return text.trim().match(/[\w\u4E00-\u9FA5]+/gu)?.length || 0; } +function count_chars(text) { + return text.match(/[^\s\p{P}]/gu)?.length || 0; +} + function count_words_and_tokens(text, model) { - return `(${count_words(text)} words, ${count_tokens(model, text)} tokens)`; + return `(${count_words(text)} words, ${count_chars(text)} chars, ${count_tokens(model, text)} tokens)`; } let countFocus = messageInput; diff --git a/g4f/providers/helper.py b/g4f/providers/helper.py index df6767a4..5f3b4fb6 100644 --- a/g4f/providers/helper.py +++ b/g4f/providers/helper.py @@ -1,7 +1,6 @@ from __future__ import annotations import random -import secrets import string from ..typing import Messages @@ -40,11 +39,14 @@ def get_random_string(length: int = 10) -> str: for _ in range(length) ) -def get_random_hex() -> str: +def get_random_hex(length: int = 32) -> str: """ - Generate a random hexadecimal string of a fixed length. + Generate a random hexadecimal string with n length. Returns: - str: A random hexadecimal string of 32 characters (16 bytes). + str: A random hexadecimal string of n characters. """ - return secrets.token_hex(16).zfill(32) \ No newline at end of file + return ''.join( + random.choice("abcdef" + string.digits) + for _ in range(length) + ) \ No newline at end of file diff --git a/g4f/requests/__init__.py b/g4f/requests/__init__.py index d4ef9cec..f2946fc1 100644 --- a/g4f/requests/__init__.py +++ b/g4f/requests/__init__.py @@ -1,6 +1,5 @@ from __future__ import annotations -from urllib.parse import urlparse from typing import Union from aiohttp import ClientResponse from requests import Response as RequestsResponse @@ -15,7 +14,7 @@ except ImportError: has_curl_cffi = False from ..webdriver import WebDriver, WebDriverSession -from ..webdriver import user_config_dir, bypass_cloudflare, get_driver_cookies +from ..webdriver import bypass_cloudflare, get_driver_cookies from ..errors import MissingRequirementsError, RateLimitError, ResponseStatusError from .defaults import DEFAULT_HEADERS @@ -39,17 +38,16 @@ def get_args_from_browser( Returns: Session: A Session object configured with cookies and headers from the WebDriver. """ - user_data_dir = "" #user_config_dir(f"g4f-{urlparse(url).hostname}") - with WebDriverSession(webdriver, user_data_dir, proxy=proxy, virtual_display=virtual_display) as driver: + with WebDriverSession(webdriver, "", proxy=proxy, virtual_display=virtual_display) as driver: if do_bypass_cloudflare: bypass_cloudflare(driver, url, timeout) - user_agent = driver.execute_script("return navigator.userAgent") headers = { **DEFAULT_HEADERS, 'referer': url, - 'user-agent': user_agent, } - if hasattr(driver, "requests"): + if not hasattr(driver, "requests"): + headers["user-agent"] = driver.execute_script("return navigator.userAgent") + else: for request in driver.requests: if request.url.startswith(url): for key, value in request.headers.items(): @@ -83,22 +81,22 @@ def get_session_from_browser(url: str, webdriver: WebDriver = None, proxy: str = impersonate="chrome" ) -async def raise_for_status_async(response: Union[StreamResponse, ClientResponse]): +async def raise_for_status_async(response: Union[StreamResponse, ClientResponse], message: str = None): if response.status in (429, 402): raise RateLimitError(f"Response {response.status}: Rate limit reached") - text = await response.text() if not response.ok else None - if response.status == 403 and "Just a moment..." in text: + message = await response.text() if not response.ok and message is None else message + if response.status == 403 and "Just a moment..." in message: raise ResponseStatusError(f"Response {response.status}: Cloudflare detected") elif not response.ok: - raise ResponseStatusError(f"Response {response.status}: {text}") + raise ResponseStatusError(f"Response {response.status}: {message}") -def raise_for_status(response: Union[StreamResponse, ClientResponse, Response, RequestsResponse]): +def raise_for_status(response: Union[StreamResponse, ClientResponse, Response, RequestsResponse], message: str = None): if isinstance(response, StreamSession) or isinstance(response, ClientResponse): - return raise_for_status_async(response) + return raise_for_status_async(response, message) if response.status_code in (429, 402): raise RateLimitError(f"Response {response.status_code}: Rate limit reached") elif response.status_code == 403 and "Just a moment..." in response.text: raise ResponseStatusError(f"Response {response.status_code}: Cloudflare detected") elif not response.ok: - raise ResponseStatusError(f"Response {response.status_code}: {response.text}") \ No newline at end of file + raise ResponseStatusError(f"Response {response.status_code}: {response.text if message is None else message}") \ No newline at end of file diff --git a/g4f/webdriver.py b/g4f/webdriver.py index 21dbc469..2b7a7241 100644 --- a/g4f/webdriver.py +++ b/g4f/webdriver.py @@ -10,10 +10,9 @@ try: from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.common.keys import Keys from selenium.common.exceptions import NoSuchElementException - from undetected_chromedriver import Chrome, ChromeOptions has_requirements = True except ImportError: - from typing import Type as WebDriver, Callable as user_config_dir + from typing import Type as WebDriver has_requirements = False import time @@ -38,9 +37,9 @@ try: def __init__(self, *args, options=None, seleniumwire_options={}, **kwargs): if options is None: options = ChromeOptions() - options.add_argument('--proxy-bypass-list=<-loopback>') config = self._setup_backend(seleniumwire_options) options.add_argument(f"--proxy-server={config['proxy']['httpProxy']}") + options.add_argument('--proxy-bypass-list=<-loopback>') options.add_argument("--ignore-certificate-errors") super().__init__(*args, options=options, **kwargs) has_seleniumwire = True -- cgit v1.2.3