From 6ef282de3a3245acbfecd08ae48dba85ff91d031 Mon Sep 17 00:00:00 2001 From: H Lohaus Date: Tue, 12 Mar 2024 02:06:06 +0100 Subject: Remove all not working provider (#1679) Fix many providers Add selenium-wire to requierments --- g4f/requests/__init__.py | 75 +++++++++++++++++++++++++++++++++++++----------- g4f/requests/aiohttp.py | 26 +++++++++++++---- g4f/requests/defaults.py | 28 +++++++++++------- 3 files changed, 96 insertions(+), 33 deletions(-) (limited to 'g4f/requests') diff --git a/g4f/requests/__init__.py b/g4f/requests/__init__.py index 83176557..d4ef9cec 100644 --- a/g4f/requests/__init__.py +++ b/g4f/requests/__init__.py @@ -1,18 +1,22 @@ from __future__ import annotations from urllib.parse import urlparse +from typing import Union +from aiohttp import ClientResponse +from requests import Response as RequestsResponse try: - from curl_cffi.requests import Session + from curl_cffi.requests import Session, Response from .curl_cffi import StreamResponse, StreamSession has_curl_cffi = True except ImportError: - from typing import Type as Session + from typing import Type as Session, Type as Response from .aiohttp import StreamResponse, StreamSession has_curl_cffi = False -from ..webdriver import WebDriver, WebDriverSession, bypass_cloudflare, get_driver_cookies -from ..errors import MissingRequirementsError +from ..webdriver import WebDriver, WebDriverSession +from ..webdriver import user_config_dir, bypass_cloudflare, get_driver_cookies +from ..errors import MissingRequirementsError, RateLimitError, ResponseStatusError from .defaults import DEFAULT_HEADERS def get_args_from_browser( @@ -20,7 +24,8 @@ def get_args_from_browser( webdriver: WebDriver = None, proxy: str = None, timeout: int = 120, - do_bypass_cloudflare: bool = True + do_bypass_cloudflare: bool = True, + virtual_display: bool = False ) -> dict: """ Create a Session object using a WebDriver to handle cookies and headers. @@ -34,21 +39,37 @@ def get_args_from_browser( Returns: Session: A Session object configured with cookies and headers from the WebDriver. """ - with WebDriverSession(webdriver, "", proxy=proxy, virtual_display=False) as driver: + user_data_dir = "" #user_config_dir(f"g4f-{urlparse(url).hostname}") + with WebDriverSession(webdriver, user_data_dir, proxy=proxy, virtual_display=virtual_display) as driver: if do_bypass_cloudflare: bypass_cloudflare(driver, url, timeout) - cookies = get_driver_cookies(driver) user_agent = driver.execute_script("return navigator.userAgent") - parse = urlparse(url) + headers = { + **DEFAULT_HEADERS, + 'referer': url, + 'user-agent': user_agent, + } + if hasattr(driver, "requests"): + for request in driver.requests: + if request.url.startswith(url): + for key, value in request.headers.items(): + if key in ( + "accept-encoding", + "accept-language", + "user-agent", + "sec-ch-ua", + "sec-ch-ua-platform", + "sec-ch-ua-arch", + "sec-ch-ua-full-version", + "sec-ch-ua-platform-version", + "sec-ch-ua-bitness" + ): + headers[key] = value + break + cookies = get_driver_cookies(driver) return { 'cookies': cookies, - 'headers': { - **DEFAULT_HEADERS, - 'Authority': parse.netloc, - 'Origin': f'{parse.scheme}://{parse.netloc}', - 'Referer': url, - 'User-Agent': user_agent, - }, + 'headers': headers, } def get_session_from_browser(url: str, webdriver: WebDriver = None, proxy: str = None, timeout: int = 120) -> Session: @@ -59,5 +80,25 @@ def get_session_from_browser(url: str, webdriver: WebDriver = None, proxy: str = **args, proxies={"https": proxy, "http": proxy}, timeout=timeout, - impersonate="chrome110" - ) \ No newline at end of file + impersonate="chrome" + ) + +async def raise_for_status_async(response: Union[StreamResponse, ClientResponse]): + if response.status in (429, 402): + raise RateLimitError(f"Response {response.status}: Rate limit reached") + text = await response.text() if not response.ok else None + if response.status == 403 and "Just a moment..." in text: + raise ResponseStatusError(f"Response {response.status}: Cloudflare detected") + elif not response.ok: + raise ResponseStatusError(f"Response {response.status}: {text}") + +def raise_for_status(response: Union[StreamResponse, ClientResponse, Response, RequestsResponse]): + if isinstance(response, StreamSession) or isinstance(response, ClientResponse): + return raise_for_status_async(response) + + if response.status_code in (429, 402): + raise RateLimitError(f"Response {response.status_code}: Rate limit reached") + elif response.status_code == 403 and "Just a moment..." in response.text: + raise ResponseStatusError(f"Response {response.status_code}: Cloudflare detected") + elif not response.ok: + raise ResponseStatusError(f"Response {response.status_code}: {response.text}") \ No newline at end of file diff --git a/g4f/requests/aiohttp.py b/g4f/requests/aiohttp.py index d9bd6541..6979b20a 100644 --- a/g4f/requests/aiohttp.py +++ b/g4f/requests/aiohttp.py @@ -1,16 +1,20 @@ from __future__ import annotations -from aiohttp import ClientSession, ClientResponse, ClientTimeout -from typing import AsyncGenerator, Any +from aiohttp import ClientSession, ClientResponse, ClientTimeout, BaseConnector +from typing import AsyncIterator, Any, Optional -from ..providers.helper import get_connector from .defaults import DEFAULT_HEADERS +from ..errors import MissingRequirementsError class StreamResponse(ClientResponse): - async def iter_lines(self) -> AsyncGenerator[bytes, None]: + async def iter_lines(self) -> AsyncIterator[bytes]: async for line in self.content: yield line.rstrip(b"\r\n") + async def iter_content(self) -> AsyncIterator[bytes]: + async for chunk in self.content.iter_any(): + yield chunk + async def json(self) -> Any: return await super().json(content_type=None) @@ -27,4 +31,16 @@ class StreamSession(ClientSession): response_class=StreamResponse, connector=get_connector(kwargs.get("connector"), proxies.get("https")), headers=headers - ) \ No newline at end of file + ) + +def get_connector(connector: BaseConnector = None, proxy: str = None, rdns: bool = False) -> Optional[BaseConnector]: + if proxy and not connector: + try: + from aiohttp_socks import ProxyConnector + if proxy.startswith("socks5h://"): + proxy = proxy.replace("socks5h://", "socks5://") + rdns = True + connector = ProxyConnector.from_url(proxy, rdns=rdns) + except ImportError: + raise MissingRequirementsError('Install "aiohttp_socks" package for proxy support') + return connector \ No newline at end of file diff --git a/g4f/requests/defaults.py b/g4f/requests/defaults.py index 6ae6d7eb..2457f046 100644 --- a/g4f/requests/defaults.py +++ b/g4f/requests/defaults.py @@ -1,13 +1,19 @@ DEFAULT_HEADERS = { - 'Accept': '*/*', - 'Accept-Encoding': 'gzip, deflate, br', - 'Accept-Language': 'en-US', - 'Connection': 'keep-alive', - 'Sec-Ch-Ua': '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"', - 'Sec-Ch-Ua-Mobile': '?0', - 'Sec-Ch-Ua-Platform': '"Windows"', - 'Sec-Fetch-Dest': 'empty', - 'Sec-Fetch-Mode': 'cors', - 'Sec-Fetch-Site': 'same-site', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36' + "sec-ch-ua": '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"', + "sec-ch-ua-mobile": "?0", + "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36", + "ec-ch-ua-arch": '"x86"', + "sec-ch-ua-full-version": '"122.0.6261.69"', + "accept": "*/*", + "sec-ch-ua-platform-version:": '"6.5.0"', + "sec-ch-ua-full-version-list": '"Chromium";v="122.0.6261.69", "Not(A:Brand";v="24.0.0.0", "Google Chrome";v="122.0.6261.69"', + "sec-ch-ua-bitness": '"64"', + "sec-ch-ua-model": '""', + "sec-ch-ua-platform": '"Windows"', + "sec-fetch-site": "same-site", + "sec-fetch-mode": "cors", + "sec-fetch-dest": "empty", + "referer": "", + "accept-encoding": "gzip, deflate, br", + "accept-language": "en-US", } \ No newline at end of file -- cgit v1.2.3