summary | refs | log | tree | commit | diff | stats
path: root/g4f/requests
diff options
context:
space:
mode:
Diffstat (limited to 'g4f/requests')
-rw-r--r-- g4f/requests/__init__.py 75
-rw-r--r-- g4f/requests/aiohttp.py 26
-rw-r--r-- g4f/requests/defaults.py 28
3 files changed, 96 insertions, 33 deletions
diff --git a/g4f/requests/__init__.py b/g4f/requests/__init__.py
index 83176557..d4ef9cec 100644
--- a/g4f/requests/__init__.py
+++ b/g4f/requests/__init__.py
@@ -1,18 +1,22 @@
from __future__ import annotations
from urllib.parse import urlparse
+from typing import Union
+from aiohttp import ClientResponse
+from requests import Response as RequestsResponse
try:
- from curl_cffi.requests import Session
+ from curl_cffi.requests import Session, Response
from .curl_cffi import StreamResponse, StreamSession
has_curl_cffi = True
except ImportError:
- from typing import Type as Session
+ from typing import Type as Session, Type as Response
from .aiohttp import StreamResponse, StreamSession
has_curl_cffi = False
-from ..webdriver import WebDriver, WebDriverSession, bypass_cloudflare, get_driver_cookies
-from ..errors import MissingRequirementsError
+from ..webdriver import WebDriver, WebDriverSession
+from ..webdriver import user_config_dir, bypass_cloudflare, get_driver_cookies
+from ..errors import MissingRequirementsError, RateLimitError, ResponseStatusError
from .defaults import DEFAULT_HEADERS
def get_args_from_browser(
@@ -20,7 +24,8 @@ def get_args_from_browser(
webdriver: WebDriver = None,
proxy: str = None,
timeout: int = 120,
- do_bypass_cloudflare: bool = True
+ do_bypass_cloudflare: bool = True,
+ virtual_display: bool = False
) -> dict:
"""
Create a Session object using a WebDriver to handle cookies and headers.
@@ -34,21 +39,37 @@ def get_args_from_browser(
Returns:
Session: A Session object configured with cookies and headers from the WebDriver.
"""
- with WebDriverSession(webdriver, "", proxy=proxy, virtual_display=False) as driver:
+ user_data_dir = "" #user_config_dir(f"g4f-{urlparse(url).hostname}")
+ with WebDriverSession(webdriver, user_data_dir, proxy=proxy, virtual_display=virtual_display) as driver:
if do_bypass_cloudflare:
bypass_cloudflare(driver, url, timeout)
- cookies = get_driver_cookies(driver)
user_agent = driver.execute_script("return navigator.userAgent")
- parse = urlparse(url)
+ headers = {
+ **DEFAULT_HEADERS,
+ 'referer': url,
+ 'user-agent': user_agent,
+ }
+ if hasattr(driver, "requests"):
+ for request in driver.requests:
+ if request.url.startswith(url):
+ for key, value in request.headers.items():
+ if key in (
+ "accept-encoding",
+ "accept-language",
+ "user-agent",
+ "sec-ch-ua",
+ "sec-ch-ua-platform",
+ "sec-ch-ua-arch",
+ "sec-ch-ua-full-version",
+ "sec-ch-ua-platform-version",
+ "sec-ch-ua-bitness"
+ ):
+ headers[key] = value
+ break
+ cookies = get_driver_cookies(driver)
return {
'cookies': cookies,
- 'headers': {
- **DEFAULT_HEADERS,
- 'Authority': parse.netloc,
- 'Origin': f'{parse.scheme}://{parse.netloc}',
- 'Referer': url,
- 'User-Agent': user_agent,
- },
+ 'headers': headers,
}
def get_session_from_browser(url: str, webdriver: WebDriver = None, proxy: str = None, timeout: int = 120) -> Session:
@@ -59,5 +80,25 @@ def get_session_from_browser(url: str, webdriver: WebDriver = None, proxy: str =
**args,
proxies={"https": proxy, "http": proxy},
timeout=timeout,
- impersonate="chrome110"
- ) \ No newline at end of file
+ impersonate="chrome"
+ )
+
+async def raise_for_status_async(response: Union[StreamResponse, ClientResponse]):
+    """
+    Check an asynchronous response and raise if it signals a failure.
+
+    The response body is awaited only when ``response.ok`` is false.
+
+    Args:
+        response (Union[StreamResponse, ClientResponse]): Response object exposing
+            ``status``, ``ok`` and an awaitable ``text()``.
+
+    Raises:
+        RateLimitError: If the status is 429 or 402.
+        ResponseStatusError: If the status is 403 and the body contains the
+            "Just a moment..." page title (reported as Cloudflare), or for any
+            other response that is not ok (the body text is put in the message).
+    """
+    if response.status in (429, 402):
+        raise RateLimitError(f"Response {response.status}: Rate limit reached")
+    failed = not response.ok
+    # Only failed responses have their body read; successful ones keep None here.
+    text = await response.text() if failed else None
+    if response.status == 403 and "<title>Just a moment...</title>" in text:
+        raise ResponseStatusError(f"Response {response.status}: Cloudflare detected")
+    if failed:
+        raise ResponseStatusError(f"Response {response.status}: {text}")
+
+def raise_for_status(response: Union[StreamResponse, ClientResponse, Response, RequestsResponse]):
+    """
+    Raise an error if the response signals rate limiting, a Cloudflare block or another failure.
+
+    Asynchronous responses (StreamResponse, ClientResponse) are delegated to
+    raise_for_status_async(). In that case the un-awaited coroutine is returned
+    and the caller has to await it for the check to run.
+
+    Args:
+        response (Union[StreamResponse, ClientResponse, Response, RequestsResponse]):
+            The response to check. Synchronous responses must expose
+            ``status_code``, ``ok`` and ``text``.
+
+    Returns:
+        The coroutine from raise_for_status_async() for asynchronous responses,
+        otherwise None.
+
+    Raises:
+        RateLimitError: If the status code is 429 or 402.
+        ResponseStatusError: If the status code is 403 and the body contains the
+            "Just a moment..." page title (reported as Cloudflare), or for any
+            other response that is not ok.
+    """
+    # Dispatch on the response classes. StreamSession is a session type and is
+    # never passed here, so testing against it could not select the async path.
+    if isinstance(response, (StreamResponse, ClientResponse)):
+        return raise_for_status_async(response)
+
+    if response.status_code in (429, 402):
+        raise RateLimitError(f"Response {response.status_code}: Rate limit reached")
+    elif response.status_code == 403 and "<title>Just a moment...</title>" in response.text:
+        raise ResponseStatusError(f"Response {response.status_code}: Cloudflare detected")
+    elif not response.ok:
+        raise ResponseStatusError(f"Response {response.status_code}: {response.text}")
\ No newline at end of file
diff --git a/g4f/requests/aiohttp.py b/g4f/requests/aiohttp.py
index d9bd6541..6979b20a 100644
--- a/g4f/requests/aiohttp.py
+++ b/g4f/requests/aiohttp.py
@@ -1,16 +1,20 @@
from __future__ import annotations
-from aiohttp import ClientSession, ClientResponse, ClientTimeout
-from typing import AsyncGenerator, Any
+from aiohttp import ClientSession, ClientResponse, ClientTimeout, BaseConnector
+from typing import AsyncIterator, Any, Optional
-from ..providers.helper import get_connector
from .defaults import DEFAULT_HEADERS
+from ..errors import MissingRequirementsError
class StreamResponse(ClientResponse):
+ # aiohttp ClientResponse subclass adding iter_lines(), iter_content() and a
+ # json() override that forwards content_type=None.
- async def iter_lines(self) -> AsyncGenerator[bytes, None]:
+ async def iter_lines(self) -> AsyncIterator[bytes]:
+ # Yields each line read from self.content with trailing "\r"/"\n" bytes stripped.
async for line in self.content:
yield line.rstrip(b"\r\n")
+ async def iter_content(self) -> AsyncIterator[bytes]:
+ # Yields raw chunks exactly as produced by self.content.iter_any().
+ async for chunk in self.content.iter_any():
+ yield chunk
+
async def json(self) -> Any:
+ # content_type=None is passed to ClientResponse.json(); per the aiohttp
+ # documentation this disables the response content-type check.
return await super().json(content_type=None)
@@ -27,4 +31,16 @@ class StreamSession(ClientSession):
response_class=StreamResponse,
connector=get_connector(kwargs.get("connector"), proxies.get("https")),
headers=headers
- ) \ No newline at end of file
+ )
+
+def get_connector(connector: BaseConnector = None, proxy: str = None, rdns: bool = False) -> Optional[BaseConnector]:
+    """
+    Return the connector for a session, building a proxy connector when needed.
+
+    Args:
+        connector (BaseConnector, optional): Existing connector. When given it is
+            returned unchanged and ``proxy`` is ignored.
+        proxy (str, optional): Proxy URL, used only when no connector is supplied.
+        rdns (bool): Forwarded to ``ProxyConnector.from_url``; forced to True for
+            ``socks5h://`` URLs.
+
+    Returns:
+        Optional[BaseConnector]: The given connector, a new ProxyConnector, or
+        None when neither a connector nor a proxy was supplied.
+
+    Raises:
+        MissingRequirementsError: If a proxy is requested but the
+            "aiohttp_socks" package is not installed.
+    """
+    if not proxy or connector:
+        return connector
+    # Keep the try body to the import: it is the only statement whose
+    # ImportError means "package missing".
+    try:
+        from aiohttp_socks import ProxyConnector
+    except ImportError as error:
+        raise MissingRequirementsError('Install "aiohttp_socks" package for proxy support') from error
+    socks5h_scheme = "socks5h://"
+    if proxy.startswith(socks5h_scheme):
+        # Rewrite only the leading scheme and request rdns instead.
+        # NOTE(review): presumably aiohttp_socks does not accept the socks5h
+        # scheme and rdns=True is its equivalent - confirm against its docs.
+        proxy = "socks5://" + proxy[len(socks5h_scheme):]
+        rdns = True
+    return ProxyConnector.from_url(proxy, rdns=rdns)
\ No newline at end of file
diff --git a/g4f/requests/defaults.py b/g4f/requests/defaults.py
index 6ae6d7eb..2457f046 100644
--- a/g4f/requests/defaults.py
+++ b/g4f/requests/defaults.py
@@ -1,13 +1,19 @@
DEFAULT_HEADERS = {
-    'Accept': '*/*',
-    'Accept-Encoding': 'gzip, deflate, br',
-    'Accept-Language': 'en-US',
-    'Connection': 'keep-alive',
-    'Sec-Ch-Ua': '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
-    'Sec-Ch-Ua-Mobile': '?0',
-    'Sec-Ch-Ua-Platform': '"Windows"',
-    'Sec-Fetch-Dest': 'empty',
-    'Sec-Fetch-Mode': 'cors',
-    'Sec-Fetch-Site': 'same-site',
-    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
+    # Browser-like default request headers (Chrome 122 on Windows), keyed by
+    # lower-case header name. The names must be spelled exactly:
+    # get_args_from_browser() overwrites a fixed list of these keys (including
+    # "sec-ch-ua-arch" and "sec-ch-ua-platform-version") with values taken from
+    # the browser's own requests, and a misspelled key would never be replaced.
+    "sec-ch-ua": '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
+    "sec-ch-ua-mobile": "?0",
+    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
+    "sec-ch-ua-arch": '"x86"',
+    "sec-ch-ua-full-version": '"122.0.6261.69"',
+    "accept": "*/*",
+    # NOTE(review): "6.5.0" looks like a Linux kernel version while
+    # sec-ch-ua-platform says "Windows" - confirm the intended value.
+    "sec-ch-ua-platform-version": '"6.5.0"',
+    "sec-ch-ua-full-version-list": '"Chromium";v="122.0.6261.69", "Not(A:Brand";v="24.0.0.0", "Google Chrome";v="122.0.6261.69"',
+    "sec-ch-ua-bitness": '"64"',
+    "sec-ch-ua-model": '""',
+    "sec-ch-ua-platform": '"Windows"',
+    "sec-fetch-site": "same-site",
+    "sec-fetch-mode": "cors",
+    "sec-fetch-dest": "empty",
+    "referer": "",
+    "accept-encoding": "gzip, deflate, br",
+    "accept-language": "en-US",
}
\ No newline at end of file