From c1b992c3460cb0069127524a9b987a8af475ec14 Mon Sep 17 00:00:00 2001 From: Heiner Lohaus Date: Thu, 8 Feb 2024 22:02:52 +0100 Subject: Add Gemini Provider with image upload and generation --- g4f/Provider/__init__.py | 2 +- g4f/Provider/bing/create_images.py | 6 +- g4f/Provider/needs_auth/Bard.py | 79 ---------------- g4f/Provider/needs_auth/Gemini.py | 165 ++++++++++++++++++++++++++++++++++ g4f/Provider/needs_auth/OpenaiChat.py | 18 ++-- g4f/Provider/needs_auth/ThebApi.py | 16 ++-- g4f/Provider/needs_auth/__init__.py | 2 +- g4f/Provider/selenium/Bard.py | 80 +++++++++++++++++ g4f/Provider/selenium/__init__.py | 3 +- g4f/__init__.py | 13 +-- g4f/errors.py | 5 +- g4f/gui/server/backend.py | 2 +- g4f/image.py | 20 +++-- 13 files changed, 293 insertions(+), 118 deletions(-) delete mode 100644 g4f/Provider/needs_auth/Bard.py create mode 100644 g4f/Provider/needs_auth/Gemini.py create mode 100644 g4f/Provider/selenium/Bard.py diff --git a/g4f/Provider/__init__.py b/g4f/Provider/__init__.py index 5ac5cfca..1b45b00d 100644 --- a/g4f/Provider/__init__.py +++ b/g4f/Provider/__init__.py @@ -5,9 +5,9 @@ from .retry_provider import RetryProvider from .base_provider import AsyncProvider, AsyncGeneratorProvider from .create_images import CreateImagesProvider from .deprecated import * +from .selenium import * from .needs_auth import * from .unfinished import * -from .selenium import * from .AiAsk import AiAsk from .AiChatOnline import AiChatOnline diff --git a/g4f/Provider/bing/create_images.py b/g4f/Provider/bing/create_images.py index e1031e61..b31e9408 100644 --- a/g4f/Provider/bing/create_images.py +++ b/g4f/Provider/bing/create_images.py @@ -23,7 +23,7 @@ from ..helper import get_cookies, get_connector from ...webdriver import WebDriver, get_driver_cookies, get_browser from ...base_provider import ProviderType from ...image import ImageResponse -from ...errors import MissingRequirementsError, MissingAccessToken +from ...errors import MissingRequirementsError, MissingAuthError BING_URL = "https://www.bing.com" TIMEOUT_LOGIN = 1200 @@ -210,7 +210,7 @@ class CreateImagesBing: try: self.cookies = get_cookies_from_browser(self.proxy) except MissingRequirementsError as e: - raise MissingAccessToken(f'Missing "_U" cookie. {e}') + raise MissingAuthError(f'Missing "_U" cookie. {e}') yield asyncio.run(self.create_async(prompt)) async def create_async(self, prompt: str) -> ImageResponse: @@ -225,7 +225,7 @@ class CreateImagesBing: """ cookies = self.cookies or get_cookies(".bing.com", False) if "_U" not in cookies: - raise MissingAccessToken('Missing "_U" cookie') + raise MissingAuthError('Missing "_U" cookie') proxy = os.environ.get("G4F_PROXY") async with create_session(cookies, proxy) as session: images = await create_images(session, prompt, self.proxy) diff --git a/g4f/Provider/needs_auth/Bard.py b/g4f/Provider/needs_auth/Bard.py deleted file mode 100644 index 73c62edc..00000000 --- a/g4f/Provider/needs_auth/Bard.py +++ /dev/null @@ -1,79 +0,0 @@ -from __future__ import annotations - -import time -import os - -try: - from selenium.webdriver.common.by import By - from selenium.webdriver.support.ui import WebDriverWait - from selenium.webdriver.support import expected_conditions as EC -except ImportError: - pass - -from ...typing import CreateResult, Messages -from ..base_provider import AbstractProvider -from ..helper import format_prompt -from ...webdriver import WebDriver, WebDriverSession, element_send_text - - -class Bard(AbstractProvider): - url = "https://bard.google.com" - working = True - needs_auth = True - - @classmethod - def create_completion( - cls, - model: str, - messages: Messages, - stream: bool, - proxy: str = None, - webdriver: WebDriver = None, - user_data_dir: str = None, - headless: bool = True, - **kwargs - ) -> CreateResult: - prompt = format_prompt(messages) - session = WebDriverSession(webdriver, user_data_dir, headless, proxy=proxy) - with session as driver: - try: - driver.get(f"{cls.url}/chat") - wait = WebDriverWait(driver, 10 if headless else 240) - wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea"))) - except: - # Reopen browser for login - if not webdriver: - driver = session.reopen() - driver.get(f"{cls.url}/chat") - login_url = os.environ.get("G4F_LOGIN_URL") - if login_url: - yield f"Please login: [Google Bard]({login_url})\n\n" - wait = WebDriverWait(driver, 240) - wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea"))) - else: - raise RuntimeError("Prompt textarea not found. You may not be logged in.") - - # Add hook in XMLHttpRequest - script = """ -const _http_request_open = XMLHttpRequest.prototype.open; -window._message = ""; -XMLHttpRequest.prototype.open = function(method, url) { - if (url.includes("/assistant.lamda.BardFrontendService/StreamGenerate")) { - this.addEventListener("load", (event) => { - window._message = JSON.parse(JSON.parse(this.responseText.split("\\n")[3])[0][2])[4][0][1][0]; - }); - } - return _http_request_open.call(this, method, url); -} -""" - driver.execute_script(script) - - element_send_text(driver.find_element(By.CSS_SELECTOR, "div.ql-editor.textarea"), prompt) - - while True: - chunk = driver.execute_script("return window._message;") - if chunk: - yield chunk - return - else: - time.sleep(0.1) \ No newline at end of file diff --git a/g4f/Provider/needs_auth/Gemini.py b/g4f/Provider/needs_auth/Gemini.py new file mode 100644 index 00000000..a6e4c15d --- /dev/null +++ b/g4f/Provider/needs_auth/Gemini.py @@ -0,0 +1,165 @@ +from __future__ import annotations + +import json +import random +import re + +from aiohttp import ClientSession + +from ...typing import Messages, Cookies, ImageType, AsyncResult +from ..base_provider import AsyncGeneratorProvider +from ..helper import format_prompt, get_cookies +from ...errors import MissingAuthError +from ...image import to_bytes, ImageResponse + +REQUEST_HEADERS = { + "authority": "gemini.google.com", + "origin": "https://gemini.google.com", + "referer": "https://gemini.google.com/", + 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36', + 'x-same-domain': '1', +} +REQUEST_BL_PARAM = "boq_assistant-bard-web-server_20240201.08_p8" +REQUEST_URL = "https://gemini.google.com/_/BardChatUi/data/assistant.lamda.BardFrontendService/StreamGenerate" +UPLOAD_IMAGE_URL = "https://content-push.googleapis.com/upload/" +UPLOAD_IMAGE_HEADERS = { + "authority": "content-push.googleapis.com", + "accept": "*/*", + "accept-language": "en-US,en;q=0.7", + "authorization": "Basic c2F2ZXM6cyNMdGhlNmxzd2F2b0RsN3J1d1U=", + "content-type": "application/x-www-form-urlencoded;charset=UTF-8", + "origin": "https://gemini.google.com", + "push-id": "feeds/mcudyrk2a4khkz", + "referer": "https://gemini.google.com/", + "x-goog-upload-command": "start", + "x-goog-upload-header-content-length": "", + "x-goog-upload-protocol": "resumable", + "x-tenant-id": "bard-storage", +} + +class Gemini(AsyncGeneratorProvider): + url = "https://gemini.google.com" + needs_auth = True + working = True + supports_stream = False + + @classmethod + async def create_async_generator( + cls, + model: str, + messages: Messages, + proxy: str = None, + cookies: Cookies = None, + image: ImageType = None, + image_name: str = None, + **kwargs + ) -> AsyncResult: + prompt = format_prompt(messages) + if not cookies: + cookies = get_cookies(".google.com", False) + if "__Secure-1PSID" not in cookies: + raise MissingAuthError('Missing "__Secure-1PSID" cookie') + + image_url = await cls.upload_image(to_bytes(image), image_name, proxy) if image else None + + async with ClientSession( + cookies=cookies, + headers=REQUEST_HEADERS + ) as session: + async with session.get(cls.url, proxy=proxy) as response: + text = await response.text() + match = re.search(r'SNlM0e\":\"(.*?)\"', text) + if match: + snlm0e = match.group(1) + else: + raise RuntimeError("SNlM0e not found") + + params = { + 'bl': REQUEST_BL_PARAM, + '_reqid': random.randint(1111, 9999), + 'rt': 'c' + } + data = { + 'at': snlm0e, + 'f.req': json.dumps([None, json.dumps(cls.build_request( + prompt, + image_url=image_url, + image_name=image_name + ))]) + } + async with session.post( + REQUEST_URL, + data=data, + params=params, + proxy=proxy + ) as response: + response = await response.text() + response_part = json.loads(json.loads(response.splitlines()[-5])[0][2]) + if response_part[4] is None: + response_part = json.loads(json.loads(response.splitlines()[-7])[0][2]) + + content = response_part[4][0][1][0] + image_prompt = None + match = re.search(r'\[Imagen of (.*?)\]', content) + if match: + image_prompt = match.group(1) + content = content.replace(match.group(0), '') + + yield content + if image_prompt: + images = [image[0][3][3] for image in response_part[4][0][12][7][0]] + yield ImageResponse(images, image_prompt) + + def build_request( + prompt: str, + conversation_id: str = "", + response_id: str = "", + choice_id: str = "", + image_url: str = None, + image_name: str = None, + tools: list[list[str]] = [] + ) -> list: + image_list = [[[image_url, 1], image_name]] if image_url else [] + return [ + [prompt, 0, None, image_list, None, None, 0], + ["en"], + [conversation_id, response_id, choice_id, None, None, []], + None, + None, + None, + [1], + 0, + [], + tools, + 1, + 0, + ] + + async def upload_image(image: bytes, image_name: str = None, proxy: str = None): + async with ClientSession( + headers=UPLOAD_IMAGE_HEADERS + ) as session: + async with session.options(UPLOAD_IMAGE_URL, proxy=proxy) as reponse: + reponse.raise_for_status() + + headers = { + "size": str(len(image)), + "x-goog-upload-command": "start" + } + data = f"File name: {image_name}" if image_name else None + async with session.post( + UPLOAD_IMAGE_URL, headers=headers, data=data, proxy=proxy + ) as response: + response.raise_for_status() + upload_url = response.headers["X-Goog-Upload-Url"] + + async with session.options(upload_url, headers=headers) as response: + response.raise_for_status() + + headers["x-goog-upload-command"] = "upload, finalize" + headers["X-Goog-Upload-Offset"] = "0" + async with session.post( + upload_url, headers=headers, data=image, proxy=proxy + ) as response: + response.raise_for_status() + return await response.text() \ No newline at end of file diff --git a/g4f/Provider/needs_auth/OpenaiChat.py b/g4f/Provider/needs_auth/OpenaiChat.py index 253d4f77..32aee9fb 100644 --- a/g4f/Provider/needs_auth/OpenaiChat.py +++ b/g4f/Provider/needs_auth/OpenaiChat.py @@ -25,7 +25,7 @@ from ...webdriver import get_browser, get_driver_cookies from ...typing import AsyncResult, Messages, Cookies, ImageType from ...requests import StreamSession from ...image import to_image, to_bytes, ImageResponse, ImageRequest -from ...errors import MissingRequirementsError, MissingAccessToken +from ...errors import MissingRequirementsError, MissingAuthError class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin): @@ -99,7 +99,8 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin): cls, session: StreamSession, headers: dict, - image: ImageType + image: ImageType, + image_name: str = None ) -> ImageRequest: """ Upload an image to the service and get the download URL @@ -118,7 +119,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin): # Convert the image to a bytes object and get the size data_bytes = to_bytes(image) data = { - "file_name": f"{image.width}x{image.height}.{extension}", + "file_name": image_name if image_name else f"{image.width}x{image.height}.{extension}", "file_size": len(data_bytes), "use_case": "multimodal" } @@ -338,7 +339,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin): try: access_token, cookies = cls.browse_access_token(proxy) except MissingRequirementsError: - raise MissingAccessToken(f'Missing "access_token"') + raise MissingAuthError(f'Missing "access_token"') cls._cookies = cookies headers = {"Authorization": f"Bearer {access_token}"} @@ -351,7 +352,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin): try: image_response = None if image: - image_response = await cls.upload_image(session, headers, image) + image_response = await cls.upload_image(session, headers, image, kwargs.get("image_name")) except Exception as e: yield e end_turn = EndTurn() @@ -438,21 +439,18 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin): Returns: tuple[str, dict]: A tuple containing the access token and cookies. """ - driver = get_browser(proxy=proxy) - try: + with get_browser(proxy=proxy) as driver: driver.get(f"{cls.url}/") WebDriverWait(driver, timeout).until(EC.presence_of_element_located((By.ID, "prompt-textarea"))) access_token = driver.execute_script( "let session = await fetch('/api/auth/session');" "let data = await session.json();" "let accessToken = data['accessToken'];" - "let expires = new Date(); expires.setTime(expires.getTime() + 60 * 60 * 24 * 7);" + "let expires = new Date(); expires.setTime(expires.getTime() + 60 * 60 * 4);" "document.cookie = 'access_token=' + accessToken + ';expires=' + expires.toUTCString() + ';path=/';" "return accessToken;" ) return access_token, get_driver_cookies(driver) - finally: - driver.quit() @classmethod async def get_arkose_token(cls, session: StreamSession) -> str: diff --git a/g4f/Provider/needs_auth/ThebApi.py b/g4f/Provider/needs_auth/ThebApi.py index 8ec7bda8..ea633243 100644 --- a/g4f/Provider/needs_auth/ThebApi.py +++ b/g4f/Provider/needs_auth/ThebApi.py @@ -3,7 +3,8 @@ from __future__ import annotations import requests from ...typing import Any, CreateResult, Messages -from ..base_provider import AbstractProvider +from ..base_provider import AbstractProvider, ProviderModelMixin +from ...errors import MissingAuthError models = { "theb-ai": "TheB.AI", @@ -29,13 +30,16 @@ models = { "qwen-7b-chat": "Qwen 7B" } -class ThebApi(AbstractProvider): +class ThebApi(AbstractProvider, ProviderModelMixin): url = "https://theb.ai" working = True needs_auth = True + default_model = "gpt-3.5-turbo" + models = list(models) - @staticmethod + @classmethod def create_completion( + cls, model: str, messages: Messages, stream: bool, @@ -43,8 +47,8 @@ class ThebApi(AbstractProvider): proxy: str = None, **kwargs ) -> CreateResult: - if model and model not in models: - raise ValueError(f"Model are not supported: {model}") + if not auth: + raise MissingAuthError("Missing auth") headers = { 'accept': 'application/json', 'authorization': f'Bearer {auth}', @@ -54,7 +58,7 @@ class ThebApi(AbstractProvider): # models = dict([(m["id"], m["name"]) for m in response]) # print(json.dumps(models, indent=4)) data: dict[str, Any] = { - "model": model if model else "gpt-3.5-turbo", + "model": cls.get_model(model), "messages": messages, "stream": False, "model_params": { diff --git a/g4f/Provider/needs_auth/__init__.py b/g4f/Provider/needs_auth/__init__.py index 46e1f740..5eb1b2eb 100644 --- a/g4f/Provider/needs_auth/__init__.py +++ b/g4f/Provider/needs_auth/__init__.py @@ -1,4 +1,4 @@ -from .Bard import Bard +from .Gemini import Gemini from .Raycast import Raycast from .Theb import Theb from .ThebApi import ThebApi diff --git a/g4f/Provider/selenium/Bard.py b/g4f/Provider/selenium/Bard.py new file mode 100644 index 00000000..459f6f37 --- /dev/null +++ b/g4f/Provider/selenium/Bard.py @@ -0,0 +1,80 @@ +from __future__ import annotations + +import time +import os + +try: + from selenium.webdriver.common.by import By + from selenium.webdriver.support.ui import WebDriverWait + from selenium.webdriver.support import expected_conditions as EC +except ImportError: + pass + +from ...typing import CreateResult, Messages +from ..base_provider import AbstractProvider +from ..helper import format_prompt +from ...webdriver import WebDriver, WebDriverSession, element_send_text + + +class Bard(AbstractProvider): + url = "https://bard.google.com" + working = True + needs_auth = True + webdriver = True + + @classmethod + def create_completion( + cls, + model: str, + messages: Messages, + stream: bool, + proxy: str = None, + webdriver: WebDriver = None, + user_data_dir: str = None, + headless: bool = True, + **kwargs + ) -> CreateResult: + prompt = format_prompt(messages) + session = WebDriverSession(webdriver, user_data_dir, headless, proxy=proxy) + with session as driver: + try: + driver.get(f"{cls.url}/chat") + wait = WebDriverWait(driver, 10 if headless else 240) + wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea"))) + except: + # Reopen browser for login + if not webdriver: + driver = session.reopen() + driver.get(f"{cls.url}/chat") + login_url = os.environ.get("G4F_LOGIN_URL") + if login_url: + yield f"Please login: [Google Bard]({login_url})\n\n" + wait = WebDriverWait(driver, 240) + wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea"))) + else: + raise RuntimeError("Prompt textarea not found. You may not be logged in.") + + # Add hook in XMLHttpRequest + script = """ +const _http_request_open = XMLHttpRequest.prototype.open; +window._message = ""; +XMLHttpRequest.prototype.open = function(method, url) { + if (url.includes("/assistant.lamda.BardFrontendService/StreamGenerate")) { + this.addEventListener("load", (event) => { + window._message = JSON.parse(JSON.parse(this.responseText.split("\\n")[3])[0][2])[4][0][1][0]; + }); + } + return _http_request_open.call(this, method, url); +} +""" + driver.execute_script(script) + + element_send_text(driver.find_element(By.CSS_SELECTOR, "div.ql-editor.textarea"), prompt) + + while True: + chunk = driver.execute_script("return window._message;") + if chunk: + yield chunk + return + else: + time.sleep(0.1) \ No newline at end of file diff --git a/g4f/Provider/selenium/__init__.py b/g4f/Provider/selenium/__init__.py index a8c18a49..9a020460 100644 --- a/g4f/Provider/selenium/__init__.py +++ b/g4f/Provider/selenium/__init__.py @@ -2,4 +2,5 @@ from .AItianhuSpace import AItianhuSpace from .MyShell import MyShell from .PerplexityAi import PerplexityAi from .Phind import Phind -from .TalkAi import TalkAi \ No newline at end of file +from .TalkAi import TalkAi +from .Bard import Bard \ No newline at end of file diff --git a/g4f/__init__.py b/g4f/__init__.py index 173a1688..d76d70b5 100644 --- a/g4f/__init__.py +++ b/g4f/__init__.py @@ -91,7 +91,7 @@ class ChatCompletion: auth : Union[str, None] = None, ignored : list[str] = None, ignore_working: bool = False, - ignore_stream_and_auth: bool = False, + ignore_stream: bool = False, patch_provider: callable = None, **kwargs) -> Union[CreateResult, str]: """ @@ -105,7 +105,7 @@ class ChatCompletion: auth (Union[str, None], optional): Authentication token or credentials, if required. ignored (list[str], optional): List of provider names to be ignored. ignore_working (bool, optional): If True, ignores the working status of the provider. - ignore_stream_and_auth (bool, optional): If True, ignores the stream and authentication requirement checks. + ignore_stream (bool, optional): If True, ignores the stream and authentication requirement checks. patch_provider (callable, optional): Function to modify the provider. **kwargs: Additional keyword arguments. @@ -118,10 +118,11 @@ class ChatCompletion: ProviderNotWorkingError: If the provider is not operational. StreamNotSupportedError: If streaming is requested but not supported by the provider. """ - model, provider = get_model_and_provider(model, provider, stream, ignored, ignore_working, ignore_stream_and_auth) - - if not ignore_stream_and_auth and provider.needs_auth and not auth: - raise AuthenticationRequiredError(f'{provider.__name__} requires authentication (use auth=\'cookie or token or jwt ...\' param)') + model, provider = get_model_and_provider( + model, provider, stream, + ignored, ignore_working, + ignore_stream or kwargs.get("ignore_stream_and_auth") + ) if auth: kwargs['auth'] = auth diff --git a/g4f/errors.py b/g4f/errors.py index 15bfafbd..ff28de3e 100644 --- a/g4f/errors.py +++ b/g4f/errors.py @@ -7,9 +7,6 @@ class ProviderNotWorkingError(Exception): class StreamNotSupportedError(Exception): pass -class AuthenticationRequiredError(Exception): - pass - class ModelNotFoundError(Exception): pass @@ -37,5 +34,5 @@ class MissingRequirementsError(Exception): class MissingAiohttpSocksError(MissingRequirementsError): pass -class MissingAccessToken(Exception): +class MissingAuthError(Exception): pass \ No newline at end of file diff --git a/g4f/gui/server/backend.py b/g4f/gui/server/backend.py index 8bf58e52..2218452c 100644 --- a/g4f/gui/server/backend.py +++ b/g4f/gui/server/backend.py @@ -162,7 +162,7 @@ class Backend_Api: "provider": provider, "messages": messages, "stream": True, - "ignore_stream_and_auth": True, + "ignore_stream": True, "patch_provider": patch, **kwargs } diff --git a/g4f/image.py b/g4f/image.py index 1a4692b3..3f26f75f 100644 --- a/g4f/image.py +++ b/g4f/image.py @@ -210,20 +210,28 @@ def format_images_markdown(images, alt: str, preview: str = None) -> str: end_flag = "\n" return f"\n{start_flag}{images}\n{end_flag}\n" -def to_bytes(image: Image) -> bytes: +def to_bytes(image: ImageType) -> bytes: """ Converts the given image to bytes. Args: - image (Image.Image): The image to convert. + image (ImageType): The image to convert. Returns: bytes: The image as bytes. """ - bytes_io = BytesIO() - image.save(bytes_io, image.format) - image.seek(0) - return bytes_io.getvalue() + if isinstance(image, bytes): + return image + elif isinstance(image, str): + is_data_uri_an_image(image) + return extract_data_uri(image) + elif isinstance(image, Image): + bytes_io = BytesIO() + image.save(bytes_io, image.format) + image.seek(0) + return bytes_io.getvalue() + else: + return image.read() class ImageResponse: def __init__( -- cgit v1.2.3 From 47900f23718e398fc086a6dfbf6590b4c5859c28 Mon Sep 17 00:00:00 2001 From: Heiner Lohaus Date: Fri, 9 Feb 2024 03:31:05 +0100 Subject: Resolve images in Gemini Provider --- g4f/Provider/needs_auth/Gemini.py | 41 +++++++++++++++++++++++++++++++++++++-- g4f/gui/client/html/index.html | 2 +- g4f/image.py | 5 ++--- 3 files changed, 42 insertions(+), 6 deletions(-) diff --git a/g4f/Provider/needs_auth/Gemini.py b/g4f/Provider/needs_auth/Gemini.py index a6e4c15d..da7230dd 100644 --- a/g4f/Provider/needs_auth/Gemini.py +++ b/g4f/Provider/needs_auth/Gemini.py @@ -1,16 +1,25 @@ from __future__ import annotations +import os import json import random import re from aiohttp import ClientSession +try: + from selenium.webdriver.common.by import By + from selenium.webdriver.support.ui import WebDriverWait + from selenium.webdriver.support import expected_conditions as EC +except ImportError: + pass + from ...typing import Messages, Cookies, ImageType, AsyncResult from ..base_provider import AsyncGeneratorProvider from ..helper import format_prompt, get_cookies -from ...errors import MissingAuthError +from ...errors import MissingAuthError, MissingRequirementsError from ...image import to_bytes, ImageResponse +from ...webdriver import get_browser, get_driver_cookies REQUEST_HEADERS = { "authority": "gemini.google.com", @@ -55,6 +64,27 @@ class Gemini(AsyncGeneratorProvider): **kwargs ) -> AsyncResult: prompt = format_prompt(messages) + + try: + driver = get_browser(proxy=proxy) + try: + driver.get(f"{cls.url}/app") + WebDriverWait(driver, 5).until( + EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea")) + ) + except: + login_url = os.environ.get("G4F_LOGIN_URL") + if login_url: + yield f"Please login: [Google Gemini]({login_url})\n\n" + WebDriverWait(driver, 240).until( + EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea")) + ) + cookies = get_driver_cookies(driver) + except MissingRequirementsError: + pass + finally: + driver.close() + if not cookies: cookies = get_cookies(".google.com", False) if "__Secure-1PSID" not in cookies: @@ -108,7 +138,14 @@ class Gemini(AsyncGeneratorProvider): yield content if image_prompt: images = [image[0][3][3] for image in response_part[4][0][12][7][0]] - yield ImageResponse(images, image_prompt) + resolved_images = [] + for image in images: + async with session.get(image, allow_redirects=False) as fetch: + image = fetch.headers["location"] + async with session.get(image, allow_redirects=False) as fetch: + image = fetch.headers["location"] + resolved_images.append(image) + yield ImageResponse(resolved_images, image_prompt, {"orginal_links": images}) def build_request( prompt: str, diff --git a/g4f/gui/client/html/index.html b/g4f/gui/client/html/index.html index 5edb55e8..55b54b48 100644 --- a/g4f/gui/client/html/index.html +++ b/g4f/gui/client/html/index.html @@ -154,7 +154,7 @@ - + diff --git a/g4f/image.py b/g4f/image.py index 3f26f75f..f0ee0395 100644 --- a/g4f/image.py +++ b/g4f/image.py @@ -46,9 +46,8 @@ def to_image(image: ImageType, is_svg: bool = False) -> Image: return open_image(BytesIO(image)) elif not isinstance(image, Image): image = open_image(image) - copy = image.copy() - copy.format = image.format - return copy + image.load() + return image return image def is_allowed_extension(filename: str) -> bool: -- cgit v1.2.3 From 4b41a8f4e83d4b455b1196f09def14828c73c12f Mon Sep 17 00:00:00 2001 From: Heiner Lohaus Date: Fri, 9 Feb 2024 04:24:21 +0100 Subject: Add example for Image Upload & Generation --- README.md | 26 +++++++++++++++++++++++++- g4f/Provider/needs_auth/Gemini.py | 39 +++++++++++++++++++++------------------ g4f/__init__.py | 2 +- 3 files changed, 47 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 288f886f..9db1679d 100644 --- a/README.md +++ b/README.md @@ -316,7 +316,7 @@ For generating images with Bing and for the OpenAi Chat you need cookies or a t ```python from g4f import set_cookies -set_cookies(".bing", { +set_cookies(".bing.com", { "_U": "cookie value" }) set_cookies("chat.openai.com", { @@ -336,6 +336,30 @@ pip install browser_cookie3 pip install g4f[webdriver] ``` +##### Image Upload & Generation + +Image upload and generation are supported by three main providers: + +- **Bing & Other GPT-4 Providers:** Utilizes Microsoft's Image Creator. +- **Google Gemini:** Available for free accounts with IP addresses outside Europe. +- **OpenaiChat with GPT-4:** Accessible for users with a Plus subscription. + +```python +import g4f + +# Setting up the request for image creation +response = g4f.ChatCompletion.create( + model=g4f.models.default, # Using the default model + provider=g4f.Provider.Gemini, # Specifying the provider as Gemini + messages=[{"role": "user", "content": "Create an image like this"}], + image=open("images/g4f.png", "rb"), # Image input can be a data URI, bytes, PIL Image, or IO object + image_name="g4f.png" # Optional: specifying the filename +) + +# Displaying the response +print(response) +``` + ##### Using Browser Some providers using a browser to bypass the bot protection. They using the selenium webdriver to control the browser. The browser settings and the login data are saved in a custom directory. If the headless mode is enabled, the browser windows are loaded invisibly. For performance reasons, it is recommended to reuse the browser instances and close them yourself at the end: diff --git a/g4f/Provider/needs_auth/Gemini.py b/g4f/Provider/needs_auth/Gemini.py index da7230dd..402fc02f 100644 --- a/g4f/Provider/needs_auth/Gemini.py +++ b/g4f/Provider/needs_auth/Gemini.py @@ -65,25 +65,28 @@ class Gemini(AsyncGeneratorProvider): ) -> AsyncResult: prompt = format_prompt(messages) - try: - driver = get_browser(proxy=proxy) + if not cookies: + driver = None try: - driver.get(f"{cls.url}/app") - WebDriverWait(driver, 5).until( - EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea")) - ) - except: - login_url = os.environ.get("G4F_LOGIN_URL") - if login_url: - yield f"Please login: [Google Gemini]({login_url})\n\n" - WebDriverWait(driver, 240).until( - EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea")) - ) - cookies = get_driver_cookies(driver) - except MissingRequirementsError: - pass - finally: - driver.close() + driver = get_browser(proxy=proxy) + try: + driver.get(f"{cls.url}/app") + WebDriverWait(driver, 5).until( + EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea")) + ) + except: + login_url = os.environ.get("G4F_LOGIN_URL") + if login_url: + yield f"Please login: [Google Gemini]({login_url})\n\n" + WebDriverWait(driver, 240).until( + EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea")) + ) + cookies = get_driver_cookies(driver) + except MissingRequirementsError: + pass + finally: + if driver: + driver.close() if not cookies: cookies = get_cookies(".google.com", False) diff --git a/g4f/__init__.py b/g4f/__init__.py index d76d70b5..34c8aa19 100644 --- a/g4f/__init__.py +++ b/g4f/__init__.py @@ -136,7 +136,7 @@ class ChatCompletion: provider = patch_provider(provider) result = provider.create_completion(model, messages, stream, **kwargs) - return result if stream else ''.join(result) + return result if stream else ''.join([str(chunk) for chunk in result]) @staticmethod def create_async(model : Union[Model, str], -- cgit v1.2.3