From 4bc4d635bca9c1c7633ff87ff24b757c653ff60f Mon Sep 17 00:00:00 2001 From: Heiner Lohaus Date: Mon, 22 Apr 2024 01:27:48 +0200 Subject: Add vision models to readme --- README.md | 24 ++++----- etc/tool/readme_table.py | 14 ++--- g4f/Provider/Bing.py | 3 +- g4f/Provider/BingCreateImages.py | 1 + g4f/Provider/DeepInfra.py | 33 ++++++++---- g4f/Provider/DeepInfraImage.py | 1 + g4f/Provider/MetaAIAccount.py | 1 + g4f/Provider/ReplicateImage.py | 1 + g4f/Provider/You.py | 22 +++++--- g4f/Provider/needs_auth/Gemini.py | 87 ++++++++++++++++++++++---------- g4f/Provider/needs_auth/OpenaiAccount.py | 1 + g4f/Provider/needs_auth/OpenaiChat.py | 1 + g4f/gui/server/api.py | 14 +++-- g4f/image.py | 9 +++- 14 files changed, 142 insertions(+), 70 deletions(-) diff --git a/README.md b/README.md index 552569a2..66398659 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,7 @@ As per the survey, here is a list of improvements to come ```sh docker pull hlohaus789/g4f -docker run -p 8080:8080 -p 1337:1337 -p 7900:7900 --shm-size="2g" -v ${PWD}/hardir:/app/har_and_cookies hlohaus789/g4f:latest +docker run -p 8080:8080 -p 1337:1337 -p 7900:7900 --shm-size="2g" -v ${PWD}/har_and_cookies:/app/har_and_cookies hlohaus789/g4f:latest ``` 3. **Access the Client:** @@ -400,17 +400,17 @@ While we wait for gpt-5, here is a list of new models that are at least better t | openchat_3.5 | Huggingface | 2+ Providers | [huggingface.co](https://huggingface.co/) | | pi | Inflection | g4f.Provider.Pi | [inflection.ai](https://inflection.ai/) | -### Image Models - -| Label | Provider | Model | Website | -| ----- | -------- | ----- | ------- | -| Microsoft Designer | Bing | dall-e | [bing.com](https://www.bing.com/images/create) | -| OpenAI ChatGPT | Openai | dall-e | [chat.openai.com](https://chat.openai.com) | -| You.com | You | dall-e | [you.com](https://you.com) | -| DeepInfraImage | DeepInfra | stability-ai/sdxl | [deepinfra.com](https://deepinfra.com) | -| ReplicateImage | Replicate | stability-ai/sdxl | [replicate.com](https://replicate.com) | -| Gemini | Gemini | gemini | [gemini.google.com](https://gemini.google.com) | -| Meta AI | MetaAI | meta | [meta.ai](https://www.meta.ai) | +### Image and Vision Models + +| Label | Provider | Image Model | Vision Model | Website | +| ----- | -------- | ----------- | ------------ | ------- | +| Microsoft Copilot in Bing | `g4f.Provider.Bing` | dall-e| gpt-4-vision | [bing.com](https://bing.com/chat) | +| DeepInfra | `g4f.Provider.DeepInfra` | stability-ai/sdxl| llava-1.5-7b-hf | [deepinfra.com](https://deepinfra.com) | +| Gemini | `g4f.Provider.Gemini` | gemini| gemini | [gemini.google.com](https://gemini.google.com) | +| Meta AI | `g4f.Provider.MetaAI` | meta| ❌ | [meta.ai](https://www.meta.ai) | +| OpenAI ChatGPT | `g4f.Provider.OpenaiChat` | dall-e| gpt-4-vision | [chat.openai.com](https://chat.openai.com) | +| Replicate | `g4f.Provider.Replicate` | stability-ai/sdxl| ❌ | [replicate.com](https://replicate.com) | +| You.com | `g4f.Provider.You` | dall-e| agent | [you.com](https://you.com) | ## 🔗 Powered by gpt4free diff --git a/etc/tool/readme_table.py b/etc/tool/readme_table.py index b56e4bca..439b17fa 100644 --- a/etc/tool/readme_table.py +++ b/etc/tool/readme_table.py @@ -127,8 +127,8 @@ def print_models(): def print_image_models(): lines = [ - "| Label | Provider | Model | Website |", - "| ----- | -------- | ----- | ------- |", + "| Label | Provider | Image Model | Vision Model | Website |", + "| ----- | -------- | ----------- | ------------ | ------- |", ] from 
g4f.gui.server.api import Api for image_model in Api.get_image_models(): @@ -136,13 +136,15 @@ def print_image_models(): netloc = urlparse(provider_url).netloc.replace("www.", "") website = f"[{netloc}]({provider_url})" label = image_model["provider"] if image_model["label"] is None else image_model["label"] - lines.append(f'| {label} | {image_model["provider"]} | {image_model["image_model"]} | {website} |') + if image_model["vision_model"] is None: + image_model["vision_model"] = "❌" + lines.append(f'| {label} | `g4f.Provider.{image_model["provider"]}` | {image_model["image_model"]}| {image_model["vision_model"]} | {website} |') print("\n".join(lines)) if __name__ == "__main__": - print_providers() - print("\n", "-" * 50, "\n") - print_models() + #print_providers() + #print("\n", "-" * 50, "\n") + #print_models() print("\n", "-" * 50, "\n") print_image_models() \ No newline at end of file diff --git a/g4f/Provider/Bing.py b/g4f/Provider/Bing.py index 1fe94359..bfd74f8c 100644 --- a/g4f/Provider/Bing.py +++ b/g4f/Provider/Bing.py @@ -38,8 +38,9 @@ class Bing(AsyncGeneratorProvider, ProviderModelMixin): supports_message_history = True supports_gpt_4 = True default_model = "Balanced" + default_vision_model = "gpt-4-vision" models = [getattr(Tones, key) for key in Tones.__dict__ if not key.startswith("__")] - + @classmethod def create_async_generator( cls, diff --git a/g4f/Provider/BingCreateImages.py b/g4f/Provider/BingCreateImages.py index 60ecff07..69bf1e92 100644 --- a/g4f/Provider/BingCreateImages.py +++ b/g4f/Provider/BingCreateImages.py @@ -13,6 +13,7 @@ from .bing.create_images import create_images, create_session, get_cookies_from_ class BingCreateImages(AsyncGeneratorProvider, ProviderModelMixin): label = "Microsoft Designer" + parent = "Bing" url = "https://www.bing.com/images/create" working = True needs_auth = True diff --git a/g4f/Provider/DeepInfra.py b/g4f/Provider/DeepInfra.py index 971424b7..35ff84a1 100644 --- a/g4f/Provider/DeepInfra.py +++ b/g4f/Provider/DeepInfra.py @@ -1,17 +1,22 @@ from __future__ import annotations import requests -from ..typing import AsyncResult, Messages +from ..typing import AsyncResult, Messages, ImageType +from ..image import to_data_uri from .needs_auth.Openai import Openai class DeepInfra(Openai): label = "DeepInfra" url = "https://deepinfra.com" working = True - needs_auth = False + has_auth = True supports_stream = True supports_message_history = True - default_model = 'HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1' + default_model = "meta-llama/Meta-Llama-3-70b-instruct" + default_vision_model = "llava-hf/llava-1.5-7b-hf" + model_aliases = { + 'mixtral-8x22b': 'HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1' + } @classmethod def get_models(cls): @@ -27,19 +32,12 @@ class DeepInfra(Openai): model: str, messages: Messages, stream: bool, + image: ImageType = None, api_base: str = "https://api.deepinfra.com/v1/openai", temperature: float = 0.7, max_tokens: int = 1028, **kwargs ) -> AsyncResult: - - if not '/' in model: - models = { - 'mixtral-8x22b': 'HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1', - 'dbrx-instruct': 'databricks/dbrx-instruct', - } - model = models.get(model, model) - headers = { 'Accept-Encoding': 'gzip, deflate, br', 'Accept-Language': 'en-US', @@ -55,6 +53,19 @@ class DeepInfra(Openai): 'sec-ch-ua-mobile': '?0', 'sec-ch-ua-platform': '"macOS"', } + if image is not None: + if not model: + model = cls.default_vision_model + messages[-1]["content"] = [ + { + "type": "image_url", + "image_url": {"url": to_data_uri(image)} + }, + { + "type": 
"text", + "text": messages[-1]["content"] + } + ] return super().create_async_generator( model, messages, stream=stream, diff --git a/g4f/Provider/DeepInfraImage.py b/g4f/Provider/DeepInfraImage.py index 8e56e513..46a5c2e2 100644 --- a/g4f/Provider/DeepInfraImage.py +++ b/g4f/Provider/DeepInfraImage.py @@ -9,6 +9,7 @@ from ..image import ImageResponse class DeepInfraImage(AsyncGeneratorProvider, ProviderModelMixin): url = "https://deepinfra.com" + parent = "DeepInfra" working = True default_model = 'stability-ai/sdxl' image_models = [default_model] diff --git a/g4f/Provider/MetaAIAccount.py b/g4f/Provider/MetaAIAccount.py index d334393d..369b3f2f 100644 --- a/g4f/Provider/MetaAIAccount.py +++ b/g4f/Provider/MetaAIAccount.py @@ -6,6 +6,7 @@ from .MetaAI import MetaAI class MetaAIAccount(MetaAI): needs_auth = True + parent = "MetaAI" image_models = ["meta"] @classmethod diff --git a/g4f/Provider/ReplicateImage.py b/g4f/Provider/ReplicateImage.py index 5d001604..cc3943d7 100644 --- a/g4f/Provider/ReplicateImage.py +++ b/g4f/Provider/ReplicateImage.py @@ -11,6 +11,7 @@ from ..errors import ResponseError class ReplicateImage(AsyncGeneratorProvider, ProviderModelMixin): url = "https://replicate.com" + parent = "Replicate" working = True default_model = 'stability-ai/sdxl' default_versions = [ diff --git a/g4f/Provider/You.py b/g4f/Provider/You.py index 61069503..a9e7834c 100644 --- a/g4f/Provider/You.py +++ b/g4f/Provider/You.py @@ -14,13 +14,16 @@ from .you.har_file import get_telemetry_ids from .. import debug class You(AsyncGeneratorProvider, ProviderModelMixin): + label = "You.com" url = "https://you.com" working = True supports_gpt_35_turbo = True supports_gpt_4 = True default_model = "gpt-3.5-turbo" + default_vision_model = "agent" + image_models = ["dall-e"] models = [ - "gpt-3.5-turbo", + default_model, "gpt-4", "gpt-4-turbo", "claude-instant", @@ -29,12 +32,12 @@ class You(AsyncGeneratorProvider, ProviderModelMixin): "claude-3-sonnet", "gemini-pro", "zephyr", - "dall-e", + default_vision_model, + *image_models ] model_aliases = { "claude-v2": "claude-2" } - image_models = ["dall-e"] _cookies = None _cookies_used = 0 _telemetry_ids = [] @@ -52,7 +55,7 @@ class You(AsyncGeneratorProvider, ProviderModelMixin): chat_mode: str = "default", **kwargs, ) -> AsyncResult: - if image is not None: + if image is not None or model == cls.default_vision_model: chat_mode = "agent" elif not model or model == cls.default_model: ... @@ -63,13 +66,18 @@ class You(AsyncGeneratorProvider, ProviderModelMixin): chat_mode = "custom" model = cls.get_model(model) async with StreamSession( - proxies={"all": proxy}, + proxy=proxy, impersonate="chrome", timeout=(30, timeout) ) as session: cookies = await cls.get_cookies(session) if chat_mode != "default" else None - - upload = json.dumps([await cls.upload_file(session, cookies, to_bytes(image), image_name)]) if image else "" + upload = "" + if image is not None: + upload_file = await cls.upload_file( + session, cookies, + to_bytes(image), image_name + ) + upload = json.dumps([upload_file]) headers = { "Accept": "text/event-stream", "Referer": f"{cls.url}/search?fromSearchBar=true&tbm=youchat", diff --git a/g4f/Provider/needs_auth/Gemini.py b/g4f/Provider/needs_auth/Gemini.py index 3917df80..209c2e91 100644 --- a/g4f/Provider/needs_auth/Gemini.py +++ b/g4f/Provider/needs_auth/Gemini.py @@ -16,6 +16,7 @@ try: except ImportError: pass +from ... 
import debug from ...typing import Messages, Cookies, ImageType, AsyncResult from ..base_provider import AsyncGeneratorProvider from ..helper import format_prompt, get_cookies @@ -54,6 +55,55 @@ class Gemini(AsyncGeneratorProvider): needs_auth = True working = True image_models = ["gemini"] + default_vision_model = "gemini" + _cookies: Cookies = None + + @classmethod + async def nodriver_login(cls) -> Cookies: + try: + import nodriver as uc + except ImportError: + return + try: + from platformdirs import user_config_dir + user_data_dir = user_config_dir("g4f-nodriver") + except: + user_data_dir = None + if debug.logging: + print(f"Open nodriver with user_dir: {user_data_dir}") + browser = await uc.start(user_data_dir=user_data_dir) + page = await browser.get(f"{cls.url}/app") + await page.select("div.ql-editor.textarea", 240) + cookies = {} + for c in await page.browser.cookies.get_all(): + if c.domain.endswith(".google.com"): + cookies[c.name] = c.value + await page.close() + return cookies + + @classmethod + async def webdriver_login(cls, proxy: str): + driver = None + try: + driver = get_browser(proxy=proxy) + try: + driver.get(f"{cls.url}/app") + WebDriverWait(driver, 5).until( + EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea")) + ) + except: + login_url = os.environ.get("G4F_LOGIN_URL") + if login_url: + yield f"Please login: [Google Gemini]({login_url})\n\n" + WebDriverWait(driver, 240).until( + EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea")) + ) + cls._cookies = get_driver_cookies(driver) + except MissingRequirementsError: + pass + finally: + if driver: + driver.close() @classmethod async def create_async_generator( @@ -73,47 +123,30 @@ class Gemini(AsyncGeneratorProvider): if cookies is None: cookies = {} cookies["__Secure-1PSID"] = api_key - cookies = cookies if cookies else get_cookies(".google.com", False, True) + cls._cookies = cookies or cls._cookies or get_cookies(".google.com", False, True) base_connector = get_connector(connector, proxy) async with ClientSession( headers=REQUEST_HEADERS, connector=base_connector ) as session: - snlm0e = await cls.fetch_snlm0e(session, cookies) if cookies else None + snlm0e = await cls.fetch_snlm0e(session, cls._cookies) if cls._cookies else None if not snlm0e: - driver = None - try: - driver = get_browser(proxy=proxy) - try: - driver.get(f"{cls.url}/app") - WebDriverWait(driver, 5).until( - EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea")) - ) - except: - login_url = os.environ.get("G4F_LOGIN_URL") - if login_url: - yield f"Please login: [Google Gemini]({login_url})\n\n" - WebDriverWait(driver, 240).until( - EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea")) - ) - cookies = get_driver_cookies(driver) - except MissingRequirementsError: - pass - finally: - if driver: - driver.close() + cls._cookies = await cls.nodriver_login(); + if cls._cookies is None: + async for chunk in cls.webdriver_login(proxy): + yield chunk if not snlm0e: - if "__Secure-1PSID" not in cookies: + if "__Secure-1PSID" not in cls._cookies: raise MissingAuthError('Missing "__Secure-1PSID" cookie') - snlm0e = await cls.fetch_snlm0e(session, cookies) + snlm0e = await cls.fetch_snlm0e(session, cls._cookies) if not snlm0e: - raise RuntimeError("Invalid auth. SNlM0e not found") + raise RuntimeError("Invalid cookies. 
SNlM0e not found") image_url = await cls.upload_image(base_connector, to_bytes(image), image_name) if image else None async with ClientSession( - cookies=cookies, + cookies=cls._cookies, headers=REQUEST_HEADERS, connector=base_connector, ) as client: diff --git a/g4f/Provider/needs_auth/OpenaiAccount.py b/g4f/Provider/needs_auth/OpenaiAccount.py index 6260d343..16bfff66 100644 --- a/g4f/Provider/needs_auth/OpenaiAccount.py +++ b/g4f/Provider/needs_auth/OpenaiAccount.py @@ -4,4 +4,5 @@ from .OpenaiChat import OpenaiChat class OpenaiAccount(OpenaiChat): needs_auth = True + parent = "OpenaiChat" image_models = ["dall-e"] \ No newline at end of file diff --git a/g4f/Provider/needs_auth/OpenaiChat.py b/g4f/Provider/needs_auth/OpenaiChat.py index 3d6e9858..515230f0 100644 --- a/g4f/Provider/needs_auth/OpenaiChat.py +++ b/g4f/Provider/needs_auth/OpenaiChat.py @@ -44,6 +44,7 @@ class OpenaiChat(AsyncGeneratorProvider, ProviderModelMixin): supports_message_history = True supports_system_message = True default_model = None + default_vision_model = "gpt-4-vision" models = ["gpt-3.5-turbo", "gpt-4", "gpt-4-gizmo"] model_aliases = { "text-davinci-002-render-sha": "gpt-3.5-turbo", diff --git a/g4f/gui/server/api.py b/g4f/gui/server/api.py index 435700ea..3d9f6a1c 100644 --- a/g4f/gui/server/api.py +++ b/g4f/gui/server/api.py @@ -45,16 +45,20 @@ class Api(): @staticmethod def get_image_models() -> list[dict]: image_models = [] - for key, provider in __map__.items(): + for provider in __providers__: if hasattr(provider, "image_models"): if hasattr(provider, "get_models"): provider.get_models() + parent = provider + if hasattr(provider, "parent"): + parent = __map__[provider.parent] for model in provider.image_models: image_models.append({ - "provider": key, - "url": provider.url, - "label": provider.label if hasattr(provider, "label") else None, - "image_model": model + "provider": parent.__name__, + "url": parent.url, + "label": parent.label if hasattr(parent, "label") else None, + "image_model": model, + "vision_model": parent.default_vision_model if hasattr(parent, "default_vision_model") else None }) return image_models diff --git a/g4f/image.py b/g4f/image.py index ed8af103..270b59ad 100644 --- a/g4f/image.py +++ b/g4f/image.py @@ -86,7 +86,7 @@ def is_data_uri_an_image(data_uri: str) -> bool: if image_format not in ALLOWED_EXTENSIONS and image_format != "svg+xml": raise ValueError("Invalid image format (from mime file type).") -def is_accepted_format(binary_data: bytes) -> bool: +def is_accepted_format(binary_data: bytes) -> str: """ Checks if the given binary data represents an image with an accepted format. @@ -241,6 +241,13 @@ def to_bytes(image: ImageType) -> bytes: else: return image.read() +def to_data_uri(image: ImageType) -> str: + if not isinstance(image, str): + data = to_bytes(image) + data_base64 = base64.b64encode(data).decode() + return f"data:{is_accepted_format(data)};base64,{data_base64}" + return image + class ImageResponse: def __init__( self, -- cgit v1.2.3