From 4b41a8f4e83d4b455b1196f09def14828c73c12f Mon Sep 17 00:00:00 2001 From: Heiner Lohaus Date: Fri, 9 Feb 2024 04:24:21 +0100 Subject: Add example for Image Upload & Generation --- README.md | 26 +++++++++++++++++++++++++- g4f/Provider/needs_auth/Gemini.py | 39 +++++++++++++++++++++------------------ g4f/__init__.py | 2 +- 3 files changed, 47 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 288f886f..9db1679d 100644 --- a/README.md +++ b/README.md @@ -316,7 +316,7 @@ For generating images with Bing and for the OpenAi Chat you need cookies or a t ```python from g4f import set_cookies -set_cookies(".bing", { +set_cookies(".bing.com", { "_U": "cookie value" }) set_cookies("chat.openai.com", { @@ -336,6 +336,30 @@ pip install browser_cookie3 pip install g4f[webdriver] ``` +##### Image Upload & Generation + +Image upload and generation are supported by three main providers: + +- **Bing & Other GPT-4 Providers:** Utilizes Microsoft's Image Creator. +- **Google Gemini:** Available for free accounts with IP addresses outside Europe. +- **OpenaiChat with GPT-4:** Accessible for users with a Plus subscription. + +```python +import g4f + +# Setting up the request for image creation +response = g4f.ChatCompletion.create( + model=g4f.models.default, # Using the default model + provider=g4f.Provider.Gemini, # Specifying the provider as Gemini + messages=[{"role": "user", "content": "Create an image like this"}], + image=open("images/g4f.png", "rb"), # Image input can be a data URI, bytes, PIL Image, or IO object + image_name="g4f.png" # Optional: specifying the filename +) + +# Displaying the response +print(response) +``` + ##### Using Browser Some providers using a browser to bypass the bot protection. They using the selenium webdriver to control the browser. The browser settings and the login data are saved in a custom directory. If the headless mode is enabled, the browser windows are loaded invisibly. For performance reasons, it is recommended to reuse the browser instances and close them yourself at the end: diff --git a/g4f/Provider/needs_auth/Gemini.py b/g4f/Provider/needs_auth/Gemini.py index da7230dd..402fc02f 100644 --- a/g4f/Provider/needs_auth/Gemini.py +++ b/g4f/Provider/needs_auth/Gemini.py @@ -65,25 +65,28 @@ class Gemini(AsyncGeneratorProvider): ) -> AsyncResult: prompt = format_prompt(messages) - try: - driver = get_browser(proxy=proxy) + if not cookies: + driver = None try: - driver.get(f"{cls.url}/app") - WebDriverWait(driver, 5).until( - EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea")) - ) - except: - login_url = os.environ.get("G4F_LOGIN_URL") - if login_url: - yield f"Please login: [Google Gemini]({login_url})\n\n" - WebDriverWait(driver, 240).until( - EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea")) - ) - cookies = get_driver_cookies(driver) - except MissingRequirementsError: - pass - finally: - driver.close() + driver = get_browser(proxy=proxy) + try: + driver.get(f"{cls.url}/app") + WebDriverWait(driver, 5).until( + EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea")) + ) + except: + login_url = os.environ.get("G4F_LOGIN_URL") + if login_url: + yield f"Please login: [Google Gemini]({login_url})\n\n" + WebDriverWait(driver, 240).until( + EC.visibility_of_element_located((By.CSS_SELECTOR, "div.ql-editor.textarea")) + ) + cookies = get_driver_cookies(driver) + except MissingRequirementsError: + pass + finally: + if driver: + driver.close() if not cookies: cookies = get_cookies(".google.com", False) diff --git a/g4f/__init__.py b/g4f/__init__.py index d76d70b5..34c8aa19 100644 --- a/g4f/__init__.py +++ b/g4f/__init__.py @@ -136,7 +136,7 @@ class ChatCompletion: provider = patch_provider(provider) result = provider.create_completion(model, messages, stream, **kwargs) - return result if stream else ''.join(result) + return result if stream else ''.join([str(chunk) for chunk in result]) @staticmethod def create_async(model : Union[Model, str], -- cgit v1.2.3