From 2fad27b2c5ce6b05591d2921c7bafefa2de7c9b5 Mon Sep 17 00:00:00 2001 From: abc <98614666+xtekky@users.noreply.github.com> Date: Sat, 13 Apr 2024 03:09:11 +0100 Subject: new gpt-4 beating opensource models --- README.md | 15 ++++++++------- g4f/Provider/HuggingChat.py | 14 +++++++------- g4f/Provider/PerplexityLabs.py | 5 +++-- g4f/models.py | 11 +++++++++-- 4 files changed, 27 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 038ab1dc..4fab92b3 100644 --- a/README.md +++ b/README.md @@ -281,13 +281,14 @@ set G4F_PROXY=http://host:port | [beta.theb.ai](https://beta.theb.ai) | `g4f.Provider.Theb` | ✔️ | ✔️ | ✔️ | ![Unknown](https://img.shields.io/badge/Unknown-grey) | ❌ | | [you.com](https://you.com) | `g4f.Provider.You` | ✔️ | ✔️ | ✔️ | ![Unknown](https://img.shields.io/badge/Unknown-grey) | ❌ | -## New OpenSource Models -While we wait for gpt-5, here is a list of new models that are at least better than gpt-3.5-turbo. Some rival gpt-4. Expect this list to grow. - -| Website | Provider | parameters | -| ------ | ------- | ------ | -| [mixtral-8x22b](https://huggingface.co/mistral-community/Mixtral-8x22B-v0.1) | `g4f.Provider.DeepInfra` | 176B / 44b active | -| [dbrx-instruct](https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm) | `g4f.Provider.DeepInfra` | 132B / 36B active| +## Best OpenSource Models +While we wait for gpt-5, here is a list of new models that are at least better than gpt-3.5-turbo. **Some are better than gpt-4**. Expect this list to grow. 
+ +| Website | Provider | parameters | better than | +| ------ | ------- | ------ | ------ | +| [mixtral-8x22b](https://huggingface.co/mistral-community/Mixtral-8x22B-v0.1) | `g4f.Provider.DeepInfra` | 176B / 44B active | gpt-3.5-turbo | +| [dbrx-instruct](https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm) | `g4f.Provider.DeepInfra` | 132B / 36B active | gpt-3.5-turbo | +| [command-r+](https://txt.cohere.com/command-r-plus-microsoft-azure/) | `g4f.Provider.HuggingChat` | 104B | gpt-4-0613 | ### GPT-3.5 diff --git a/g4f/Provider/HuggingChat.py b/g4f/Provider/HuggingChat.py index b80795fe..882edb78 100644 --- a/g4f/Provider/HuggingChat.py +++ b/g4f/Provider/HuggingChat.py @@ -14,13 +14,12 @@ class HuggingChat(AsyncGeneratorProvider, ProviderModelMixin): working = True default_model = "mistralai/Mixtral-8x7B-Instruct-v0.1" models = [ - "mistralai/Mixtral-8x7B-Instruct-v0.1", - "google/gemma-7b-it", - "meta-llama/Llama-2-70b-chat-hf", - "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", - "codellama/CodeLlama-34b-Instruct-hf", - "mistralai/Mistral-7B-Instruct-v0.2", - "openchat/openchat-3.5-0106", + "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1", + 'CohereForAI/c4ai-command-r-plus', + 'mistralai/Mixtral-8x7B-Instruct-v0.1', + 'google/gemma-1.1-7b-it', + 'NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO', + 'mistralai/Mistral-7B-Instruct-v0.2' ] model_aliases = { "openchat/openchat_3.5": "openchat/openchat-3.5-0106", @@ -48,6 +47,7 @@ class HuggingChat(AsyncGeneratorProvider, ProviderModelMixin): **kwargs ) -> AsyncResult: options = {"model": cls.get_model(model)} + system_prompt = "\n".join([message["content"] for message in messages if message["role"] == "system"]) if system_prompt: options["preprompt"] = system_prompt diff --git a/g4f/Provider/PerplexityLabs.py b/g4f/Provider/PerplexityLabs.py index 6c80efee..ba956100 100644 --- a/g4f/Provider/PerplexityLabs.py +++ b/g4f/Provider/PerplexityLabs.py @@ -19,13 +19,14 @@ class 
PerplexityLabs(AsyncGeneratorProvider, ProviderModelMixin): "sonar-small-online", "sonar-medium-online", "sonar-small-chat", "sonar-medium-chat", "mistral-7b-instruct", "codellama-70b-instruct", "llava-v1.5-7b-wrapper", "llava-v1.6-34b", "mixtral-8x7b-instruct", "gemma-2b-it", "gemma-7b-it" - "mistral-medium", "related" + "mistral-medium", "related", "dbrx-instruct" ] model_aliases = { "mistralai/Mistral-7B-Instruct-v0.1": "mistral-7b-instruct", "mistralai/Mixtral-8x7B-Instruct-v0.1": "mixtral-8x7b-instruct", "codellama/CodeLlama-70b-Instruct-hf": "codellama-70b-instruct", - "llava-v1.5-7b": "llava-v1.5-7b-wrapper" + "llava-v1.5-7b": "llava-v1.5-7b-wrapper", + 'databricks/dbrx-instruct': "dbrx-instruct" } @classmethod diff --git a/g4f/models.py b/g4f/models.py index ee8d83f8..ff8b3a9c 100644 --- a/g4f/models.py +++ b/g4f/models.py @@ -165,7 +165,7 @@ mistral_7b_v02 = Model( mixtral_8x22b = Model( name = "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1", base_provider = "huggingface", - best_provider = DeepInfra + best_provider = RetryProvider([HuggingChat, DeepInfra]) ) # Misc models @@ -269,7 +269,13 @@ pi = Model( dbrx_instruct = Model( name = 'databricks/dbrx-instruct', base_provider = 'mistral', - best_provider = DeepInfra + best_provider = RetryProvider([DeepInfra, PerplexityLabs]) +) + +command_r_plus = Model( + name = 'CohereForAI/c4ai-command-r-plus', + base_provider = 'mistral', + best_provider = HuggingChat ) class ModelUtils: @@ -324,6 +330,7 @@ class ModelUtils: 'claude-3-sonnet': claude_3_sonnet, # other + 'command-r+': command_r_plus, 'dbrx-instruct': dbrx_instruct, 'lzlv-70b': lzlv_70b, 'airoboros-70b': airoboros_70b, -- cgit v1.2.3