diff options
Diffstat (limited to 'tool')
-rw-r--r-- | tool/provider_init.py | 33 | ||||
-rw-r--r-- | tool/readme_table.py | 103 | ||||
-rw-r--r-- | tool/vercel.py | 103 |
3 files changed, 239 insertions, 0 deletions
diff --git a/tool/provider_init.py b/tool/provider_init.py
new file mode 100644
index 00000000..22f21d4d
--- /dev/null
+++ b/tool/provider_init.py
@@ -0,0 +1,33 @@
+from pathlib import Path
+
+
+def main():
+    content = create_content()
+    with open("g4f/provider/__init__.py", "w", encoding="utf-8") as f:
+        f.write(content)
+
+
+def create_content():
+    path = Path()
+    paths = path.glob("g4f/provider/*.py")
+    paths = [p for p in paths if p.name not in ["__init__.py", "base_provider.py"]]
+    classnames = [p.stem for p in paths]
+
+    import_lines = [f"from .{name} import {name}" for name in classnames]
+    import_content = "\n".join(import_lines)
+
+    classnames.insert(0, "BaseProvider")
+    all_content = [f' "{name}"' for name in classnames]
+    all_content = ",\n".join(all_content)
+    all_content = f"__all__ = [\n{all_content},\n]"
+
+    return f"""from .base_provider import BaseProvider
+{import_content}
+
+
+{all_content}
+"""
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tool/readme_table.py b/tool/readme_table.py
new file mode 100644
index 00000000..a4d07adf
--- /dev/null
+++ b/tool/readme_table.py
@@ -0,0 +1,103 @@
+import re
+import sys
+from pathlib import Path
+from urllib.parse import urlparse
+
+sys.path.append(str(Path(__file__).parent.parent))
+
+from g4f import models, provider
+from g4f.provider.base_provider import BaseProvider
+
+
+def main():
+    print_providers()
+    print("\n", "-" * 50, "\n")
+    print_models()
+
+
+def print_providers():
+    lines = [
+        "| Website| Provider| gpt-3.5 | gpt-4 | Streaming | Status | Auth |",
+        "| ------ | ------- | ------- | ----- | --------- | ------ | ---- |",
+    ]
+    providers = get_providers()
+    for _provider in providers:
+        netloc = urlparse(_provider.url).netloc
+        website = f"[{netloc}]({_provider.url})"
+
+        provider_name = f"g4f.provider.{_provider.__name__}"
+
+        has_gpt_35 = "✔️" if _provider.supports_gpt_35_turbo else "❌"
+        has_gpt_4 = "✔️" if _provider.supports_gpt_4 else "❌"
+        stream = "✔️" if _provider.supports_stream else "❌"
+        status = (
+            "![Active](https://img.shields.io/badge/Active-brightgreen)"
+            if _provider.working
+            else "![Inactive](https://img.shields.io/badge/Inactive-red)"
+        )
+        auth = "✔️" if _provider.needs_auth else "❌"
+
+        lines.append(
+            f"| {website} | {provider_name} | {has_gpt_35} | {has_gpt_4} | {stream} | {status} | {auth} |"
+        )
+    print("\n".join(lines))
+
+
+def get_providers() -> list[type[BaseProvider]]:
+    provider_names = dir(provider)
+    ignore_names = [
+        "base_provider",
+        "BaseProvider",
+    ]
+    provider_names = [
+        provider_name
+        for provider_name in provider_names
+        if not provider_name.startswith("__") and provider_name not in ignore_names
+    ]
+    return [getattr(provider, provider_name) for provider_name in provider_names]
+
+
+def print_models():
+    base_provider_names = {
+        "cohere": "Cohere",
+        "google": "Google",
+        "openai": "OpenAI",
+        "anthropic": "Anthropic",
+        "replicate": "Replicate",
+        "huggingface": "Huggingface",
+    }
+    provider_urls = {
+        "Bard": "https://bard.google.com/",
+        "H2o": "https://www.h2o.ai/",
+        "Vercel": "https://sdk.vercel.ai/",
+    }
+
+    lines = [
+        "| Model | Base Provider | Provider | Website |",
+        "| ----- | ------------- | -------- | ------- |",
+    ]
+
+    _models = get_models()
+    for model in _models:
+        split_name = re.split(r":|/", model.name)
+        name = split_name[-1]
+
+        base_provider = base_provider_names[model.base_provider]
+        provider_name = f"g4f.provider.{model.best_provider.__name__}"
+
+        provider_url = provider_urls[model.best_provider.__name__]
+        netloc = urlparse(provider_url).netloc
+        website = f"[{netloc}]({provider_url})"
+        lines.append(f"| {name} | {base_provider} | {provider_name} | {website} |")
+
+    print("\n".join(lines))
+
+
+def get_models():
+    _models = [item[1] for item in models.__dict__.items()]
+    _models = [model for model in _models if type(model) is models.Model]
+    return [model for model in _models if model.name not in ["gpt-3.5-turbo", "gpt-4"]]
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tool/vercel.py b/tool/vercel.py
new file mode 100644
index 00000000..7b87e298
--- /dev/null
+++ b/tool/vercel.py
@@ -0,0 +1,103 @@
+import json
+import re
+from typing import Any
+
+import quickjs
+from curl_cffi import requests
+
+session = requests.Session(impersonate="chrome107")
+
+
+def get_model_info() -> dict[str, Any]:
+    url = "https://sdk.vercel.ai"
+    response = session.get(url)
+    html = response.text
+    paths_regex = r"static\/chunks.+?\.js"
+    separator_regex = r'"\]\)<\/script><script>self\.__next_f\.push\(\[.,"'
+
+    paths = re.findall(paths_regex, html)
+    paths = [re.sub(separator_regex, "", path) for path in paths]
+    paths = list(set(paths))
+
+    urls = [f"{url}/_next/{path}" for path in paths]
+    scripts = [session.get(url).text for url in urls]
+
+    for script in scripts:
+        models_regex = r'let .="\\n\\nHuman:\",r=(.+?),.='
+        matches = re.findall(models_regex, script)
+
+        if matches:
+            models_str = matches[0]
+            stop_sequences_regex = r"(?<=stopSequences:{value:\[)\D(?<!\])"
+            models_str = re.sub(
+                stop_sequences_regex, re.escape('"\\n\\nHuman:"'), models_str
+            )
+
+            context = quickjs.Context()  # type: ignore
+            json_str: str = context.eval(f"({models_str})").json()  # type: ignore
+            return json.loads(json_str)  # type: ignore
+
+    return {}
+
+
+def convert_model_info(models: dict[str, Any]) -> dict[str, Any]:
+    model_info: dict[str, Any] = {}
+    for model_name, params in models.items():
+        default_params = params_to_default_params(params["parameters"])
+        model_info[model_name] = {"id": params["id"], "default_params": default_params}
+    return model_info
+
+
+def params_to_default_params(parameters: dict[str, Any]):
+    defaults: dict[str, Any] = {}
+    for key, parameter in parameters.items():
+        if key == "maximumLength":
+            key = "maxTokens"
+        defaults[key] = parameter["value"]
+    return defaults
+
+
+def get_model_names(model_info: dict[str, Any]):
+    model_names = model_info.keys()
+    model_names = [
+        name
+        for name in model_names
+        if name not in ["openai:gpt-4", "openai:gpt-3.5-turbo"]
+    ]
+    model_names.sort()
+    return model_names
+
+
+def print_providers(model_names: list[str]):
+    for name in model_names:
+        split_name = re.split(r":|/", name)
+        base_provider = split_name[0]
+        variable_name = split_name[-1].replace("-", "_").replace(".", "")
+        line = f'{variable_name} = Model(name="{name}", base_provider="{base_provider}", best_provider=Vercel,)\n'
+        print(line)
+
+
+def print_convert(model_names: list[str]):
+    for name in model_names:
+        split_name = re.split(r":|/", name)
+        key = split_name[-1]
+        variable_name = split_name[-1].replace("-", "_").replace(".", "")
+        # "claude-instant-v1": claude_instant_v1,
+        line = f' "{key}": {variable_name},'
+        print(line)
+
+
+def main():
+    model_info = get_model_info()
+    model_info = convert_model_info(model_info)
+    print(json.dumps(model_info, indent=2))
+
+    model_names = get_model_names(model_info)
+    print("-------" * 40)
+    print_providers(model_names)
+    print("-------" * 40)
+    print_convert(model_names)
+
+
+if __name__ == "__main__":
+    main()