From b7342b1f130aa867eec17d973b0cab00b16a4507 Mon Sep 17 00:00:00 2001
From: abc <98614666+xtekky@users.noreply.github.com>
Date: Mon, 11 Mar 2024 18:26:34 +0000
Subject: ~ | support local llm inference

---
 .gitignore                  |   2 +
 g4f/local/__init__.py       | 135 ++++++++++++++++++++++++++++++++++++++++++++
 g4f/local/core/engine.py    |  61 ++++++++++++++++++++
 g4f/local/core/models.py    |  89 +++++++++++++++++++++++++++++
 g4f/local/models/model-here |   1 +
 5 files changed, 288 insertions(+)
 create mode 100644 g4f/local/__init__.py
 create mode 100644 g4f/local/core/engine.py
 create mode 100644 g4f/local/core/models.py
 create mode 100644 g4f/local/models/model-here

diff --git a/.gitignore b/.gitignore
index 5e00e16b..2ee81ffe 100644
--- a/.gitignore
+++ b/.gitignore
@@ -50,3 +50,5 @@ prv.py
 x.js
 x.py
 info.txt
+local.py
+*.gguf
\ No newline at end of file
diff --git a/g4f/local/__init__.py b/g4f/local/__init__.py
new file mode 100644
index 00000000..626643fc
--- /dev/null
+++ b/g4f/local/__init__.py
@@ -0,0 +1,135 @@
+import random, string, time, re
+
+from ..typing import Union, Iterator, Messages
+from ..stubs import ChatCompletion, ChatCompletionChunk
+from .core.engine import LocalProvider
+from .core.models import models
+
+IterResponse = Iterator[Union[ChatCompletion, ChatCompletionChunk]]
+
+def read_json(text: str) -> str:
+    """Return the body of the first fenced code block found in ``text``.
+
+    The fence may be tagged ``json`` or left untagged. When no fenced block
+    is found, ``text`` is returned unchanged.
+    """
+    match = re.search(r"```(json|)\n(?P<code>[\S\s]+?)\n```", text)
+    if match:
+        return match.group("code")
+    return text
+
+def iter_response(
+    response: Iterator[str],
+    stream: bool,
+    response_format: dict = None,
+    max_tokens: int = None,
+    stop: list = None
+) -> IterResponse:
+    """Wrap raw model output in ChatCompletion / ChatCompletionChunk objects.
+
+    With ``stream`` true, one ChatCompletionChunk is yielded per input chunk,
+    followed by a final chunk that carries only the finish reason. Otherwise
+    a single ChatCompletion holding the accumulated text is yielded, passed
+    through ``read_json`` first when ``response_format`` has the type
+    "json_object". Reading stops early with finish reason "length" once
+    ``max_tokens`` chunks were consumed, or "stop" once a string from
+    ``stop`` appears in the accumulated text, which is cut at that position.
+    """
+
+    content = ""
+    finish_reason = None
+    completion_id = ''.join(random.choices(string.ascii_letters + string.digits, k=28))
+    for idx, chunk in enumerate(response):
+        content += str(chunk)
+        if max_tokens is not None and idx + 1 >= max_tokens:
+            finish_reason = "length"
+        first = -1
+        word = None
+        if stop is not None:
+            for word in list(stop):
+                first = content.find(word)
+                if first != -1:
+                    content = content[:first]
+                    break
+            if stream and first != -1:
+                first = chunk.find(word)
+                if first != -1:
+                    chunk = chunk[:first]
+                else:
+                    first = 0
+        if first != -1:
+            finish_reason = "stop"
+        if stream:
+            yield ChatCompletionChunk(chunk, None, completion_id, int(time.time()))
+        if finish_reason is not None:
+            break
+    finish_reason = "stop" if finish_reason is None else finish_reason
+    if stream:
+        yield ChatCompletionChunk(None, finish_reason, completion_id, int(time.time()))
+    else:
+        if response_format is not None and "type" in response_format:
+            if response_format["type"] == "json_object":
+                content = read_json(content)
+        yield ChatCompletion(content, finish_reason, completion_id, int(time.time()))
+
+def filter_none(**kwargs):
+    """Return ``kwargs`` without the entries whose value is None."""
+    for key in list(kwargs.keys()):
+        if kwargs[key] is None:
+            del kwargs[key]
+    return kwargs
+
+class LocalClient():
+    """Client for local inference; completions live at ``chat.completions``."""
+    def __init__(
+        self,
+        **kwargs
+    ) -> None:
+        self.chat: Chat = Chat(self)
+
+    @staticmethod
+    def list_models():
+        """Return the names of all models in the local registry."""
+        return list(models.keys())
+
+class Completions():
+    """Chat-completion interface bound to a LocalClient."""
+    def __init__(self, client: LocalClient):
+        self.client: LocalClient = client
+
+    def create(
+        self,
+        messages: Messages,
+        model: str,
+        stream: bool = False,
+        response_format: dict = None,
+        max_tokens: int = None,
+        stop: Union[list[str], str] = None,
+        **kwargs
+    ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
+        """Run ``messages`` through the local ``model``.
+
+        Returns an iterator of ChatCompletionChunk when ``stream`` is true,
+        otherwise a single ChatCompletion. ``stop`` may be one string or a
+        list of strings; extra keyword arguments are forwarded to
+        ``LocalProvider.create_completion``.
+        """
+
+        stop = [stop] if isinstance(stop, str) else stop
+        response = LocalProvider.create_completion(
+            model, messages, stream,
+            **filter_none(
+                max_tokens=max_tokens,
+                stop=stop,
+            ),
+            **kwargs
+        )
+        response = iter_response(response, stream, response_format, max_tokens, stop)
+        return response if stream else next(response)
+
+class Chat():
+    """Holder for the ``completions`` interface of a LocalClient."""
+    completions: Completions
+
+    def __init__(self, client: LocalClient):
+        self.completions = Completions(client)
diff --git a/g4f/local/core/engine.py b/g4f/local/core/engine.py
new file mode 100644
index 00000000..920ed9b4
--- /dev/null
+++ b/g4f/local/core/engine.py
@@ -0,0 +1,61 @@
+import os
+
+from gpt4all import GPT4All
+from .models import models
+
+class LocalProvider:
+    """Generates chat completions from local model files via gpt4all."""
+    @staticmethod
+    def create_completion(model, messages, stream, **kwargs):
+        """Yield generated text for ``messages`` from the local ``model``.
+
+        ``model`` must be a key of ``models``. When its file is missing from
+        the sibling ``../models`` directory, the user is asked on stdin
+        whether to download it. With ``stream`` true, tokens are yielded one
+        at a time; otherwise the whole response is yielded once.
+        Extra keyword arguments are accepted but currently unused.
+
+        Raises ValueError for an unknown model or a declined download. As
+        this is a generator, nothing runs or raises before it is iterated.
+        """
+        if model not in models:
+            raise ValueError(f"Model '{model}' not found / not yet implemented")
+
+        model = models[model]
+        model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../models/')
+        full_model_path = os.path.join(model_dir, model['path'])
+
+        if not os.path.isfile(full_model_path):
+            print(f"Model file '{full_model_path}' not found.")
+            download = input(f'Do you want to download {model["path"]} ? [y/n]')
+
+            # Tolerate surrounding whitespace and either letter case.
+            if download.strip().lower() == 'y':
+                GPT4All.download_model(model['path'], model_dir)
+            else:
+                raise ValueError(f"Model '{model['path']}' not found.")
+
+        model = GPT4All(model_name=model['path'],
+                        n_threads=8,
+                        verbose=False,
+                        allow_download=False,
+                        model_path=model_dir)
+
+        # Every system message goes into the system prompt (generic fallback
+        # when there is none), so none of them is repeated in the transcript.
+        system_template = '\n'.join(
+            message['content'] for message in messages if message['role'] == 'system'
+        ) or 'A chat between a curious user and an artificial intelligence assistant.'
+
+        prompt_template = 'USER: {0}\nASSISTANT: '
+        conversation = '\n'.join(
+            f"{msg['role'].upper()}: {msg['content']}"
+            for msg in messages if msg['role'] != 'system'
+        ) + "\nASSISTANT: "
+
+        with model.chat_session(system_template, prompt_template):
+            if stream:
+                for token in model.generate(conversation, streaming=True):
+                    yield token
+            else:
+                yield model.generate(conversation)
diff --git a/g4f/local/core/models.py b/g4f/local/core/models.py
new file mode 100644
index 00000000..ec36fe41
--- /dev/null
+++ b/g4f/local/core/models.py
@@ -0,0 +1,89 @@
+# Registry of the local models selectable by name. "path" is the model file
+# name; "prompt" / "system" hold per-model templates (None when none is set).
+# NOTE(review): "ram" looks like the required memory in GB - confirm.
+models = {
+    "mistral-7b": {
+        "path": "mistral-7b-openorca.gguf2.Q4_0.gguf",
+        "ram": "8",
+        "prompt": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n",
+        "system": "<|im_start|>system\nYou are MistralOrca, a large language model trained by Alignment Lab AI. For multi-step problems, write out your reasoning for each step.\n<|im_end|>"
+    },
+    "mistral-7b-instruct": {
+        "path": "mistral-7b-instruct-v0.1.Q4_0.gguf",
+        "ram": "8",
+        "prompt": "[INST] %1 [/INST]",
+        "system": None
+    },
+    "gpt4all-falcon": {
+        "path": "gpt4all-falcon-newbpe-q4_0.gguf",
+        "ram": "8",
+        "prompt": "### Instruction:\n%1\n### Response:\n",
+        "system": None
+    },
+    "orca-2": {
+        "path": "orca-2-13b.Q4_0.gguf",
+        "ram": "16",
+        "prompt": None,
+        "system": None
+    },
+    "wizardlm-13b": {
+        "path": "wizardlm-13b-v1.2.Q4_0.gguf",
+        "ram": "16",
+        "prompt": None,
+        "system": None
+    },
+    "nous-hermes-llama2": {
+        "path": "nous-hermes-llama2-13b.Q4_0.gguf",
+        "ram": "16",
+        "prompt": "### Instruction:\n%1\n### Response:\n",
+        "system": None
+    },
+    "gpt4all-13b-snoozy": {
+        "path": "gpt4all-13b-snoozy-q4_0.gguf",
+        "ram": "16",
+        "prompt": None,
+        "system": None
+    },
+    "mpt-7b-chat": {
+        "path": "mpt-7b-chat-newbpe-q4_0.gguf",
+        "ram": "8",
+        "prompt": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n",
+        "system": "<|im_start|>system\n- You are a helpful assistant chatbot trained by MosaicML.\n- You answer questions.\n- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>"
+    },
+    "orca-mini-3b": {
+        "path": "orca-mini-3b-gguf2-q4_0.gguf",
+        "ram": "4",
+        "prompt": "### User:\n%1\n### Response:\n",
+        "system": "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n"
+    },
+    "replit-code-3b": {
+        "path": "replit-code-v1_5-3b-newbpe-q4_0.gguf",
+        "ram": "4",
+        "prompt": "%1",
+        "system": None
+    },
+    "starcoder": {
+        "path": "starcoder-newbpe-q4_0.gguf",
+        "ram": "4",
+        "prompt": "%1",
+        "system": None
+    },
+    "rift-coder-7b": {
+        "path": "rift-coder-v0-7b-q4_0.gguf",
+        "ram": "8",
+        "prompt": "%1",
+        "system": None
+    },
+    "all-MiniLM-L6-v2": {
+        "path": "all-MiniLM-L6-v2-f16.gguf",
+        "ram": "1",
+        "prompt": None,
+        "system": None
+    },
+    "mistral-7b-german": {
+        "path": "em_german_mistral_v01.Q4_0.gguf",
+        "ram": "8",
+        "prompt": "USER: %1 ASSISTANT: ",
+        "system": "Du bist ein hilfreicher Assistent. "
+    }
+}
diff --git a/g4f/local/models/model-here b/g4f/local/models/model-here
new file mode 100644
index 00000000..945c9b46
--- /dev/null
+++ b/g4f/local/models/model-here
@@ -0,0 +1 @@
+.
\ No newline at end of file
-- 
cgit v1.2.3