# g4f/Provider/H2o.py

from __future__ import annotations

import json
import uuid

from aiohttp import ClientSession

from ..typing import AsyncResult, Messages
from .base_provider import AsyncGeneratorProvider, format_prompt


class _classproperty:
    """Read-only descriptor that evaluates a function against the owning class.

    Stacking ``@classmethod`` on ``@property`` only works on Python 3.9-3.12
    (the chaining was deprecated in 3.11 and removed in 3.13). This descriptor
    provides the same class-level attribute access on every version.
    """

    def __init__(self, func):
        self._func = func
        self.__doc__ = func.__doc__

    def __get__(self, instance, owner=None):
        if owner is None:
            owner = type(instance)
        return self._func(owner)


class H2o(AsyncGeneratorProvider):
    """Async streaming provider for the H2O chat endpoint at ``url``."""

    url = "https://gpt-gm.h2o.ai"
    working = True
    # Model used when the caller passes a falsy ``model``.
    model = "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-40b-v1"

    @classmethod
    async def create_async_generator(
        cls,
        model: str,
        messages: Messages,
        proxy: str = None,
        **kwargs
    ) -> AsyncResult:
        """Stream the completion text for ``messages``.

        The call posts the settings, opens a conversation, streams the
        generated tokens and finally deletes the conversation again.

        Args:
            model: Model identifier; a falsy value selects ``cls.model``.
            messages: Conversation history, flattened with ``format_prompt``.
            proxy: Optional proxy URL passed to every HTTP request.
            **kwargs: Merged into the generation ``parameters`` dict, where
                they override the defaults listed below.

        Yields:
            The text of every streamed token that is not flagged as special.

        Raises:
            aiohttp.ClientResponseError: If any request returns an error
                status.
        """
        model = model if model else cls.model
        headers = {"Referer": cls.url + "/"}

        async with ClientSession(
            headers=headers
        ) as session:
            settings = {
                "ethicsModalAccepted": "true",
                "shareConversationsWithModelAuthors": "true",
                "ethicsModalAcceptedAt": "",
                "activeModel": model,
                "searchEnabled": "true",
            }
            async with session.post(
                f"{cls.url}/settings",
                proxy=proxy,
                data=settings
            ) as response:
                response.raise_for_status()

            async with session.post(
                f"{cls.url}/conversation",
                proxy=proxy,
                json={"model": model},
            ) as response:
                response.raise_for_status()
                conversation_id = (await response.json())["conversationId"]

            conversation_url = f"{cls.url}/conversation/{conversation_id}"
            payload = {
                "inputs": format_prompt(messages),
                "parameters": {
                    "temperature": 0.4,
                    "truncate": 2048,
                    "max_new_tokens": 1024,
                    "do_sample": True,
                    "repetition_penalty": 1.2,
                    "return_full_text": False,
                    **kwargs
                },
                "stream": True,
                "options": {
                    "id": str(uuid.uuid4()),
                    "response_id": str(uuid.uuid4()),
                    "is_retry": False,
                    "use_cache": False,
                    "web_search_id": "",
                },
            }
            prefix = "data:"
            try:
                async with session.post(
                    conversation_url,
                    proxy=proxy,
                    json=payload
                ) as response:
                    # Fail loudly on an HTTP error instead of scanning the
                    # error body for "data:" lines and yielding nothing.
                    response.raise_for_status()
                    async for raw_line in response.content:
                        line = raw_line.decode("utf-8")
                        if not line.startswith(prefix):
                            continue
                        # json.loads ignores surrounding whitespace, so the
                        # line terminator needs no manual stripping (which
                        # would truncate a final line lacking a newline).
                        event = json.loads(line[len(prefix):])
                        if not event["token"]["special"]:
                            yield event["token"]["text"]
            finally:
                # Delete the conversation even when streaming fails or the
                # consumer stops iterating early.
                # NOTE(review): the request body mirrors the original call;
                # confirm whether the endpoint needs it at all.
                async with session.delete(
                    conversation_url,
                    proxy=proxy,
                    json=payload
                ) as response:
                    response.raise_for_status()

    @_classproperty
    def params(cls):
        """Return a one-line summary of the parameters this provider accepts."""
        params = [
            ("model", "str"),
            ("messages", "list[dict[str, str]]"),
            ("stream", "bool"),
            ("temperature", "float"),
            ("truncate", "int"),
            ("max_new_tokens", "int"),
            ("do_sample", "bool"),
            ("repetition_penalty", "float"),
            ("return_full_text", "bool"),
        ]
        param = ", ".join([": ".join(p) for p in params])
        return f"g4f.provider.{cls.__name__} supports: ({param})"