1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
|
from __future__ import annotations
import json
from aiohttp import ClientSession
from ..typing import AsyncResult, Messages
from .base_provider import AsyncGeneratorProvider
class DeepInfra(AsyncGeneratorProvider):
url = "https://deepinfra.com"
supports_message_history = True
working = True
@classmethod
async def create_async_generator(
cls,
model: str,
messages: Messages,
proxy: str = None,
**kwargs
) -> AsyncResult:
if not model:
model = "meta-llama/Llama-2-70b-chat-hf"
headers = {
"User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/118.0",
"Accept": "text/event-stream",
"Accept-Language": "de,en-US;q=0.7,en;q=0.3",
"Accept-Encoding": "gzip, deflate, br",
"Referer": f"{cls.url}/",
"Content-Type": "application/json",
"X-Deepinfra-Source": "web-page",
"Origin": cls.url,
"Connection": "keep-alive",
"Sec-Fetch-Dest": "empty",
"Sec-Fetch-Mode": "cors",
"Sec-Fetch-Site": "same-site",
"Pragma": "no-cache",
"Cache-Control": "no-cache",
}
async with ClientSession(headers=headers) as session:
data = {
"model": model,
"messages": messages,
"stream": True,
}
async with session.post(
"https://api.deepinfra.com/v1/openai/chat/completions",
json=data,
proxy=proxy
) as response:
response.raise_for_status()
first = True
async for line in response.content:
if line.startswith(b"data: [DONE]"):
break
elif line.startswith(b"data: "):
chunk = json.loads(line[6:])["choices"][0]["delta"].get("content")
if chunk:
if first:
chunk = chunk.lstrip()
if chunk:
first = False
yield chunk
|