path: root/g4f/Provider/bing/upload_image.py
"""
Module to handle image uploading and processing for Bing AI integrations.
"""
from __future__ import annotations

import json
import math
from aiohttp import ClientSession, FormData

from ...typing import ImageType, Tuple
from ...image import to_image, process_image, to_base64_jpg, ImageRequest, Image

IMAGE_CONFIG = {
    "maxImagePixels": 360000,
    "imageCompressionRate": 0.7,
    "enableFaceBlurDebug": False,
}

async def upload_image(
    session: ClientSession, 
    image_data: ImageType, 
    tone: str, 
    proxy: str = None
) -> ImageRequest:
    """
    Uploads an image to Bing's AI service and returns the image response.

    Args:
        session (ClientSession): The active session.
        image_data (ImageType): The image data to be uploaded.
        tone (str): The tone of the conversation.
        proxy (str, optional): Proxy if any. Defaults to None.

    Raises:
        RuntimeError: If the image upload fails.

    Returns:
        ImageRequest: The response from the image upload.
    """
    image = to_image(image_data)
    new_width, new_height = calculate_new_dimensions(image)
    image = process_image(image, new_width, new_height)
    img_binary_data = to_base64_jpg(image, IMAGE_CONFIG['imageCompressionRate'])

    data = build_image_upload_payload(img_binary_data, tone)
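    # The multipart payload pairs a 'knowledgeRequest' JSON part with the
    # base64-encoded image; the /images/kblob endpoint responds with the blob
    # ids that parse_image_response turns into image URLs.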
    headers = prepare_headers(session)

    async with session.post("https://www.bing.com/images/kblob", data=data, headers=headers, proxy=proxy) as response:
        if response.status != 200:
            raise RuntimeError("Failed to upload image.")
        return parse_image_response(await response.json())

def calculate_new_dimensions(image: Image) -> Tuple[int, int]:
    """
    Calculates the new dimensions for the image based on the maximum allowed pixels.

    Args:
        image (Image): The PIL Image object.

    Returns:
        Tuple[int, int]: The new width and height for the image.
    """
    width, height = image.size
    max_image_pixels = IMAGE_CONFIG['maxImagePixels']
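    # Worked example (illustrative numbers): a 1200x900 image has 1,080,000
    # pixels, which exceeds the 360,000-pixel limit; the scale factor is
    # sqrt(360000 / 1080000) ~= 0.577, so the image is resized to about 692x519.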
    if max_image_pixels / (width * height) < 1:
        scale_factor = math.sqrt(max_image_pixels / (width * height))
        return int(width * scale_factor), int(height * scale_factor)
    return width, height

def build_image_upload_payload(image_bin: str, tone: str) -> FormData:
    """
    Builds the payload for image uploading.

    Args:
        image_bin (str): Base64-encoded image data.
        tone (str): The tone of the conversation.

    Returns:
        FormData: The multipart form data for the upload request.
    """
    data = FormData()
    knowledge_request = json.dumps(build_knowledge_request(tone), ensure_ascii=False)
    data.add_field('knowledgeRequest', knowledge_request, content_type="application/json")
    data.add_field('imageBase64', image_bin)
    return data

def build_knowledge_request(tone: str) -> dict:
    """
    Builds the knowledge request payload.

    Args:
        tone (str): The tone of the conversation.

    Returns:
        dict: The knowledge request payload.
    """
    return {
        "imageInfo": {},
        "knowledgeRequest": {
            'invokedSkills': ["ImageById"],
            'subscriptionId': "Bing.Chat.Multimodal",
            'invokedSkillsRequestData': {
                'enableFaceBlur': True
            },
            'convoData': {
                'convoid': "",
                'convotone': tone
            }
        }
    }

def prepare_headers(session: ClientSession) -> dict:
    """
    Prepares the headers for the image upload request.

    Args:
        session (ClientSession): The active session.

    Returns:
        dict: The headers for the request.
    """
    headers = session.headers.copy()
    headers["Referer"] = 'https://www.bing.com/search?q=Bing+AI&showconv=1&FORM=hpcodx'
    headers["Origin"] = 'https://www.bing.com'
    return headers

def parse_image_response(response: dict) -> ImageRequest:
    """
    Parses the response from the image upload.

    Args:
        response (dict): The response dictionary.

    Raises:
        RuntimeError: If parsing the image info fails.

    Returns:
        ImageRequest: The parsed image response.
    """
    if not response.get('blobId'):
        raise RuntimeError("Failed to parse image info.")

    result = {'bcid': response.get('blobId', ""), 'blurredBcid': response.get('processedBlobId', "")}
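    # 'bcid' is the id of the uploaded blob; 'blurredBcid' refers to the
    # face-blurred variant (processedBlobId) and is preferred for imageUrl
    # when present.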
    result["imageUrl"] = f"https://www.bing.com/images/blob?bcid={result['blurredBcid'] or result['bcid']}"

    result['originalImageUrl'] = (
        f"https://www.bing.com/images/blob?bcid={result['blurredBcid']}"
        if IMAGE_CONFIG["enableFaceBlurDebug"] else
        f"https://www.bing.com/images/blob?bcid={result['bcid']}"
    )
    return ImageRequest(result["imageUrl"], "", result)
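
# A minimal usage sketch (illustrative only, not part of the provider): it
# assumes an aiohttp ClientSession already carrying the cookies and headers
# Bing expects, a local file named "example.jpg", and the "Balanced"
# conversation tone. Run the module directly to try it.
if __name__ == "__main__":
    import asyncio

    async def _example() -> None:
        async with ClientSession() as session:
            with open("example.jpg", "rb") as file:
                image_request = await upload_image(session, file.read(), "Balanced")
            print(image_request)

    asyncio.run(_example())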