From f43107aa764acb59dbbaad5d5ac6fb53f3990057 Mon Sep 17 00:00:00 2001 From: Raju Komati Date: Wed, 26 Apr 2023 23:50:18 +0530 Subject: refactored you code added error handling and updated regex for parsing response --- you/__init__.py | 143 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 82 insertions(+), 61 deletions(-) (limited to 'you/__init__.py') diff --git a/you/__init__.py b/you/__init__.py index 073daee9..397600bd 100644 --- a/you/__init__.py +++ b/you/__init__.py @@ -1,78 +1,99 @@ -from tls_client import Session -from re import findall -from json import loads, dumps -from uuid import uuid4 +import re +from json import loads +from uuid import uuid4 + +from fake_useragent import UserAgent +from tls_client import Session class Completion: + @staticmethod def create( - prompt : str, - page : int = 1, - count : int = 10, - safeSearch : str = "Moderate", - onShoppingpage : bool = False, - mkt : str = "", - responseFilter : str = "WebPages,Translations,TimeZone,Computation,RelatedSearches", - domain : str = "youchat", - queryTraceId : str = None, - chat : list = [], - includelinks : bool = False, - detailed : bool = False, - debug : bool = False ) -> dict: - - client = Session(client_identifier="chrome_108") - client.headers = { - "authority" : "you.com", - "accept" : "text/event-stream", - "accept-language" : "en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3", - "cache-control" : "no-cache", - "referer" : "https://you.com/search?q=who+are+you&tbm=youchat", - "sec-ch-ua" : '"Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"', - "sec-ch-ua-mobile" : "?0", - "sec-ch-ua-platform": '"Windows"', - "sec-fetch-dest" : "empty", - "sec-fetch-mode" : "cors", - "sec-fetch-site" : "same-origin", - 'cookie' : f'safesearch_guest=Moderate; uuid_guest={str(uuid4())}', - "user-agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36", - } + prompt: str, + page: int = 1, + count: int = 10, + safe_search: str = 'Moderate', + on_shopping_page: bool = False, + mkt: str = '', + response_filter: str = 'WebPages,Translations,TimeZone,Computation,RelatedSearches', + domain: str = 'youchat', + query_trace_id: str = None, + chat: list = None, + include_links: bool = False, + detailed: bool = False, + debug: bool = False, + ) -> dict: + if chat is None: + chat = [] + + client = Session(client_identifier='chrome_108') + client.headers = Completion.__get_headers() - response = client.get(f"https://you.com/api/streamingSearch", params = { - "q" : prompt, - "page" : page, - "count" : count, - "safeSearch" : safeSearch, - "onShoppingPage" : onShoppingpage, - "mkt" : mkt, - "responseFilter" : responseFilter, - "domain" : domain, - "queryTraceId" : str(uuid4()) if queryTraceId is None else queryTraceId, - "chat" : str(chat), # {"question":"","answer":" '"} - } + response = client.get( + f'https://you.com/api/streamingSearch', + params={ + 'q': prompt, + 'page': page, + 'count': count, + 'safeSearch': safe_search, + 'onShoppingPage': on_shopping_page, + 'mkt': mkt, + 'responseFilter': response_filter, + 'domain': domain, + 'queryTraceId': str(uuid4()) if query_trace_id is None else query_trace_id, + 'chat': str(chat), # {'question':'','answer':' ''} + }, ) - - + if debug: print('\n\n------------------\n\n') print(response.text) print('\n\n------------------\n\n') - youChatSerpResults = findall(r'youChatSerpResults\ndata: (.*)\n\nevent', response.text)[0] - thirdPartySearchResults = findall(r"thirdPartySearchResults\ndata: (.*)\n\nevent", response.text)[0] - #slots = findall(r"slots\ndata: (.*)\n\nevent", response.text)[0] - - text = response.text.split('}]}\n\nevent: youChatToken\ndata: {"youChatToken": "')[-1] - text = text.replace('"}\n\nevent: youChatToken\ndata: {"youChatToken": "', '') - text = text.replace('event: done\ndata: I\'m Mr. Meeseeks. Look at me.\n\n', '') - text = text[:-4] # trims '"}', along with the last two remaining newlines + if 'youChatToken' not in response.text: + return Completion.__get_failure_response() + + you_chat_serp_results = re.search( + r'(?<=event: youChatSerpResults\ndata:)(.*\n)*?(?=event: )', response.text + ).group() + third_party_search_results = re.search( + r'(?<=event: thirdPartySearchResults\ndata:)(.*\n)*?(?=event: )', response.text + ).group() + # slots = findall(r"slots\ndata: (.*)\n\nevent", response.text)[0] + + text = ''.join(re.findall(r'{\"youChatToken\": \"(.*?)\"}', response.text)) extra = { - 'youChatSerpResults' : loads(youChatSerpResults), - #'slots' : loads(slots) + 'youChatSerpResults': loads(you_chat_serp_results), + # 'slots' : loads(slots) } return { - 'response': text, - 'links' : loads(thirdPartySearchResults)['search']["third_party_search_results"] if includelinks else None, - 'extra' : extra if detailed else None, + 'response': text.replace('\\n', '\n').replace('\\\\', '\\'), + 'links': loads(third_party_search_results)['search']['third_party_search_results'] + if include_links + else None, + 'extra': extra if detailed else None, } + + @classmethod + def __get_headers(cls) -> dict: + return { + 'authority': 'you.com', + 'accept': 'text/event-stream', + 'accept-language': 'en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3', + 'cache-control': 'no-cache', + 'referer': 'https://you.com/search?q=who+are+you&tbm=youchat', + 'sec-ch-ua': '"Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"', + 'sec-ch-ua-mobile': '?0', + 'sec-ch-ua-platform': '"Windows"', + 'sec-fetch-dest': 'empty', + 'sec-fetch-mode': 'cors', + 'sec-fetch-site': 'same-origin', + 'cookie': f'safesearch_guest=Moderate; uuid_guest={str(uuid4())}', + 'user-agent': UserAgent().random, + } + + @classmethod + def __get_failure_response(cls) -> dict: + return dict(response='Unable to fetch the response, Please try again.', links=[], extra={}) -- cgit v1.2.3