diff options
Diffstat (limited to '')
-rw-r--r-- | g4f/gui/server/internet.py | 12 |
1 file changed, 5 insertions, 7 deletions
diff --git a/g4f/gui/server/internet.py b/g4f/gui/server/internet.py
index a1fafa7d..b41b5eae 100644
--- a/g4f/gui/server/internet.py
+++ b/g4f/gui/server/internet.py
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 from aiohttp import ClientSession, ClientTimeout
 try:
-    from duckduckgo_search.duckduckgo_search_async import AsyncDDGS
+    from duckduckgo_search import DDGS
     from bs4 import BeautifulSoup
     has_requirements = True
 except ImportError:
@@ -46,8 +46,6 @@ class SearchResultEntry():
 
 def scrape_text(html: str, max_words: int = None) -> str:
     soup = BeautifulSoup(html, "html.parser")
-    for exclude in soup(["script", "style"]):
-        exclude.extract()
     for selector in [
             "main",
             ".main-content-wrapper",
@@ -67,7 +65,7 @@ def scrape_text(html: str, max_words: int = None) -> str:
         if select:
             select.extract()
     clean_text = ""
-    for paragraph in soup.select("p"):
+    for paragraph in soup.select("p, h1, h2, h3, h4, h5, h6"):
         text = paragraph.get_text()
         for line in text.splitlines():
             words = []
@@ -98,10 +96,10 @@ async def fetch_and_scrape(session: ClientSession, url: str, max_words: int = No
 
 async def search(query: str, n_results: int = 5, max_words: int = 2500, add_text: bool = True) -> SearchResults:
     if not has_requirements:
-        raise MissingRequirementsError('Install "duckduckgo-search" and "beautifulsoup4" package')
-    async with AsyncDDGS() as ddgs:
+        raise MissingRequirementsError('Install "duckduckgo-search" and "beautifulsoup4" package | pip install -U g4f[search]')
+    with DDGS() as ddgs:
         results = []
-        for result in await ddgs.text(
+        for result in ddgs.text(
             query,
             region="wt-wt",
             safesearch="moderate",