summary | refs | log | tree | commit | diff | stats
path: root/g4f/gui/server/internet.py
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- g4f/gui/server/internet.py 12
1 files changed, 5 insertions, 7 deletions
diff --git a/g4f/gui/server/internet.py b/g4f/gui/server/internet.py
index a1fafa7d..b41b5eae 100644
--- a/g4f/gui/server/internet.py
+++ b/g4f/gui/server/internet.py
@@ -2,7 +2,7 @@ from __future__ import annotations
from aiohttp import ClientSession, ClientTimeout
try:
- from duckduckgo_search.duckduckgo_search_async import AsyncDDGS
+ from duckduckgo_search import DDGS
from bs4 import BeautifulSoup
has_requirements = True
except ImportError:
@@ -46,8 +46,6 @@ class SearchResultEntry():
def scrape_text(html: str, max_words: int = None) -> str:
soup = BeautifulSoup(html, "html.parser")
- for exclude in soup(["script", "style"]):
- exclude.extract()
for selector in [
"main",
".main-content-wrapper",
@@ -67,7 +65,7 @@ def scrape_text(html: str, max_words: int = None) -> str:
if select:
select.extract()
clean_text = ""
- for paragraph in soup.select("p"):
+ for paragraph in soup.select("p, h1, h2, h3, h4, h5, h6"):
text = paragraph.get_text()
for line in text.splitlines():
words = []
@@ -98,10 +96,10 @@ async def fetch_and_scrape(session: ClientSession, url: str, max_words: int = No
async def search(query: str, n_results: int = 5, max_words: int = 2500, add_text: bool = True) -> SearchResults:
if not has_requirements:
- raise MissingRequirementsError('Install "duckduckgo-search" and "beautifulsoup4" package')
- async with AsyncDDGS() as ddgs:
+ raise MissingRequirementsError('Install "duckduckgo-search" and "beautifulsoup4" package | pip install -U g4f[search]')
+ with DDGS() as ddgs:
results = []
- for result in await ddgs.text(
+ for result in ddgs.text(
query,
region="wt-wt",
safesearch="moderate",