From dd08d816dca127808b2343005ec8a728b6cb2a2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anton=20Luka=20=C5=A0ijanec?= Date: Fri, 5 Jan 2024 16:44:26 +0100 Subject: support for suggested queries and query redirects --- src/api.c | 44 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) (limited to 'src/api.c') diff --git a/src/api.c b/src/api.c index a652e3a..b0ef96d 100644 --- a/src/api.c +++ b/src/api.c @@ -127,6 +127,7 @@ int sc_fix_url (char ** h) { /* fixes a (result) URL in-place (removes tracking enum sc_return sc_query_google (const char * s, /* breaking change: changed return type */ struct sc_cache * c, struct sc_query * q, + char ** redirect, /* variable redirect will be set to a heap allocated string that must be freed by the caller if the upstream returned results for a different query. in that case the returned query object will be for a different search string! -- if NULL, request that upstream does not enable "results for" feature */ SC_OPT_TYPE opt) { /* check4cachedB4 */ /* query is in most cases NULL. then it will be allocated and put into sc_cache. otherwise response will be put into passed q. */ /* if query is not NULL, it MUST be initialized */ @@ -155,6 +156,9 @@ enum sc_return sc_query_google (const char * s, /* breaking change: changed retu htmlDocPtr xmldoc = NULL; char * txtdoc = NULL; int qwasgiven = 0; + SC_LOG(SC_LOG_DEBUG, c, "%s called, redirect is %p", __func__, redirect); + if (redirect) + *redirect = NULL; if (!s || !c) { rs = SC_BADCALL; goto rc; @@ -166,7 +170,7 @@ enum sc_return sc_query_google (const char * s, /* breaking change: changed retu qwasgiven++; char * us = malloc(sizeof(char)*strlen(s)*3+1); urlencode(us, s); - txtdoc = SC_CAPI(c, NULL, NULL, "http://wap.google.com/search?q=%s&num=100&ie=UTF-8%s", us, (opt&SC_OPT_IMAGE) ? "&tbm=isch" : ""); + txtdoc = SC_CAPI(c, NULL, NULL, "http://wap.google.com/search?q=%s&num=100&ie=UTF-8%s%s", us, (opt&SC_OPT_IMAGE) ? "&tbm=isch" : "", redirect ? "" : "&nfpr=1"); // fprintf(stdout, "%s\n", txtdoc); free(us); if (!txtdoc) { @@ -178,6 +182,7 @@ enum sc_return sc_query_google (const char * s, /* breaking change: changed retu rs = SC_CAPTCHA; goto rc; } + char * resultsforclass = sc_find_class(txtdoc, "{color:#1967d2}"); if (opt & SC_OPT_IMAGE) { imageclass = sc_find_class(txtdoc, "{font-family:Roboto,Helvetica,Arial,sans-serif}"); if (!imageclass) { @@ -292,11 +297,42 @@ enum sc_return sc_query_google (const char * s, /* breaking change: changed retu SC_CUE(c, c->queries_lock); goto rc; } + q->string = realloc(q->string, sl+1); + strcpy(q->string, s); + char * xpathsugg = NULL; + if (resultsforclass) { + xpathsugg = malloc(512+strlen(resultsforclass)); + sprintf(xpathsugg, "//a[contains(@class, '%s')]", resultsforclass); + xmlNodePtr suggnode = nthNodeX(xmldoc, xpathsugg, 0); + if (suggnode && xmlHasProp(suggnode, BAD_CAST "href")) { + char * href = (char *) xmlGetProp(suggnode, BAD_CAST "href"); + char * content = (char *) xmlNodeGetContent(suggnode); + if (href && strstr(href, "&spell=1&")) + strcpy((q->suggested = realloc(q->suggested, strlen(content)+1)), content); + xmlFree(href); + xmlFree(content); + } else { + free(q->suggested); + q->suggested = NULL; + } + } else { + free(q->suggested); + q->suggested = NULL; + } + xmlNodePtr first = nthNodeX(xmldoc, xpathsugg, 1); + if (redirect && xpathsugg && q->suggested && xmlHasProp(first, BAD_CAST "href")) { + char * href = (char *) xmlGetProp(first, BAD_CAST "href"); + if (href && strstr(href, "&nfpr=1&")) { + *redirect = q->suggested; + q->suggested = NULL; + q->string = realloc(q->string, strlen(*redirect)+1); + strcpy(q->string, *redirect); + } + xmlFree(href); + } q->cache = c; q->lookup_time = time(NULL); - q->string = realloc(q->string, sl+1); q->opt |= opt | SC_ENGINE_GOOGLE; - strcpy(q->string, s); if (!qwasgiven) { SC_CWLE(c, c->queries_lock); #ifdef SC_OLD_STORAGE @@ -318,6 +354,8 @@ rc: free(titleclass); free(descclass); free(imageclass); + free(resultsforclass); free(xpath); + free(xpathsugg); return rs; } -- cgit v1.2.3