diff options
Diffstat (limited to '')
-rw-r--r-- | src/api.c | 40 | ||||
-rw-r--r-- | src/hp.html | 15 | ||||
-rw-r--r-- | src/httpd.c | 10 | ||||
-rw-r--r-- | src/structs.c | 7 |
4 files changed, 60 insertions, 12 deletions
@@ -107,6 +107,22 @@ char * sc_find_class (char * haystack, const char * definition) { /* you must fr toreturn[endofclass-class] = '\0'; return toreturn; } +int sc_fix_url (char ** h) { /* fixes a (result) URL in-place (removes tracking nonsense, so resulting URL is shorter or equl) */ + if (!h || !*h) /* stage 0: prevent accidental death */ + return -1; + if (!strncmp(*h, "/url?q=", strlen("/url?q="))) { /* stage 1: url may be tracking url by google results */ + *h = *h+strlen("/url?q="); + *strchrnul(*h, '&') = '\0'; + urldecode(*h, *h); + } + char * c = NULL; + if ((c = strstr(*h, "googleweblight.com/fp?u="))) { /* stage 2: url may be "light web" tracking url by google results */ + *h = c+strlen("googleweblight.com/fp?u="); /* we could disable this with a cookie but meh, this is easier and _stateless_ */ + *strchrnul(*h, '&') = '\0'; + urldecode(*h, *h); + } /* TODO: be pedantic and remove utm_source and other tracking bullshit */ + return 1; +} struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct sc_query * q) { /* check for cached queries first! */ /* query is in most cases NULL. then it will be allocated and put into sc_cache. otherwise response will be put into passed q. */ /* if query is not NULL, it MUST be initialized */ @@ -146,7 +162,7 @@ struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct s char * xpath = NULL; char * descclass = NULL; char * titleclass = NULL; - char * txtdoc = SC_CAPI(c, NULL, NULL, "http://wap.google.com/search?q=%s", us); + char * txtdoc = SC_CAPI(c, NULL, NULL, "http://wap.google.com/search?q=%s&num=100", us); // fprintf(stdout, "%s\n", txtdoc); free(us); if (!txtdoc) { @@ -162,7 +178,8 @@ struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct s goto rc; } #define SC_GTXF "/html/body//a[contains(@class, '%s')]" /* result a */ -#define SC_GTXD "../..//table//span[@class='%s']" +#define SC_GTXD /* description */ "../..//table//span[@class='%s']" +#define SC_GTXB /* breadcrumbs */ ".//span[@class='%s']" #define SC_GTR q->results[q->results_length-1] xpath = malloc(strlen(titleclass)+strlen(SC_GTXF)); sprintf(xpath, SC_GTXF, titleclass); /* whenever starts with titleclas */ @@ -177,16 +194,18 @@ struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct s if (node->type == XML_ELEMENT_NODE) { xmlAttrPtr href = xmlHasProp(node, BAD_CAST "href"); if (href) { - char * hreflink = (char *) xmlGetProp(node, BAD_CAST "href"); - if (!strncmp(hreflink, "/url?q=", strlen("/url?q="))) { - hreflink = hreflink+strlen("/url?q="); - *strchrnul(hreflink, '&') = '\0'; - urldecode(hreflink, hreflink); - } + char * hreflink = (char *) xmlGetProp(node, BAD_CAST "href"); /* fuck rules, I will rewrite it anyways <= hi future me */ + sc_fix_url(&hreflink); char * x = malloc(strlen(descclass)+strlen(SC_GTXD)); + char * xbread = malloc(strlen(descclass)+strlen(SC_GTXB)); sprintf(x, SC_GTXD, descclass /* remember, kids, GNU C is fucking legendary */); + sprintf(xbread, SC_GTXB, descclass /* remember, kids, GNU C is fucking legendary */); xmlNodePtr descnode = nthNodeXN(node, x, 0); + if (!descnode) /* description may be above, see https://support.google.com/websearch?p=featured_snippets */ + descnode = nthNodeXN(node, "../../div/div", 0); + xmlNodePtr breadnode = nthNodeXN(node, xbread, 0); free(x); + free(xbread); if (q->results_sizeof <= q->results_length) SC_BIGGER_ARRAY(q->results, sc_result); q->results_length++; @@ -206,6 +225,11 @@ struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct s SC_GTR->desc = malloc(strlen(SC_I18N_NO_DESCRIPTION)+1); strcpy(SC_GTR->desc, SC_I18N_NO_DESCRIPTION); } + SC_GTR->breadcrumbs = (char *) xmlNodeGetContent(breadnode); + if (!SC_GTR->breadcrumbs) { + SC_GTR->breadcrumbs = malloc(strlen(SC_GTR->url)+1); + strcpy(SC_GTR->breadcrumbs, SC_GTR->url); + } } } } diff --git a/src/hp.html b/src/hp.html index d2bc82f..47aff62 100644 --- a/src/hp.html +++ b/src/hp.html @@ -14,7 +14,7 @@ <style> input[type=password], input[type=text], input[type=submit], input[type=button] { height: 1cm; - font-size: 18px; + font-size: large; } .result:hover { background: var(--bgc2); @@ -44,6 +44,19 @@ .SC_LOG_DEBUG { color: magenta; } + .breadcrumb { + color: var(--fgc2); + font-size: small; /* small is relative to parent (h4) size */ + } + .result h4 { + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + margin-bottom: 0.314159265358em; + } + .result p { + margin-top: 0.314159265358em; + } </style> </head> <body> diff --git a/src/httpd.c b/src/httpd.c index bf5c3d1..656ad92 100644 --- a/src/httpd.c +++ b/src/httpd.c @@ -9,13 +9,17 @@ char * sc_queryhtml (struct sc_query * q) { /* remember to free returned string string##_sizeof = (string##_written+wanted+1)*SC_REALLOC_K; \ string = realloc(string, string##_sizeof); \ } -#define SC_HRF "<div class=result><h4><a href=\"%s\">%s</a></h4><p>%s</p></div>" +#define SC_HRF "<div class=result id=result%lu><h4><a href=\"%s\" accesskey=%lu>%s</a> " \ + "<span class=breadcrumb>%s</span></h4><p>%s</p></div>" +#define SC_HRA i, safeurl, i, safetitle, safebreadcrumbs, safebody char * safetitle = htmlspecialchars(q->results[i]->title); char * safebody = htmlspecialchars(q->results[i]->desc); char * safeurl = htmlspecialchars(q->results[i]->url); - size_t ws = snprintf(NULL, 0, SC_HRF, safeurl, safetitle, safebody); + char * safebreadcrumbs = htmlspecialchars(q->results[i]->breadcrumbs); + size_t ws = snprintf(NULL, 0, SC_HRF, SC_HRA); SC_HRC(resultshtml, ws); - resultshtml_written += sprintf(resultshtml+resultshtml_written, SC_HRF, safeurl, safetitle, safebody); + resultshtml_written += sprintf(resultshtml+resultshtml_written, SC_HRF, SC_HRA); + free(safebreadcrumbs); free(safetitle); free(safebody); free(safeurl); diff --git a/src/structs.c b/src/structs.c index b99f1eb..2d83f74 100644 --- a/src/structs.c +++ b/src/structs.c @@ -36,9 +36,14 @@ struct sc_result { time_t date; /* some search engines like to extract a date from a website, store that here - not implemented */ unsigned short int rating; /* some search engines like to extract a rating from a website, store that here */ /* not implementd */ unsigned short int rating_max; /* max rating when above is used /\ */ /* not implemented yet */ + char * breadcrumbs; /* yesfree - google has nice breadcrumbs, when hovering over the URL requires too much time (: */ }; struct sc_result * sc_result_init () { struct sc_result * r = calloc(1, sizeof(struct sc_result)); + r->url = NULL; + r->desc = NULL; + r->title = NULL; + r->breadcrumbs = NULL; return r; } int sc_result_free (struct sc_result * r) { @@ -47,6 +52,7 @@ int sc_result_free (struct sc_result * r) { free(r->url); free(r->desc); free(r->title); + free(r->breadcrumbs); free(r); return 1; } @@ -65,6 +71,7 @@ struct sc_query * sc_query_init () { q->results[i] = sc_result_init(); q->results[i]->query = q; } + q->string = NULL; return q; } int sc_query_free (struct sc_query * q) { |