From a10a8fb335e5a817e1a9add49ee179394eea67c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Anton=20Luka=20=C5=A0ijanec?= Date: Sun, 26 Dec 2021 19:52:31 +0100 Subject: fixed parser, fixed leak, O(log n) storage - tsearch(3) - 0.0.17 --- src/api.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'src/api.c') diff --git a/src/api.c b/src/api.c index 5ab4083..a366882 100644 --- a/src/api.c +++ b/src/api.c @@ -87,7 +87,7 @@ htmlDocPtr sc_capix (struct sc_cache * c, char * body, char * headers, int isfmt char * sc_find_class (char * haystack, const char * definition) { /* you must free class after calling */ if (!haystack || !definition) return NULL; - char * class = strstr(haystack, definition); + char * class = strcasestr(haystack, definition); if (!class) return NULL; int found = 0; @@ -117,7 +117,7 @@ int sc_fix_url (char ** h) { /* fixes a (result) URL in-place (removes tracking urldecode(*h, *h); } char * c = NULL; - if ((c = strstr(*h, "googleweblight.com/fp?u="))) { /* stage 2: url may be "light web" tracking url by google results */ + if ((c = strcasestr(*h, "googleweblight.com/fp?u="))) { /* stage 2: url may be "light web" tracking url by google results */ *h = c+strlen("googleweblight.com/fp?u="); /* we could disable this with a cookie but meh, this is easier and _stateless_ */ *strchrnul(*h, '&') = '\0'; urldecode(*h, *h); @@ -156,6 +156,7 @@ struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct s goto rc; } int qwasgiven = 0; + int sl = strlen(s); if (!q) q = sc_query_init(); else @@ -214,6 +215,7 @@ struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct s char * imgrefurl = NULL; /* easy, huh? */ SC_LOG(SC_LOG_DEBUG, c, "hreflink = %s", hreflink); sscanf(hreflink, "/imgres?imgurl=%m[^&]&imgrefurl=%m[^&]", &imgurl, &imgrefurl); + xmlFree(hreflink); if (!imgurl && !imgrefurl) { SC_LOG(SC_LOG_ERROR, c, "!imgurl && !imgrefurl, txtdoc = %s", txtdoc); /* rs = -6; */ /* we continue running not fail because of a single picture */ @@ -257,8 +259,9 @@ struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct s if (hreflink) { SC_GTR->url = malloc(strlen(hreflink)+1); strcpy(SC_GTR->url, hreflink); - xmlFree(orig_hreflink_for_free); } else SC_GTR->url = NULL; + if (orig_hreflink_for_free) + xmlFree(orig_hreflink_for_free); cp = (char *) xmlNodeGetContent(descnode); if (cp) { SC_GTR->desc = malloc(strlen(cp)+1); @@ -285,17 +288,20 @@ struct sc_query * sc_query_google (const char * s, struct sc_cache * c, struct s } q->cache = c; q->lookup_time = time(NULL); - q->engines = SC_ENGINE_GOOGLE; - q->string = realloc(q->string, strlen(s)+1); - q->opt = opt; + q->string = realloc(q->string, sl+1); + q->opt |= opt | SC_ENGINE_GOOGLE; strcpy(q->string, s); if (!qwasgiven) { SC_CWLE(c, c->queries_lock); +#ifdef SC_OLD_STORAGE if (c->queries_sizeof <= c->queries_length) SC_BIGGER_ARRAY(c->queries, sc_query, 0); c->queries_length++; #define SC_GTQ c->queries[c->queries_length-1] SC_GTQ = q; +#else /* we don't detect here if query is already stored, but it should not be ... */ + tsearch(q, &c->qrp, SC_COMPAR_CAST sc_query_compar); +#endif } SC_CUE(c, c->queries_lock); rc: -- cgit v1.2.3