From bbf6fe2fd069ef89f73ecc3fe3ec2000833f05f4 Mon Sep 17 00:00:00 2001
From: sijanec
Date: Sat, 3 Apr 2021 23:15:48 +0200
Subject: initial release

---
review revision of the original commit: XPath results are checked for NULL
before use, nthNodeXN no longer leaks the result object, printNode frees the
content string, htmlspecialchars checks its allocations and emits real entities.

 src/lib.c | 160 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 150 insertions(+), 10 deletions(-)

(limited to 'src/lib.c')

diff --git a/src/lib.c b/src/lib.c
index 2c3e34a..5c0576e 100644
--- a/src/lib.c
+++ b/src/lib.c
@@ -1,4 +1,4 @@
-static htmlDocPtr parseHtmlDocument(const char * d, const char * b /* base url */) {
+htmlDocPtr parseHtmlDocument (const char * d, const char * b /* base url */) {
 	if (!b)
 		b = "";
 	htmlParserCtxtPtr parser_context = htmlNewParserCtxt();
@@ -6,28 +6,168 @@ static htmlDocPtr parseHtmlDocument(const char * d, const char * b /* base url *
 	htmlFreeParserCtxt(parser_context);
 	return document;
 }
-static xmlXPathObjectPtr findNodes(htmlDocPtr document, const char * xpath_query) {
+/*
+ * evaluates xpath_query against the whole document. returns NULL when the
+ * evaluation fails or matches no nodes; otherwise the caller has to release
+ * the result with xmlXPathFreeObject.
+ */
+xmlXPathObjectPtr findNodes (htmlDocPtr document, const char * xpath_query) {
 	xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(document);
 	xmlXPathObjectPtr nodes = xmlXPathEvalExpression(BAD_CAST xpath_query, xpath_ctx);
+	/* nodes is NULL when the context could not be created or the query is invalid */
+	if (!nodes || xmlXPathNodeSetIsEmpty(nodes->nodesetval)) {
+		xmlXPathFreeContext(xpath_ctx);
+		xmlXPathFreeObject(nodes);
+		return NULL;
+	}
+	xmlXPathFreeContext(xpath_ctx);
+	return nodes;
+}
+/*
+ * same as findNodes, but xpath_query is evaluated with node as the context
+ * node, so relative paths start at node. returns NULL for a NULL node.
+ */
+xmlXPathObjectPtr findNodesN (xmlNodePtr node, const char * xpath_query) {
+	if (!node)
+		return NULL;
+	xmlXPathContextPtr xpath_ctx = xmlXPathNewContext(node->doc);
+	/* xmlXPathNodeEval sets the context node itself, no xmlXPathSetContextNode needed */
+	xmlXPathObjectPtr nodes = xmlXPathNodeEval(node, BAD_CAST xpath_query, xpath_ctx);
+	if (!nodes || xmlXPathNodeSetIsEmpty(nodes->nodesetval)) {
+		xmlXPathFreeContext(xpath_ctx);
+		xmlXPathFreeObject(nodes);
+		return NULL;
+	}
 	xmlXPathFreeContext(xpath_ctx);
 	return nodes;
 }
-typedef void (*node_function_t)(xmlNodePtr node, void * data);
-static void eachNode(xmlXPathObjectPtr nodes, node_function_t f, void * data) {
+/* callback type for eachNode and eachNodeX; data is handed to the callback unchanged */
+typedef void (*node_function_t) (xmlNodePtr node, void * data);
+/* calls f(node, data) for every node in nodes. you can instead use the EACHNODE macro */
+void eachNode (xmlXPathObjectPtr nodes, node_function_t f, void * data) {
+	if (!nodes || !nodes->nodesetval || !f)
+		return;
 	xmlNodeSetPtr nodeset = nodes->nodesetval;
 	int i, size = nodeset->nodeNr;
 	for (i = 0; i < size; i++) {
 		xmlNodePtr cur;
-		cur = (xmlNodePtr)nodeset->nodeTab[i];
+		cur = (xmlNodePtr) nodeset->nodeTab[i];
 		f(cur, data);
 	}
 }
-void printLinkNode(xmlNodePtr node, void * data) {
+/* runs the xpath query on doc and calls f(node, data) for every match; frees the result itself */
+void eachNodeX (htmlDocPtr doc, const char * xpath, node_function_t f, void * data) {
+	xmlXPathObjectPtr nodes = findNodes(doc, xpath);
+	if (!nodes)
+		return;
+	eachNode(nodes, f, data);
+	xmlXPathFreeObject(nodes);
+}
+/*
+ * returns the n-th (zero-based) node matched by xpath relative to node, or
+ * NULL when n is negative or there are not enough matches.
+ */
+xmlNodePtr nthNodeXN (xmlNodePtr node, const char * xpath, int n) {
+	xmlXPathObjectPtr nodes = findNodesN(node, xpath);
+	if (!nodes)
+		return NULL;
+	xmlNodeSetPtr nodeset = nodes->nodesetval;
+	xmlNodePtr toreturn = NULL;
+	if (n >= 0 && n < nodeset->nodeNr)
+		toreturn = (xmlNodePtr) nodeset->nodeTab[n];
+	xmlXPathFreeObject(nodes); /* freed on every path, the out-of-range return used to leak it */
+	return toreturn;
+}
+/*
+ * loops node over every node in nodes; does nothing when nodes is NULL.
+ * you can instead use eachNodeX with an anonymous function - no need to call
+ * findNodes and free the result separately.
+ */
+#define EACHNODE(node, nodes) \
+	for (int EACHNODE_i = 0; \
+		(nodes) && (nodes)->nodesetval \
+			&& EACHNODE_i < (nodes)->nodesetval->nodeNr \
+			&& ((node) = (xmlNodePtr) (nodes)->nodesetval->nodeTab[EACHNODE_i]); \
+		EACHNODE_i++)
+/* // this does not work
+#define EACHNODEX(node, target, xpath) \
+	xmlXPathObjectPtr EACHNODEX_nodes##__LINE__ = findNodes(target, xpath); \
+	for (size_t EACHNODEX_i = 0; \
+		EACHNODEX_nodes##__LINE__ ? EACHNODEX_nodes##__LINE__->nodesetval \
+			? ((EACHNODEX_i < EACHNODEX_nodes##__LINE__->nodesetval->nodeNr) \
+			&& (node = (xmlNodePtr) EACHNODEX_nodes##__LINE__->nodesetval->nodeTab[EACHNODEX_i])) \
+			: xmlXPathFreeObject(EACHNODEX_nodes##__LINE__) \
+		: 0 : 0; \
+		EACHNODEX_i++)
+*/
+/* prints the text content of an element node to stdout; data is unused */
+void printNode (xmlNodePtr node, void * data) {
+	(void) data;
+	if (!node)
+		return;
 	if (node->type == XML_ELEMENT_NODE) {
-		xmlAttrPtr href = xmlHasProp(node, BAD_CAST "href");
-		if (href) {
-			printf("-> Link to '%s'\n", xmlGetProp(node, BAD_CAST "href"));
+		xmlChar * content = xmlNodeGetContent(node); /* allocated copy, has to be freed */
+		printf("-> content: '%s'\n", content ? (char *) content : "");
+		if (content)
+			xmlFree(content);
+	}
+}
+/* wrap code that uses GNU extensions in these to silence -Wpedantic for that region */
+#define gnu_code_start \
+	_Pragma ("GCC diagnostic push") \
+	_Pragma ("GCC diagnostic ignored \"-Wpedantic\"")
+#define gnu_code_end \
+	_Pragma ("GCC diagnostic pop")
+/*
+ * this is the definition of the anonymous function - source: https://en.wikipedia.org/wiki/Anonymous_function#GCC
+ * it relies on GNU C extensions (statement expressions and nested functions).
+ */
+#define lambda(l_ret_type, l_arguments, l_body) \
+	({ \
+		l_ret_type l_anonymous_functions_name l_arguments \
+		l_body \
+		&l_anonymous_functions_name; \
+	})
+/*
+ * returns a newly allocated copy of i with & < > " replaced by HTML entities,
+ * or NULL when i is NULL or memory runs out. remember to free the output.
+ */
+char * htmlspecialchars (const char * i) {
+	if (!i)
+		return NULL;
+	size_t s = 128;
+	char * o = malloc(s);
+	if (!o)
+		return NULL;
+	size_t w = 0;
+	for (; *i; i++) {
+		if (s - w <= 10) { /* longest entity is 6 bytes, keep room for it and the terminator */
+			char * bigger = realloc(o, s + s / 2);
+			if (!bigger) {
+				free(o);
+				return NULL;
+			}
+			o = bigger;
+			s += s / 2;
+		}
+		switch (*i) {
+			case '&':
+				w += (size_t) sprintf(o+w, "&amp;");
+				break;
+			case '<':
+				w += (size_t) sprintf(o+w, "&lt;");
+				break;
+			case '>':
+				w += (size_t) sprintf(o+w, "&gt;");
+				break;
+			case '"':
+				w += (size_t) sprintf(o+w, "&quot;");
+				break;
+			default:
+				o[w++] = *i;
+				break;
 		}
 	}
+	o[w] = '\0';
+	return o;
 }
-
-- 
cgit v1.2.3