/* blob: 2371077e3a76e9336349158399aebb71ad744f38 (plain) (tree) */
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <libxml/HTMLparser.h>
/*
 * Parse the HTML file named by argv[1] with libxml2's HTML parser and
 * discard the resulting document.
 *
 * The file is mapped read-only into memory and handed to
 * htmlCtxtReadMemory() together with its exact size.
 *
 * Returns 0 when the file was parsed (or was empty), 1 on a usage error,
 * an I/O error, or when the parser produced no document.
 */
int main (int argc, char ** argv) {
    int rc = 1;
    int fd;
    char *txtdoc;
    size_t maplen;
    struct stat s;
    htmlParserCtxtPtr c;
    htmlDocPtr xmldoc;

    if (argc < 2) {
        fprintf(stderr, "usage: %s FILE\n", argc > 0 ? argv[0] : "htmlparse");
        return 1;
    }

    fd = open(argv[1], O_RDONLY);
    if (fd < 0) {
        perror(argv[1]);
        return 1;
    }

    /* fstat() on the descriptor describes the file we actually opened;
     * a second path lookup could race with a rename or replace. */
    if (fstat(fd, &s) != 0) {
        perror(argv[1]);
        goto out_close;
    }

    /* mmap() rejects zero-length mappings, so there is nothing to parse. */
    if (s.st_size == 0) {
        rc = 0;
        goto out_close;
    }

    /* htmlCtxtReadMemory() takes the buffer size as an int. */
    if (s.st_size < 0 || s.st_size > INT_MAX) {
        fprintf(stderr, "%s: file too large to parse in one buffer\n", argv[1]);
        goto out_close;
    }
    maplen = (size_t)s.st_size;

    txtdoc = mmap(NULL, maplen, PROT_READ, MAP_PRIVATE, fd, 0);
    if (txtdoc == MAP_FAILED) {
        perror("mmap");
        goto out_close;
    }

    xmlInitParser();

    c = htmlNewParserCtxt();
    if (c == NULL) {
        fprintf(stderr, "%s: could not create HTML parser context\n", argv[0]);
        goto out_cleanup_parser;
    }

    /* The mapping is not NUL-terminated, so the length must come from the
     * file size; strlen() could run past the end of the mapping or stop
     * early at an embedded NUL byte. */
    xmldoc = htmlCtxtReadMemory(c, txtdoc, (int)maplen, "", NULL, HTML_PARSE_RECOVER);
    /* Open question from the original author: why/how/when does libxml2 use
     * networking when HTML_PARSE_NONET is not specified? */
    if (xmldoc != NULL) {
        rc = 0;
        xmlFreeDoc(xmldoc);
    } else {
        fprintf(stderr, "%s: HTML parser returned no document\n", argv[1]);
    }

    htmlFreeParserCtxt(c);

out_cleanup_parser:
    xmlCleanupParser();
    munmap(txtdoc, maplen);
out_close:
    close(fd);
    return rc;
}
|