1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>

#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#include <libxml/HTMLparser.h>
/*
 * Parse the HTML file named by argv[1] with libxml2's HTML parser, then
 * release every resource that was acquired.
 *
 * The file is mapped read-only and passed to htmlCtxtReadMemory() together
 * with its real size in bytes. A mapped file is NOT NUL-terminated, so its
 * length must come from fstat(), never from strlen().
 *
 * Returns 0 when the document was parsed (or the file is empty, in which
 * case there is nothing to parse), and 1 on a usage error, an I/O error or
 * a parser failure.
 */
int main (int argc, char ** argv) {
    int rc = 1;
    int fd = -1;
    char *txtdoc = MAP_FAILED;
    size_t maplen = 0;
    struct stat s;
    htmlParserCtxtPtr c = NULL;
    htmlDocPtr xmldoc = NULL;

    if (argc < 2) {
        fprintf(stderr, "usage: %s FILE\n", argc > 0 ? argv[0] : "htmlparse");
        return 1;
    }

    fd = open(argv[1], O_RDONLY);
    if (fd < 0) {
        perror(argv[1]);
        return 1;
    }

    /* fstat() on the descriptor describes the file we actually opened;
     * stat() on the path could race with a rename/replace. */
    if (fstat(fd, &s) != 0) {
        perror("fstat");
        goto out_close;
    }

    /* mmap() rejects a zero-length mapping, so handle empty files here. */
    if (s.st_size == 0) {
        fprintf(stderr, "%s: empty file, nothing to parse\n", argv[1]);
        rc = 0;
        goto out_close;
    }

    /* htmlCtxtReadMemory() takes the buffer size as an int. */
    if (s.st_size < 0 || s.st_size > INT_MAX) {
        fprintf(stderr, "%s: file too large to parse in one buffer\n", argv[1]);
        goto out_close;
    }
    maplen = (size_t) s.st_size;

    txtdoc = mmap(NULL, maplen, PROT_READ, MAP_PRIVATE, fd, 0);
    if (txtdoc == MAP_FAILED) {
        perror("mmap");
        goto out_close;
    }

    xmlInitParser();

    c = htmlNewParserCtxt();
    if (c == NULL) {
        fprintf(stderr, "htmlNewParserCtxt: could not allocate a parser context\n");
        goto out_xml;
    }

    /*
     * Open question carried over from the original author: why/how/when
     * does libxml2 use networking when HTML_PARSE_NONET is not specified?
     * (HTML_PARSE_NONET is the option that forbids network access; it is
     * left unset here so the parse options stay exactly as they were.)
     */
    xmldoc = htmlCtxtReadMemory(c, txtdoc, (int) maplen, "", NULL, HTML_PARSE_RECOVER);
    if (xmldoc == NULL) {
        fprintf(stderr, "%s: HTML parsing failed\n", argv[1]);
    } else {
        rc = 0;
    }

    htmlFreeParserCtxt(c);
    xmlFreeDoc(xmldoc);     /* a NULL document is accepted and ignored */

out_xml:
    xmlCleanupParser();
    munmap(txtdoc, maplen);
out_close:
    close(fd);
    return rc;
}
|