summaryrefslogtreecommitdiffstats
path: root/src/bencoding.c
diff options
context:
space:
mode:
authorAnton Luka Šijanec <anton@sijanec.eu>2022-11-21 20:11:12 +0100
committerAnton Luka Šijanec <anton@sijanec.eu>2022-11-21 20:11:12 +0100
commit8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb (patch)
tree27508c3ffa05f5934bd7af60c34736d89e0e5954 /src/bencoding.c
parentinitial commit, UNTESTED bencoding parser (diff)
downloadtravnik-8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb.tar
travnik-8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb.tar.gz
travnik-8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb.tar.bz2
travnik-8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb.tar.lz
travnik-8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb.tar.xz
travnik-8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb.tar.zst
travnik-8e1cbd8f7bd5deb3310be4f4f65f7aa6ea9a9beb.zip
Diffstat (limited to 'src/bencoding.c')
-rw-r--r--src/bencoding.c259
1 files changed, 228 insertions, 31 deletions
diff --git a/src/bencoding.c b/src/bencoding.c
index 8c32399..d1324c7 100644
--- a/src/bencoding.c
+++ b/src/bencoding.c
@@ -21,11 +21,10 @@ struct bencoding {
struct bencoding * next; /**< NULL if element is not member of a list or dict */
struct bencoding * prev;
struct bencoding * child; /**< NULL if element is not a list or dict or if it has 0 children */
- struct bencoding * parent;
- enum benc type; /**< type of this element */
- struct bencoding * key; /**< the key element, string according to the spec, applicable for list and dict */
- char * value; /**< always set to the content of the element, value is not null terminated unless terminate opt is set */
- size_t valuelen; /**< length of string value, as value is not null terminated */
+ enum benc type; /**< type | opts of this element */
+ struct bencoding * key; /**< the key element, string according to the spec, applicable for dict */
+ char * value; /**< set to the content of the element, value is not null terminated unless terminate opt is set. NULL for dict and list. */
+ size_t valuelen; /**< length of string value, as value is not null terminated, internal value for list or dict. */
int intvalue;
int index;
char oldterminator; /**< when opts&terminate, the character that was replaced with \0 is stored here */
@@ -41,12 +40,10 @@ struct bencoding {
void free_bencoding (struct bencoding * b) {
if (!b)
return;
- struct bencoding * s = b;
- while (s) /* we free all siblings should they exist */
- free_bencoding(s = s->next);
free_bencoding(b->child); /* we free the child should it exist. it can be NULL. */
free_bencoding(b->key); /* should this be an element of a dict, free the key */
- free(b); /* we free the element */
+ free_bencoding(b->next);
+ free(b);
return;
}
@@ -58,6 +55,201 @@ void free_bencoding (struct bencoding * b) {
#define MIN(x, y) ((x) <= (y) ? (x) : (y))
/**
+ * number of bytes a single character of a string occupies once it is written into a JSON string
+ *
+ * the double quote, the backslash and \b, \f, \n, \r, \t take a two byte escape. any other character that compares lower than the space character takes a six byte escape. everything else takes one byte.
+ *
+ * @param a [in] the character in question
+ * @return 2, 6 or 1, as described above
+ */
+
+int b2json_charsize (char a) {
+	switch (a) {
+		case '"':
+		case '\\':
+		case '\b':
+		case '\f':
+		case '\n':
+		case '\r':
+		case '\t':
+			return 2; /* backslash plus one letter */
+		default:
+			break;
+	}
+	return a < ' ' ? 6 : 1; /* six bytes for the long escape form, otherwise the character itself */
+}
+
+/**
+ * write the representation a character has inside a JSON string
+ *
+ * the double quote, the backslash and \b, \f, \n, \r, \t are written as a two byte escape. any other character that compares lower than the space character is written as the six bytes \u00XX, where XX is the value of the byte as two lowercase hex digits. everything else is copied unchanged. no terminating null byte is written.
+ *
+ * the number of bytes written is the number b2json_charsize returns for the same character.
+ *
+ * @param dest [out] destination, must have room for up to 6 bytes
+ * @param a [in] the character in question
+ * @return the destination pointer, incremented for the number of bytes written
+ */
+
+char * b2json_charrepr (char * dest, char a) {
+	const char * escape = NULL;
+	switch (a) {
+		case '"':
+			escape = "\\\"";
+			break;
+		case '\\':
+			escape = "\\\\";
+			break;
+		case '\b':
+			escape = "\\b";
+			break;
+		case '\f':
+			escape = "\\f";
+			break;
+		case '\n':
+			escape = "\\n";
+			break;
+		case '\r':
+			escape = "\\r";
+			break;
+		case '\t':
+			escape = "\\t";
+			break;
+		default:
+			break;
+	}
+	if (escape) {
+		memcpy(dest, escape, 2); /* memcpy, because the terminator of the literal must not be copied */
+		return dest+2;
+	}
+	if (a < ' ') {
+		char buf[7];
+		/* where char is signed, bytes above 0x7f are negative and end up here. they must be formatted as unsigned char, otherwise the promoted negative int prints eight hex digits and overflows buf */
+		snprintf(buf, sizeof buf, "\\u00%02x", (unsigned) (unsigned char) a);
+		memcpy(dest, buf, 6);
+		return dest+6;
+	}
+	*dest++ = a;
+	return dest;
+}
+
+
+/**
+ * get size required for JSON representation of a bencoding struct. the terminating null byte is not counted, because b2json does not write it. write it yourself.
+ *
+ * the count mirrors what b2json writes: 4 bytes (null) for a NULL pointer and 5 bytes (false) for a struct whose type is none of string, num, list or dict.
+ *
+ * @param b [in] bencoding structure of a bdecoded element, may be NULL
+ * @return number of bytes b2json writes for b
+ */
+
+int b2json_length (struct bencoding * b) {
+	if (!b)
+		return 4; /* null */
+	if (b->type & string) {
+		int size = 2; /* opening and closing quote */
+		for (size_t i = 0; i < b->valuelen; i++) {
+			char c = b->value[i];
+			/* same substitution as in b2json: the last byte is counted as the stored original character when oldterminatorls is set. counting per byte also keeps an empty string at 2 bytes instead of subtracting from it */
+			if (i == b->valuelen-1 && b->oldterminatorls)
+				c = b->oldterminatorls;
+			size += b2json_charsize(c);
+		}
+		return size;
+	}
+	if (b->type & num)
+		return snprintf(NULL, 0, "%d", b->intvalue); /* length of the decimal representation, nothing is written */
+	if (b->type & list) {
+		int size = 2; /* brackets */
+		for (struct bencoding * t = b->child; t; t = t->next) {
+			if (t != b->child)
+				size++; /* comma before every element but the first */
+			size += b2json_length(t);
+		}
+		return size;
+	}
+	if (b->type & dict) {
+		int size = 2; /* braces */
+		for (struct bencoding * t = b->child; t; t = t->next) {
+			if (t != b->child)
+				size++; /* comma before every pair but the first */
+			size += b2json_length(t->key) + 1 + b2json_length(t); /* key, colon, value */
+		}
+		return size;
+	}
+	return 5; /* false */
+}
+
+/**
+ * write json representation of a bencoding struct. does not write the terminating null byte, b2json_length does not include it in its count. add it yourself. writes exactly b2json_length bytes.
+ *
+ * writes false when struct has an incorrect type and null when NULL pointer is passed, this is consistent with b2json_length.
+ *
+ * @param dest [out] destination, must have room for b2json_length(b) bytes
+ * @param b [in] bencoding structure of a bdecoded element, may be NULL
+ * @return the destination pointer, incremented for the number of bytes written
+ */
+
+char * b2json (char * dest, struct bencoding * b) {
+	if (!b) {
+		memcpy(dest, "null", 4);
+		return dest+4;
+	}
+	if (b->type & string) {
+		*dest++ = '"';
+		for (size_t i = 0; i < b->valuelen; i++) {
+			char c = b->value[i];
+			/* when oldterminatorls is set, the stored original character is written in place of the last byte */
+			if (i == b->valuelen-1 && b->oldterminatorls)
+				c = b->oldterminatorls;
+			dest = b2json_charrepr(dest, c);
+		}
+		*dest++ = '"';
+		return dest;
+	}
+	if (b->type & num) {
+		char buf[32];
+		/* format into a scratch buffer first, so that the null byte snprintf appends never reaches dest */
+		int written = snprintf(buf, sizeof buf, "%d", b->intvalue);
+		if (written < 0)
+			written = 0;
+		if ((size_t) written >= sizeof buf)
+			written = sizeof buf - 1;
+		memcpy(dest, buf, written);
+		return dest+written;
+	}
+	if (b->type & list) {
+		*dest++ = '[';
+		for (struct bencoding * t = b->child; t; t = t->next) {
+			if (t != b->child)
+				*dest++ = ',';
+			dest = b2json(dest, t);
+		}
+		*dest++ = ']';
+		return dest;
+	}
+	if (b->type & dict) {
+		*dest++ = '{';
+		for (struct bencoding * t = b->child; t; t = t->next) {
+			if (t != b->child)
+				*dest++ = ',';
+			dest = b2json(dest, t->key);
+			*dest++ = ':';
+			dest = b2json(dest, t);
+		}
+		*dest++ = '}';
+		return dest;
+	}
+	memcpy(dest, "false", 5); /* all five bytes: b2json_length counts 5 for this case */
+	return dest+5;
+}
+
+/**
* macro that allocas a C string from a bencoding string or other element. non-string elements return their raw bencoded content.
* dereferences structure without checking.
* resulting C string is NULL terminated, cannot contain NULL, DO NOT dereference bytes after the NULL terminator.
@@ -108,7 +300,7 @@ struct bencoding * bdecode (char * s, int len, enum benc opts) {
b->value = s+1;
if (len == -1 || memchr(s, 'e', len)) { /* correct string or end found */
b->intvalue = strtol(b->value, &c, 10);
- b->valuelen = (c-1)-b->value;
+ b->valuelen = c-b->value;
}
break;
case 'd': /* dict */
@@ -117,49 +309,54 @@ struct bencoding * bdecode (char * s, int len, enum benc opts) {
case 'l': /* list */
if (!b->type)
b->type = list;
- c = s;
+ c = s+1;
struct bencoding * arbeit = NULL;
struct bencoding * oldarbeit = NULL;
struct bencoding * oldoldarbeit = NULL; /* for dicts, holds previous value */
int index = 0;
- b->value = s+1;
- char oldterminator = '\0';
- while (len == -1 || ++c <= s+len) { /* s+len is max we are allowed to read */
- if (opts&terminate && oldarbeit && oldarbeit->oldterminator)
- c[0] = oldterminator;
+ while (len == -1 || c <= s+len) { /* s+len is max we are allowed to read */
+ if (oldarbeit && oldarbeit->type & string && oldarbeit->type & terminate && oldarbeit->oldterminator)
+ c[0] = oldarbeit->oldterminator;
arbeit = bdecode(c, len == -1 ? -1 : len-(c-s), opts);
- if (opts&terminate && oldarbeit && oldarbeit->oldterminator)
+ if (oldarbeit && oldarbeit->type & string && oldarbeit->type & terminate && oldarbeit->oldterminator)
c[0] = '\0';
if (!arbeit) /* bdecoding failed or last element */
break;
-#define ISDICT (b->type == dict)
+#define ISDICT (b->type & dict)
#define ISLIST !ISDICT
-#define ISVAL (index % 2 == 1)
+#define ISVAL (index % 2)
#define ISKEY !ISVAL
if (ISDICT && ISVAL)
arbeit->key = oldarbeit;
- c = arbeit->value+arbeit->valuelen; /* this is safe, function's vallen should not be in forbidden */
- if (arbeit->type&(num|dict|list) && c <= s+len && c[0] == 'e') /* but vallen+1 may be */
- c++;
- c--; /* while cond will inc again */
+ if (arbeit->type & num)
+ c = arbeit->value+arbeit->valuelen+1;
+ else if (arbeit->type & string)
+ c = arbeit->value+arbeit->valuelen;
+ else if (arbeit->type & (list | dict))
+ c += arbeit->valuelen;
arbeit->prev = ISDICT ? ISVAL ? oldoldarbeit : oldarbeit : oldarbeit;
arbeit->index = ISDICT ? index/2 : index;
- if (ISLIST)
+ if (ISLIST) {
if (index)
oldarbeit->next = arbeit;
else
b->child = arbeit;
- if (ISDICT)
+ }
+ if (ISDICT) {
if (index == 1)
- b->child = oldarbeit;
+ b->child = arbeit;
else if (ISVAL)
oldoldarbeit->next = arbeit;
+ }
oldoldarbeit = oldarbeit;
oldarbeit = arbeit;
index++;
}
- b->valuelen = (c-1)-b->value; /* c-1 is the last character in list or last readable character if out of l */
- break;
+ b->valuelen = c-s + 1;
+ b->type = b->type | opts;
+ if (ISDICT && ISVAL) // e je torej value, če je prej samoten key
+ free_bencoding(oldarbeit); // this key would be otherwise leaked
+ return b;
case 'e': /* end of list/dict */
free(b);
return NULL;
@@ -170,15 +367,15 @@ struct bencoding * bdecode (char * s, int len, enum benc opts) {
}
b->type = string;
if (len == -1 || (b->value = memchr(s, ':', len))) {
- b->valuelen = strtol(s, NULL, 10);
- b->value++;
+ b->valuelen = strtol(s, &c, 10);
+ b->value = c+1;
if (len != -1 && (unsigned)len < b->valuelen + (b->value - s) /* len minus prefix; strlen & colon */)
b->valuelen = len - (b->value - s); /* malformed bencoded data, truncating string */
}
break;
}
if (opts & terminate) {
- if (len != -1 && b->valuelen+1+(b->value-s) < (unsigned) len) { /* no space for terminator, put it on last char */
+ if (len != -1 && b->valuelen+1+(b->value-s) > (unsigned) len) { /* no space for terminator, put it on last char */
b->oldterminatorls = b->value[b->valuelen-1];
b->value[b->valuelen-1] = '\0';
} else {