summary refs log tree commit diff
diff options
context:
space:
mode:
author Hiltjo Posthuma <hiltjo@codemadness.org> 2020-09-30 11:42:07 +0100
committer KatolaZ <katolaz@freaknet.org> 2020-09-30 11:42:07 +0100
commit 60c249ec24ab865c4a55759c7ffde2da99530b1d (patch)
tree 38494b9c66d9e915a6de57e89f3720627aaa1f6a
parent b416c171bb34297d7f8bc4c027de7136a113d144 (diff)
several improvements and more efficient xml parser
-rw-r--r-- xml.c 36
-rw-r--r-- xml.h 12
-rw-r--r-- xml2tsv.c 93
3 files changed, 38 insertions, 103 deletions
diff --git a/xml.c b/xml.c
index e6c27d7..67ad5a8 100644
--- a/xml.c
+++ b/xml.c
@@ -116,49 +116,19 @@ startvalue:
static void
xml_parsecomment(XMLParser *x)
{
- size_t datalen = 0, i = 0;
+ size_t i = 0;
int c;
- if (x->xmlcommentstart)
- x->xmlcommentstart(x);
while ((c = GETNEXT()) != EOF) {
- if (c == '-' || c == '>') {
- if (x->xmlcomment && datalen) {
- x->data[datalen] = '\0';
- x->xmlcomment(x, x->data, datalen);
- datalen = 0;
- }
- }
-
if (c == '-') {
- if (++i > 2) {
- if (x->xmlcomment)
- for (; i > 2; i--)
- x->xmlcomment(x, "-", 1);
+ if (++i > 2)
i = 2;
- }
continue;
} else if (c == '>' && i == 2) {
- if (x->xmlcommentend)
- x->xmlcommentend(x);
return;
} else if (i) {
- if (x->xmlcomment) {
- for (; i > 0; i--)
- x->xmlcomment(x, "-", 1);
- }
i = 0;
}
-
- if (datalen < sizeof(x->data) - 1) {
- x->data[datalen++] = c;
- } else {
- x->data[datalen] = '\0';
- if (x->xmlcomment)
- x->xmlcomment(x, x->data, datalen);
- x->data[0] = c;
- datalen = 1;
- }
}
}
@@ -286,7 +256,7 @@ numericentitytostr(const char *e, char *buf, size_t bufsiz)
l = strtol(++e, &end, 16);
else
l = strtol(e, &end, 10);
- /* invalid value or not a well-formed entity or invalid codepoint */
+ /* invalid value or not a well-formed entity or invalid code point */
if (errno || e == end || *end != ';' || l < 0 || l > 0x10ffff)
return -1;
len = codepointtoutf8(l, buf);
diff --git a/xml.h b/xml.h
index 6ee18b4..a2742db 100644
--- a/xml.h
+++ b/xml.h
@@ -1,5 +1,5 @@
-#ifndef _XML_H
-#define _XML_H
+#ifndef _XML_H_
+#define _XML_H_
#include <stdio.h>
@@ -16,9 +16,6 @@ typedef struct xmlparser {
void (*xmlcdatastart)(struct xmlparser *);
void (*xmlcdata)(struct xmlparser *, const char *, size_t);
void (*xmlcdataend)(struct xmlparser *);
- void (*xmlcommentstart)(struct xmlparser *);
- void (*xmlcomment)(struct xmlparser *, const char *, size_t);
- void (*xmlcommentend)(struct xmlparser *);
void (*xmldata)(struct xmlparser *, const char *, size_t);
void (*xmldataend)(struct xmlparser *);
void (*xmldataentity)(struct xmlparser *, const char *, size_t);
@@ -29,8 +26,9 @@ typedef struct xmlparser {
size_t, int);
#ifndef GETNEXT
- #define GETNEXT (x)->getnext
- int (*getnext)(void);
+ /* GETNEXT overridden to reduce function call overhead and
+ further context optimizations. */
+ #define GETNEXT getchar
#endif
/* current tag */
diff --git a/xml2tsv.c b/xml2tsv.c
index 36aef3a..342d900 100644
--- a/xml2tsv.c
+++ b/xml2tsv.c
@@ -64,7 +64,7 @@ void stack_init(tstack_t *t){
/* utility functions */
/* quote_print: quote \\, \n, \t, and strip other ctrl chars */
-void quote_print(FILE *f, const char *s){
+void quote_print(const char *s){
const char *tmp = s;
size_t len;
int i;
@@ -72,36 +72,45 @@ void quote_print(FILE *f, const char *s){
len = strcspn(tmp, "\\\n\t");
for(i=0; i<len; i++, tmp++){
if (!iscntrl((unsigned char)*tmp)){
- fwrite(tmp, 1, 1, f);
+ putchar(*tmp);
}
}
switch (*tmp){
case '\n':
if (len > 0){
- fprintf(f, "\\n");
+ fputs("\\n", stdout);
}
tmp ++;
break;
case '\t':
- fprintf(f, "\\t");
+ fputs("\\t", stdout);
tmp ++;
break;
case '\r':
- fprintf(f, "\\r");
+ fputs("\\r", stdout);
tmp ++;
break;
case '\\':
- fprintf(f, "\\\\");
+ fputs("\\\\", stdout);
tmp ++;
break;
}
}
}
-void print_cur_str(FILE *f, tstack_t *t){
+void print_cur_str(tstack_t *t){
int i;
for (i=0; i<=t->top; i++){
- fprintf(f, "/%s", t->st[i]);
+ putchar('/');
+ fputs(t->st[i], stdout);
+ }
+}
+
+void print_cur_str_fp(FILE *f, tstack_t *t){
+ int i;
+ for (i=0; i<=t->top; i++){
+ fputc('/', f);
+ fputs(t->st[i], f);
}
}
@@ -110,13 +119,13 @@ void print_cur_str(FILE *f, tstack_t *t){
tstack_t st;
char emitsep;
-/* xml callbacks */
+/* XML callbacks */
void
xmlattr(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
const char *v, size_t vl)
{
- printf("%s", v);
+ fputs(v, stdout);
}
void
@@ -133,56 +142,33 @@ xmlattrentity(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
}
void
-xmlattrend(XMLParser *x, const char *t, size_t tl, const char *a, size_t al)
-{
-}
-
-void
xmlattrstart(XMLParser *x, const char *t, size_t tl, const char *a, size_t al)
{
- printf("%c%s%c", SEP, a, SATTR);
+ putchar(SEP);
+ fputs(a, stdout);
+ putchar(SATTR);
}
void
xmlcdatastart(XMLParser *x)
{
- printf("%c", SEP);
+ putchar(SEP);
}
void
xmlcdata(XMLParser *x, const char *d, size_t dl)
{
- quote_print(stdout, d);
-}
-
-void
-xmlcdataend(XMLParser *x)
-{
-}
-
-void
-xmlcommentstart(XMLParser *x)
-{
-}
-
-void
-xmlcomment(XMLParser *x, const char *c, size_t cl)
-{
-}
-
-void
-xmlcommentend(XMLParser *x)
-{
+ quote_print(d);
}
void
xmldata(XMLParser *x, const char *d, size_t dl)
{
if (strcspn(d, " \t\n") && emitsep){
- printf("%c", SEP);
+ putchar(SEP);
emitsep = FALSE;
}
- quote_print(stdout, d);
+ quote_print(d);
}
void
@@ -220,12 +206,6 @@ xmltagend(XMLParser *x, const char *t, size_t tl, int isshort)
if (strcmp(t, tag)){
fprintf(stderr, "Error: tag-end '%s' closes tag '%s'\n", t, tag);
}
-
-/* if (isshort) {
- printf("\n");
- print_cur_str(stdout, &st);
- }
-*/
}
void
@@ -235,13 +215,8 @@ xmltagstart(XMLParser *x, const char *t, size_t tl)
fprintf(stderr, "Error: stack full. Ignoring tag '%s' (parent tag: '%s')\n", t, stack_peek(&st));
return;
}
- printf("\n");
- print_cur_str(stdout, &st);
-}
-
-void
-xmltagstartparsed(XMLParser *x, const char *t, size_t tl, int isshort)
-{
+ putchar('\n');
+ print_cur_str(&st);
}
int
@@ -252,30 +227,22 @@ main(void)
XMLParser x = { 0 };
x.xmlattr = xmlattr;
- x.xmlattrend = xmlattrend;
x.xmlattrstart = xmlattrstart;
x.xmlattrentity = xmlattrentity;
x.xmlcdatastart = xmlcdatastart;
x.xmlcdata = xmlcdata;
- x.xmlcdataend = xmlcdataend;
- x.xmlcommentstart = xmlcommentstart;
- x.xmlcomment = xmlcomment;
- x.xmlcommentend = xmlcommentend;
x.xmldata = xmldata;
x.xmldataend = xmldataend;
x.xmldataentity = xmldataentity;
x.xmldatastart = xmldatastart;
x.xmltagend = xmltagend;
x.xmltagstart = xmltagstart;
- x.xmltagstartparsed = xmltagstartparsed;
-
- x.getnext = getchar;
xml_parse(&x);
- printf("\n");
+ putchar('\n');
if (! stack_empty(&st)) {
fprintf(stderr, "Error: tags still open at EOF: ");
- print_cur_str(stderr, &st);
+ print_cur_str_fp(stderr, &st);
fprintf(stderr, "\n");
}
return 0;