diff options
-rw-r--r-- | Makefile | 11 | ||||
-rw-r--r-- | src/blocks.c | 58 | ||||
-rw-r--r-- | src/buffer.c | 69 | ||||
-rw-r--r-- | src/buffer.h | 19 | ||||
-rw-r--r-- | src/html.c | 276 | ||||
-rw-r--r-- | src/inlines.c | 4 | ||||
-rw-r--r-- | src/main.c | 142 | ||||
-rw-r--r-- | src/print.c | 307 | ||||
-rw-r--r-- | src/stmd.h | 13 | ||||
-rw-r--r-- | src/utf8.c | 6 | ||||
-rw-r--r-- | src/utf8.h | 6 |
11 files changed, 304 insertions, 607 deletions
@@ -6,7 +6,7 @@ DATADIR=data PROG=./stmd .PHONY: all oldtests test spec benchjs testjs -all: $(SRCDIR)/case_fold_switch.c $(PROG) +all: $(SRCDIR)/case_fold_switch.inc $(PROG) README.html: README.md template.html pandoc --template template.html -S -s -t html5 -o $@ $< @@ -41,13 +41,16 @@ testjs: spec.txt benchjs: node js/bench.js ${BENCHINP} -$(PROG): $(SRCDIR)/main.c $(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/html.o $(SRCDIR)/utf8.o +HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o +STMD_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o + +$(PROG): $(SRCDIR)/main.c $(HTML_OBJ) $(STMD_OBJ) $(CC) $(LDFLAGS) -o $@ $^ $(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re re2c --case-insensitive -bis $< > $@ || (rm $@ && false) -$(SRCDIR)/case_fold_switch.inc $(DATADIR)/CaseFolding-3.2.0.txt +$(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding-3.2.0.txt perl mkcasefold.pl < $< > $@ .PHONY: leakcheck clean fuzztest dingus upload @@ -72,7 +75,7 @@ update-site: spec.html narrative.html (cd _site ; git pull ; git commit -a -m "Updated site for latest spec, narrative, js" ; git push; cd ..) clean: - -rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c + -rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c $(SRCDIR)/html/*.o -rm -rf *.dSYM -rm -f README.html -rm -f spec.md fuzz.txt spec.html diff --git a/src/blocks.c b/src/blocks.c index eabac03..71dc830 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -3,11 +3,12 @@ #include <stdio.h> #include <stdbool.h> #include <ctype.h> -#include "bstrlib.h" + #include "stmd.h" -#include "uthash.h" -#include "debug.h" #include "scanners.h" +#include "uthash.h" + +static void finalize(block* b, int line_number); static block* make_block(int tag, int start_line, int start_column) { @@ -140,7 +141,7 @@ static int break_out_of_lists(block ** bptr, int line_number) } -extern void finalize(block* b, int line_number) +static void finalize(block* b, int line_number) { int firstlinelen; int pos; @@ -364,7 +365,7 @@ static int lists_match(struct ListData list_data, list_data.bullet_char == item_data.bullet_char); } -static void expand_tabs(gh_buf *ob, const char *line, size_t size) +static void expand_tabs(gh_buf *ob, const unsigned char *line, size_t size) { size_t i = 0, tab = 0; @@ -389,13 +390,43 @@ static void expand_tabs(gh_buf *ob, const char *line, size_t size) } } -extern block *stmd_parse_document(const char *buffer, size_t len) +static block *finalize_parsing(block *document, int linenum) { - gh_buf line = GH_BUF_INIT; + while (document != document->top) { + finalize(document, linenum); + document = document->parent; + } + + finalize(document, linenum); + process_inlines(document, document->attributes.refmap); + + return document; +} +extern block *stmd_parse_file(FILE *f) +{ + gh_buf line = GH_BUF_INIT; + unsigned char buffer[4096]; + int linenum = 1; block *document = make_document(); + + while (fgets((char *)buffer, sizeof(buffer), f)) { + expand_tabs(&line, buffer, strlen(buffer)); + incorporate_line(&line, linenum, &document); + gh_buf_clear(&line); + linenum++; + } + + gh_buf_free(&line); + return finalize_document(document, linenum); +} + +extern block *stmd_parse_document(const unsigned char *buffer, size_t len) +{ + gh_buf line = GH_BUF_INIT; int linenum = 1; - const char *end = buffer + len; + const unsigned char *end = buffer + len; + block *document = make_document(); while (buffer < end) { const char *eol = memchr(buffer, '\n', end - buffer); @@ -414,16 +445,7 @@ extern block *stmd_parse_document(const char *buffer, size_t len) } gh_buf_free(&line); - - while (document != document->top) { - finalize(document, linenum); - document = document->parent; - } - - finalize(document, linenum); - process_inlines(document, document->attributes.refmap); - - return document; + return finalize_document(document, linenum); } // Process one line at a time, modifying a block. diff --git a/src/buffer.c b/src/buffer.c index b81e7fa..17dc864 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -31,10 +31,10 @@ void gh_buf_init(gh_buf *buf, int initial_size) int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom) { - char *new_ptr; - size_t new_size; + unsigned char *new_ptr; + int new_size; - if (buf->ptr == gh_buf__oom || buf->asize < 0) + if (buf->ptr == gh_buf__oom) return -1; if (target_size <= buf->asize) @@ -79,7 +79,7 @@ void gh_buf_free(gh_buf *buf) { if (!buf) return; - if (buf->asize > 0 && buf->ptr != gh_buf__initbuf && buf->ptr != gh_buf__oom) + if (buf->ptr != gh_buf__initbuf && buf->ptr != gh_buf__oom) free(buf->ptr); gh_buf_init(buf, 0); @@ -91,14 +91,9 @@ void gh_buf_clear(gh_buf *buf) if (buf->asize > 0) buf->ptr[0] = '\0'; - - if (buf->asize < 0) { - buf->ptr = gh_buf__initbuf; - buf->asize = 0; - } } -int gh_buf_set(gh_buf *buf, const char *data, int len) +int gh_buf_set(gh_buf *buf, const unsigned char *data, int len) { if (len == 0 || data == NULL) { gh_buf_clear(buf); @@ -115,10 +110,12 @@ int gh_buf_set(gh_buf *buf, const char *data, int len) int gh_buf_sets(gh_buf *buf, const char *string) { - return gh_buf_set(buf, string, string ? strlen(string) : 0); + return gh_buf_set(buf, + (const unsigned char *)string, + string ? strlen(string) : 0); } -int gh_buf_putc(gh_buf *buf, char c) +int gh_buf_putc(gh_buf *buf, int c) { ENSURE_SIZE(buf, buf->size + 2); buf->ptr[buf->size++] = c; @@ -126,7 +123,7 @@ int gh_buf_putc(gh_buf *buf, char c) return 0; } -int gh_buf_put(gh_buf *buf, const char *data, int len) +int gh_buf_put(gh_buf *buf, const unsigned char *data, int len) { ENSURE_SIZE(buf, buf->size + len + 1); memmove(buf->ptr + buf->size, data, len); @@ -137,8 +134,7 @@ int gh_buf_put(gh_buf *buf, const char *data, int len) int gh_buf_puts(gh_buf *buf, const char *string) { - assert(string); - return gh_buf_put(buf, string, strlen(string)); + return gh_buf_put(buf, (const unsigned char *)string, strlen(string)); } int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap) @@ -153,7 +149,7 @@ int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap) va_copy(args, ap); len = vsnprintf( - buf->ptr + buf->size, + (char *)buf->ptr + buf->size, buf->asize - buf->size, format, args ); @@ -187,9 +183,9 @@ int gh_buf_printf(gh_buf *buf, const char *format, ...) return r; } -void gh_buf_copy_cstr(char *data, size_t datasize, const gh_buf *buf) +void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf) { - size_t copylen; + int copylen; assert(data && datasize && buf); @@ -212,9 +208,9 @@ void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b) *buf_b = t; } -char *gh_buf_detach(gh_buf *buf) +unsigned char *gh_buf_detach(gh_buf *buf) { - char *data = buf->ptr; + unsigned char *data = buf->ptr; if (buf->asize == 0 || buf->ptr == gh_buf__oom) return NULL; @@ -224,13 +220,13 @@ char *gh_buf_detach(gh_buf *buf) return data; } -void gh_buf_attach(gh_buf *buf, char *ptr, int asize) +void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize) { gh_buf_free(buf); if (ptr) { buf->ptr = ptr; - buf->size = strlen(ptr); + buf->size = strlen((char *)ptr); if (asize) buf->asize = (asize < buf->size) ? buf->size + 1 : asize; else /* pass 0 to fall back on strlen + 1 */ @@ -249,11 +245,11 @@ int gh_buf_cmp(const gh_buf *a, const gh_buf *b) int gh_buf_strchr(const gh_buf *buf, int c, int pos) { - const char *p = memchr(buf->ptr + pos, c, buf->size - pos); - if (!p) - return -1; + const char *p = memchr(buf->ptr + pos, c, buf->size - pos); + if (!p) + return -1; - return (int)(p - p->ptr); + return (int)(p - buf->ptr); } int gh_buf_strrchr(const gh_buf *buf, int c, int pos) @@ -270,36 +266,21 @@ int gh_buf_strrchr(const gh_buf *buf, int c, int pos) void gh_buf_truncate(gh_buf *buf, size_t len) { - assert(buf->asize >= 0); - if (len < buf->size) { buf->size = len; buf->ptr[buf->size] = '\0'; } } -void gh_buf_ltruncate(gh_buf *buf, size_t len) -{ - assert(buf->asize >= 0); - - if (len && len < buf->size) { - memmove(buf->ptr, buf->ptr + len, buf->size - len); - buf->size -= len; - buf->ptr[buf->size] = '\0'; - } -} - void gh_buf_trim(gh_buf *buf) { - size_t i = 0; - - assert(buf->asize >= 0); - - /* ltrim */ + /* TODO: leading whitespace? */ + /* while (i < buf->size && isspace(buf->ptr[i])) i++; gh_buf_truncate(buf, i); + */ /* rtrim */ while (buf->size > 0) { diff --git a/src/buffer.h b/src/buffer.h index 2581ee3..422ef02 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -24,13 +24,6 @@ extern unsigned char gh_buf__oom[]; */ extern void gh_buf_init(gh_buf *buf, int initial_size); -static inline void gh_buf_static(gh_buf *buf, unsigned char *source) -{ - buf->ptr = source; - buf->size = strlen(source); - buf->asize = -1; -} - /** * Attempt to grow the buffer to hold at least `target_size` bytes. * @@ -81,13 +74,13 @@ static inline size_t gh_buf_len(const gh_buf *buf) extern int gh_buf_cmp(const gh_buf *a, const gh_buf *b); -extern void gh_buf_attach(gh_buf *buf, char *ptr, int asize); -extern char *gh_buf_detach(gh_buf *buf); +extern void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize); +extern unsigned char *gh_buf_detach(gh_buf *buf); extern void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf); static inline const char *gh_buf_cstr(const gh_buf *buf) { - return buf->ptr; + return (char *)buf->ptr; } #define gh_buf_at(buf, n) ((buf)->ptr[n]) @@ -100,10 +93,10 @@ static inline const char *gh_buf_cstr(const gh_buf *buf) * return code of these functions and call them in a series then just call * gh_buf_oom at the end. */ -extern int gh_buf_set(gh_buf *buf, const char *data, int len); +extern int gh_buf_set(gh_buf *buf, const unsigned char *data, int len); extern int gh_buf_sets(gh_buf *buf, const char *string); -extern int gh_buf_putc(gh_buf *buf, char c); -extern int gh_buf_put(gh_buf *buf, const char *data, int len); +extern int gh_buf_putc(gh_buf *buf, int c); +extern int gh_buf_put(gh_buf *buf, const unsigned char *data, int len); extern int gh_buf_puts(gh_buf *buf, const char *string); extern int gh_buf_printf(gh_buf *buf, const char *format, ...) __attribute__((format (printf, 2, 3))); diff --git a/src/html.c b/src/html.c deleted file mode 100644 index aeec5f1..0000000 --- a/src/html.c +++ /dev/null @@ -1,276 +0,0 @@ -#include <stdlib.h> -#include <stdio.h> -#include <stdbool.h> -#include "bstrlib.h" -#include "stmd.h" -#include "debug.h" -#include "scanners.h" - -// Functions to convert block and inline lists to HTML strings. - -// Escape special characters in HTML. More efficient than -// three calls to bfindreplace. If preserve_entities is set, -// existing entities are left alone. -static bstring escape_html(bstring inp, bool preserve_entities) -{ - int pos = 0; - int match; - char c; - bstring escapable = blk2bstr("&<>\"", 4); - bstring ent; - bstring s = bstrcpy(inp); - while ((pos = binchr(s, pos, escapable)) != BSTR_ERR) { - c = bchar(s,pos); - switch (c) { - case '<': - bdelete(s, pos, 1); - ent = blk2bstr("<", 4); - binsert(s, pos, ent, ' '); - bdestroy(ent); - pos += 4; - break; - case '>': - bdelete(s, pos, 1); - ent = blk2bstr(">", 4); - binsert(s, pos, ent, ' '); - bdestroy(ent); - pos += 4; - break; - case '&': - if (preserve_entities && (match = scan_entity(s, pos))) { - pos += match; - } else { - bdelete(s, pos, 1); - ent = blk2bstr("&", 5); - binsert(s, pos, ent, ' '); - bdestroy(ent); - pos += 5; - } - break; - case '"': - bdelete(s, pos, 1); - ent = blk2bstr(""", 6); - binsert(s, pos, ent, ' '); - bdestroy(ent); - pos += 6; - break; - default: - bdelete(s, pos, 1); - log_err("unexpected character %02x", c); - } - } - bdestroy(escapable); - return s; -} - -static inline void cr(bstring buffer) -{ - int c = bchar(buffer, blength(buffer) - 1); - if (c != '\n' && c) { - bconchar(buffer, '\n'); - } -} - -// Convert a block list to HTML. Returns 0 on success, and sets result. -extern int blocks_to_html(block* b, bstring* result, bool tight) -{ - bstring contents = NULL; - bstring escaped, escaped2; - struct bstrList * info_words; - struct ListData * data; - bstring mbstart; - bstring html = blk2bstr("", 0); - - while(b != NULL) { - switch(b->tag) { - case document: - check(blocks_to_html(b->children, &contents, false) == 0, - "error converting blocks to html"); - bformata(html, "%s", contents->data); - bdestroy(contents); - break; - case paragraph: - check(inlines_to_html(b->inline_content, &contents) == 0, - "error converting inlines to html"); - if (tight) { - bformata(html, "%s", contents->data); - } else { - cr(html); - bformata(html, "<p>%s</p>", contents->data); - cr(html); - } - bdestroy(contents); - break; - case block_quote: - check(blocks_to_html(b->children, &contents, false) == 0, - "error converting blocks to html"); - cr(html); - bformata(html, "<blockquote>\n%s</blockquote>", contents->data); - cr(html); - bdestroy(contents); - break; - case list_item: - check(blocks_to_html(b->children, &contents, tight) == 0, - "error converting blocks to html"); - brtrimws(contents); - cr(html); - bformata(html, "<li>%s</li>", contents->data); - cr(html); - bdestroy(contents); - break; - case list: - // make sure a list starts at the beginning of the line: - cr(html); - data = &(b->attributes.list_data); - check(blocks_to_html(b->children, &contents, data->tight) == 0, - "error converting blocks to html"); - mbstart = bformat(" start=\"%d\"", data->start); - bformata(html, "<%s%s>\n%s</%s>", - data->list_type == bullet ? "ul" : "ol", - data->start == 1 ? "" : (char*) mbstart->data, - contents->data, - data->list_type == bullet ? "ul" : "ol"); - cr(html); - bdestroy(contents); - bdestroy(mbstart); - break; - case atx_header: - case setext_header: - check(inlines_to_html(b->inline_content, &contents) == 0, - "error converting inlines to html"); - cr(html); - bformata(html, "<h%d>%s</h%d>", - b->attributes.header_level, - contents->data, - b->attributes.header_level); - cr(html); - bdestroy(contents); - break; - case indented_code: - escaped = escape_html(b->string_content, false); - cr(html); - bformata(html, "<pre><code>%s</code></pre>", escaped->data); - cr(html); - bdestroy(escaped); - break; - case fenced_code: - escaped = escape_html(b->string_content, false); - cr(html); - bformata(html, "<pre><code"); - if (blength(b->attributes.fenced_code_data.info) > 0) { - escaped2 = escape_html(b->attributes.fenced_code_data.info, true); - info_words = bsplit(escaped2, ' '); - bformata(html, " class=\"language-%s\"", info_words->entry[0]->data); - bdestroy(escaped2); - bstrListDestroy(info_words); - } - bformata(html, ">%s</code></pre>", escaped->data); - cr(html); - bdestroy(escaped); - break; - case html_block: - bformata(html, "%s", b->string_content->data); - break; - case hrule: - bformata(html, "<hr />"); - cr(html); - break; - case reference_def: - break; - default: - log_warn("block type %d not implemented\n", b->tag); - break; - } - b = b->next; - } - *result = html; - return 0; - error: - return -1; -} - -// Convert an inline list to HTML. Returns 0 on success, and sets result. -extern int inlines_to_html(inl* ils, bstring* result) -{ - bstring contents = NULL; - bstring html = blk2bstr("", 0); - bstring mbtitle, escaped, escaped2; - - while(ils != NULL) { - switch(ils->tag) { - case str: - escaped = escape_html(ils->content.literal, false); - bformata(html, "%s", escaped->data); - bdestroy(escaped); - break; - case linebreak: - bformata(html, "<br />\n"); - break; - case softbreak: - bformata(html, "\n"); - break; - case code: - escaped = escape_html(ils->content.literal, false); - bformata(html, "<code>%s</code>", escaped->data); - bdestroy(escaped); - break; - case raw_html: - case entity: - bformata(html, "%s", ils->content.literal->data); - break; - case link: - check(inlines_to_html(ils->content.inlines, &contents) == 0, - "error converting inlines to html"); - if (blength(ils->content.linkable.title) > 0) { - escaped = escape_html(ils->content.linkable.title, true); - mbtitle = bformat(" title=\"%s\"", escaped->data); - bdestroy(escaped); - } else { - mbtitle = blk2bstr("",0); - } - escaped = escape_html(ils->content.linkable.url, true); - bformata(html, "<a href=\"%s\"%s>%s</a>", - escaped->data, - mbtitle->data, - contents->data); - bdestroy(escaped); - bdestroy(mbtitle); - bdestroy(contents); - break; - case image: - check(inlines_to_html(ils->content.inlines, &contents) == 0, - "error converting inlines to html"); - escaped = escape_html(ils->content.linkable.url, true); - escaped2 = escape_html(contents, false); - bdestroy(contents); - bformata(html, "<img src=\"%s\" alt=\"%s\"", - escaped->data, escaped2->data); - bdestroy(escaped); - bdestroy(escaped2); - if (blength(ils->content.linkable.title) > 0) { - escaped = escape_html(ils->content.linkable.title, true); - bformata(html, " title=\"%s\"", escaped->data); - bdestroy(escaped); - } - bformata(html, " />"); - break; - case strong: - check(inlines_to_html(ils->content.inlines, &contents) == 0, - "error converting inlines to html"); - bformata(html, "<strong>%s</strong>", contents->data); - bdestroy(contents); - break; - case emph: - check(inlines_to_html(ils->content.inlines, &contents) == 0, - "error converting inlines to html"); - bformata(html, "<em>%s</em>", contents->data); - bdestroy(contents); - break; - } - ils = ils->next; - } - *result = html; - return 0; - error: - return -1; -} diff --git a/src/inlines.c b/src/inlines.c index 4ff45ad..82c7219 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -6,9 +6,7 @@ #include "stmd.h" #include "uthash.h" -#include "debug.h" #include "scanners.h" -#include "utf8.h" typedef struct Subject { const gh_buf *buffer; @@ -119,7 +117,7 @@ inline static inl* make_linkable(int t, inl* label, chunk url, chunk title) e->tag = t; e->content.linkable.label = label; e->content.linkable.url = chunk_to_cstr(&url); - e->content.linkable.title = chunk_to_cstr(&title); + e->content.linkable.title = url.len ? chunk_to_cstr(&title) : NULL; e->next = NULL; return e; } @@ -1,99 +1,77 @@ #include <stdlib.h> #include <stdio.h> -#include "bstrlib.h" +#include <string.h> #include "stmd.h" #include "debug.h" void print_usage() { - printf("Usage: stmd [FILE*]\n"); - printf("Options: --help, -h Print usage information\n"); - printf(" --ast Print AST instead of HTML\n"); - printf(" --version Print version\n"); + printf("Usage: stmd [FILE*]\n"); + printf("Options: --help, -h Print usage information\n"); + printf(" --ast Print AST instead of HTML\n"); + printf(" --version Print version\n"); } -int main(int argc, char *argv[]) { - int i; - bool ast = false; - int g = 0; - int numfps = 0; - int files[argc]; +static void print_document(block *document, bool ast) +{ + gh_buf html = GH_BUF_INIT; + + if (ast) { + print_blocks(document, 0); + } else { + blocks_to_html(&html, document, false); + printf("%s", html.ptr); + gh_buf_free(&html); + } +} - for (i=1; i < argc; i++) { - if (strcmp(argv[i], "--version") == 0) { - printf("stmd %s", VERSION); - printf(" - CommonMark converter (c) 2014 John MacFarlane\n"); - exit(0); - } else if ((strcmp(argv[i], "--help") == 0) || - (strcmp(argv[i], "-h") == 0)) { - print_usage(); - exit(0); - } else if (strcmp(argv[i], "--ast") == 0) { - ast = true; - } else if (*argv[i] == '-') { - print_usage(); - exit(1); - } else { // treat as file argument - files[g] = i; - g++; - } - } +int main(int argc, char *argv[]) +{ + int i, numfps = 0; + bool ast = false; + int files[argc]; + block *document = NULL; - numfps = g; - bstring s = NULL; - bstring html; - g = 0; - block * cur = make_document(); - int linenum = 1; - extern int errno; - FILE * fp = NULL; + for (i = 1; i < argc; i++) { + if (strcmp(argv[i], "--version") == 0) { + printf("stmd %s", VERSION); + printf(" - CommonMark converter (c) 2014 John MacFarlane\n"); + exit(0); + } else if ((strcmp(argv[i], "--help") == 0) || + (strcmp(argv[i], "-h") == 0)) { + print_usage(); + exit(0); + } else if (strcmp(argv[i], "--ast") == 0) { + ast = true; + } else if (*argv[i] == '-') { + print_usage(); + exit(1); + } else { // treat as file argument + files[numfps++] = i; + } + } - if (numfps == 0) { - // read from stdin - while ((s = bgets((bNgetc) fgetc, stdin, '\n'))) { - check(incorporate_line(s, linenum, &cur) == 0, - "error incorporating line %d", linenum); - bdestroy(s); - linenum++; - } - } else { - // iterate over input file pointers - for (g=0; g < numfps; g++) { + if (numfps == 0) { + document = stmd_parse_file(stdin); + print_document(document, ast); + free_blocks(document); + } else { + for (i = 0; i < numfps; i++) { + FILE *fp = fopen(argv[files[i]], "r"); - fp = fopen(argv[files[g]], "r"); - if (fp == NULL) { - fprintf(stderr, "Error opening file %s: %s\n", - argv[files[g]], strerror(errno)); - exit(1); - } + if (fp == NULL) { + fprintf(stderr, "Error opening file %s: %s\n", + argv[files[i]], strerror(errno)); + exit(1); + } - while ((s = bgets((bNgetc) fgetc, fp, '\n'))) { - check(incorporate_line(s, linenum, &cur) == 0, - "error incorporating line %d", linenum); - bdestroy(s); - linenum++; - } - fclose(fp); - } - } + document = stmd_parse_file(fp); + print_document(document, ast); + free_blocks(document); + fclose(fp); + } + } - while (cur != cur->top) { - finalize(cur, linenum); - cur = cur->parent; - } - check(cur == cur->top, "problems finalizing open containers"); - finalize(cur, linenum); - process_inlines(cur, cur->attributes.refmap); - if (ast) { - print_blocks(cur, 0); - } else { - check(blocks_to_html(cur, &html, false) == 0, "could not format as HTML"); - // printf("%s", html->data); - bdestroy(html); - } - free_blocks(cur); - return 0; -error: - return -1; + return 0; } diff --git a/src/print.c b/src/print.c index a924870..3ebde16 100644 --- a/src/print.c +++ b/src/print.c @@ -1,168 +1,175 @@ #include <stdlib.h> #include <stdio.h> -#include "bstrlib.h" +#include <string.h> #include "stmd.h" #include "debug.h" -static bstring format_str(bstring s) +static void print_str(const unsigned char *s, int len) { - int pos = 0; - int len = blength(s); - bstring result = bfromcstr(""); - char c; - bformata(result, "\""); - while (pos < len) { - c = bchar(s, pos); - switch (c) { - case '\n': - bformata(result, "\\n"); - break; - case '"': - bformata(result, "\\\""); - break; - case '\\': - bformata(result, "\\\\"); - break; - default: - bformata(result, "%c", c); - } - pos++; - } - bformata(result, "\""); - return result; + int i; + + if (len < 0) + len = strlen(s); + + putchar('"'); + for (i = 0; i < len; ++i) { + unsigned char c = s[i]; + + switch (c) { + case '\n': + printf("\\n"); + break; + case '"': + printf("\\\""); + break; + case '\\': + printf("\\\\"); + break; + default: + putchar((int)c); + } + } + putchar('"'); } // Functions to pretty-print inline and block lists, for debugging. // Prettyprint an inline list, for debugging. extern void print_blocks(block* b, int indent) { - struct ListData * data; - while(b != NULL) { - // printf("%3d %3d %3d| ", b->start_line, b->start_column, b->end_line); - for (int i=0; i < indent; i++) { - putchar(' '); - } - switch(b->tag) { - case document: - printf("document\n"); - print_blocks(b->children, indent + 2); - break; - case block_quote: - printf("block_quote\n"); - print_blocks(b->children, indent + 2); - break; - case list_item: - data = &(b->attributes.list_data); - printf("list_item\n"); - print_blocks(b->children, indent + 2); - break; - case list: - data = &(b->attributes.list_data); - if (data->list_type == ordered) { - printf("list (type=ordered tight=%s start=%d delim=%s)\n", - (data->tight ? "true" : "false"), - data->start, - (data->delimiter == parens ? "parens" : "period")); - } else { - printf("list (type=bullet tight=%s bullet_char=%c)\n", - (data->tight ? "true" : "false"), - data->bullet_char); - } - print_blocks(b->children, indent + 2); - break; - case atx_header: - printf("atx_header (level=%d)\n", b->attributes.header_level); - print_inlines(b->inline_content, indent + 2); - break; - case setext_header: - printf("setext_header (level=%d)\n", b->attributes.header_level); - print_inlines(b->inline_content, indent + 2); - break; - case paragraph: - printf("paragraph\n"); - print_inlines(b->inline_content, indent + 2); - break; - case hrule: - printf("hrule\n"); - break; - case indented_code: - printf("indented_code %s\n", format_str(b->string_content)->data); - break; - case fenced_code: - printf("fenced_code length=%d info=%s %s\n", - b->attributes.fenced_code_data.fence_length, - format_str(b->attributes.fenced_code_data.info)->data, - format_str(b->string_content)->data); - break; - case html_block: - printf("html_block %s\n", format_str(b->string_content)->data); - break; - case reference_def: - printf("reference_def\n"); - break; - default: - log_warn("block type %d not implemented\n", b->tag); - break; - } - b = b->next; - } + struct ListData *data; + + while(b != NULL) { + // printf("%3d %3d %3d| ", b->start_line, b->start_column, b->end_line); + for (int i=0; i < indent; i++) { + putchar(' '); + } + + switch(b->tag) { + case document: + printf("document\n"); + print_blocks(b->children, indent + 2); + break; + case block_quote: + printf("block_quote\n"); + print_blocks(b->children, indent + 2); + break; + case list_item: + data = &(b->attributes.list_data); + printf("list_item\n"); + print_blocks(b->children, indent + 2); + break; + case list: + data = &(b->attributes.list_data); + if (data->list_type == ordered) { + printf("list (type=ordered tight=%s start=%d delim=%s)\n", + (data->tight ? "true" : "false"), + data->start, + (data->delimiter == parens ? "parens" : "period")); + } else { + printf("list (type=bullet tight=%s bullet_char=%c)\n", + (data->tight ? "true" : "false"), + data->bullet_char); + } + print_blocks(b->children, indent + 2); + break; + case atx_header: + printf("atx_header (level=%d)\n", b->attributes.header_level); + print_inlines(b->inline_content, indent + 2); + break; + case setext_header: + printf("setext_header (level=%d)\n", b->attributes.header_level); + print_inlines(b->inline_content, indent + 2); + break; + case paragraph: + printf("paragraph\n"); + print_inlines(b->inline_content, indent + 2); + break; + case hrule: + printf("hrule\n"); + break; + case indented_code: + printf("indented_code "); + print_str(b->string_content.ptr, -1); + putchar('\n'); + break; + case fenced_code: + printf("fenced_code length=%d info=", + b->attributes.fenced_code_data.fence_length); + print_str(b->attributes.fenced_code_data.info.ptr, -1); + putchar(' '); + print_str(b->string_content.ptr, -1); + putchar('\n'); + break; + case html_block: + printf("html_block "); + print_str(b->string_content.ptr, -1); + putchar('\n'); + break; + case reference_def: + printf("reference_def\n"); + break; + default: + printf("# NOT IMPLEMENTED (%d)\n", b->tag); + break; + } + b = b->next; + } } // Prettyprint an inline list, for debugging. extern void print_inlines(inl* ils, int indent) { - while(ils != NULL) { - /* - // we add 11 extra spaces for the line/column info - for (int i=0; i < 11; i++) { - putchar(' '); - } - putchar('|'); - putchar(' '); - */ - for (int i=0; i < indent; i++) { - putchar(' '); - } - switch(ils->tag) { - case str: - printf("str %s\n", format_str(ils->content.literal)->data); - break; - case linebreak: - printf("linebreak\n"); - break; - case softbreak: - printf("softbreak\n"); - break; - case code: - printf("code %s\n", format_str(ils->content.literal)->data); - break; - case raw_html: - printf("html %s\n", format_str(ils->content.literal)->data); - break; - case entity: - printf("entity %s\n", format_str(ils->content.literal)->data); - break; - case link: - printf("link url=%s title=%s\n", - format_str(ils->content.linkable.url)->data, - format_str(ils->content.linkable.title)->data); - print_inlines(ils->content.linkable.label, indent + 2); - break; - case image: - printf("image url=%s title=%s\n", - format_str(ils->content.linkable.url)->data, - format_str(ils->content.linkable.title)->data); - print_inlines(ils->content.linkable.label, indent + 2); - break; - case strong: - printf("strong\n"); - print_inlines(ils->content.linkable.label, indent + 2); - break; - case emph: - printf("emph\n"); - print_inlines(ils->content.linkable.label, indent + 2); - break; - } - ils = ils->next; - } + while(ils != NULL) { + for (int i=0; i < indent; i++) { + putchar(' '); + } + switch(ils->tag) { + case str: + printf("str "); + print_str(ils->content.literal.data, ils->content.literal.len); + putchar('\n'); + break; + case linebreak: + printf("linebreak\n"); + break; + case softbreak: + printf("softbreak\n"); + break; + case code: + printf("code "); + print_str(ils->content.literal.data, ils->content.literal.len); + putchar('\n'); + break; + case raw_html: + printf("html "); + print_str(ils->content.literal.data, ils->content.literal.len); + putchar('\n'); + break; + case entity: + printf("entity "); + print_str(ils->content.literal.data, ils->content.literal.len); + putchar('\n'); + break; + case link: + case image: + printf("%s url=", ils->tag == link ? "link" : "image"); + print_str(ils->content.linkable.url, -1); + if (ils->content.linkable.title) { + printf(" title="); + print_str(ils->content.linkable.title, -1); + } + putchar('\n'); + print_inlines(ils->content.linkable.label, indent + 2); + break; + case strong: + printf("strong\n"); + print_inlines(ils->content.linkable.label, indent + 2); + break; + case emph: + printf("emph\n"); + print_inlines(ils->content.linkable.label, indent + 2); + break; + } + ils = ils->next; + } } @@ -105,19 +105,14 @@ extern block* add_child(block* parent, int block_type, int start_line, int start_column); void free_blocks(block* e); -block *stmd_parse_document(const char *buffer, size_t len); - -// FOR NOW: -void process_inlines(block* cur, reference** refmap); -void incorporate_line(gh_buf *ln, int line_number, block** curptr); -void finalize(block* b, int line_number); +extern block *stmd_parse_document(const unsigned char *buffer, size_t len); +extern block *stmd_parse_file(FILE *f); void print_inlines(inl* ils, int indent); void print_blocks(block* blk, int indent); -/* TODO */ -// int blocks_to_html(block* b, bstring* result, bool tight); -// int inlines_to_html(inl* b, bstring* result); +void blocks_to_html(gh_buf *html, block *b, bool tight); +void inlines_to_html(gh_buf *html, inl *b); void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len); @@ -1,6 +1,8 @@ #include <stdlib.h> -#include "bstrlib.h" -#include "debug.h" +#include <stdint.h> +#include <unistd.h> + +#include "stmd.h" static const int8_t utf8proc_utf8class[256] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, diff --git a/src/utf8.h b/src/utf8.h deleted file mode 100644 index fe59a90..0000000 --- a/src/utf8.h +++ /dev/null @@ -1,6 +0,0 @@ -#include <stdlib.h> -#include "bstrlib.h" - -extern unsigned char * from_utf8(unsigned char * s, unsigned int *n); -extern int to_utf8(unsigned int c, bstring dest); -extern bstring case_fold(bstring source); |