1
0
Fork 0
mirror of https://git.sr.ht/~sircmpwn/gmni synced 2024-06-09 00:06:33 +02:00

Initial pass on text/gemini parser

This commit is contained in:
Drew DeVault 2020-09-20 17:47:14 -04:00
parent 33495e8dd8
commit 48d0feed6d
4 changed files with 208 additions and 1 deletions

1
configure vendored
View File

@ -7,6 +7,7 @@ gmni() {
src/client.c \
src/escape.c \
src/gmni.c \
src/parser.c \
src/url.c
}

View File

@ -103,4 +103,64 @@ char *gemini_input_url(const char *url, const char *input);
// of the given Gemini status code.
enum gemini_status_class gemini_response_class(enum gemini_status status);
// Kinds of line recognised in a text/gemini document. The value stored in
// struct gemini_token selects which member of that token's union is valid.
enum gemini_tok {
GEMINI_TEXT, // Any line that matches none of the prefixes below
GEMINI_LINK, // Line beginning with "=>"
GEMINI_PREFORMATTED, // Line beginning with "```"
GEMINI_HEADING, // Line beginning with '#'
GEMINI_LIST_ITEM, // Line beginning with '*'
GEMINI_QUOTE, // Line beginning with '>'
};
// One parsed line of a text/gemini document, filled in by gemini_parser_next.
// The strings it points to are heap copies owned by the token; release them
// with gemini_token_finish.
struct gemini_token {
enum gemini_tok token;
// The token field determines which of the union members is valid.
union {
char *text; // GEMINI_TEXT: the line without its newline
struct {
char *text; // Label following the URL; NULL when the line has none
// NOTE(review): gemini_parser_next as written always sets url and
// leaves text NULL when absent - confirm which field the remark
// below is meant to describe.
char *url; // May be NULL
} link;
struct {
// NOTE(review): the parser currently stores placeholder strings in
// both fields (marked TODO there).
char *text;
char *alt_text; // May be NULL
} preformatted;
struct {
char *title; // Text after the '#' markers, whitespace not trimmed
int level; // 1, 2, or 3
} heading;
char *list_item; // GEMINI_LIST_ITEM: text after the leading '*'
char *quote_text; // GEMINI_QUOTE: text after the leading '>'
};
};
// State of an incremental text/gemini parser. Set up with gemini_parser_init
// and torn down with gemini_parser_finish.
struct gemini_parser {
BIO *f; // Input source; the parser holds its own reference
char *buf; // Bytes read from f that have not been tokenized yet
size_t bufsz; // Capacity of buf; starts at BUFSIZ and doubles when full
size_t bufln; // Number of bytes currently stored in buf
};
// Initializes a text/gemini parser which reads from the specified BIO.
//
// The parser takes its own reference on the BIO, which is dropped again by
// gemini_parser_finish.
void gemini_parser_init(struct gemini_parser *p, BIO *f);
// Finishes this text/gemini parser and frees up its resources.
//
// Passing NULL is allowed and does nothing.
void gemini_parser_finish(struct gemini_parser *p);
// Reads the next token from a text/gemini file.
//
// Returns 0 on success, 1 on EOF, and -1 on failure.
//
// When 1 is returned the token has still been filled in from whatever data
// preceded the end of input.
//
// Caller must call gemini_token_finish before exiting or re-using the token
// parameter.
int gemini_parser_next(struct gemini_parser *p, struct gemini_token *token);
// Must be called after gemini_parser_next to free up resources for the next
// token.
//
// Passing NULL is allowed and does nothing.
void gemini_token_finish(struct gemini_token *token);
#endif

View File

@ -233,9 +233,11 @@ main(int argc, char *argv[])
printf("%d %s\n", resp.status, resp.meta);
/* fallthrough */
case OMIT_HEADERS:
if (resp.status / 10 != 2) {
if (gemini_response_class(resp.status) !=
GEMINI_STATUS_CLASS_SUCCESS) {
break;
}
char last;
char buf[BUFSIZ];
for (int n = 1; n > 0;) {

144
src/parser.c Normal file
View File

@ -0,0 +1,144 @@
#include <assert.h>
#include <ctype.h>
#include <openssl/bio.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include "gmni.h"
// Prepares p to read text/gemini lines from f.
//
// Allocates the initial line buffer (BUFSIZ bytes plus one for the NUL
// terminator) and takes an additional reference on f; gemini_parser_finish
// drops that reference and frees the buffer.
//
// Allocation failure is treated as fatal and trips the assert below, since
// this function has no way to report an error to its caller.
void
gemini_parser_init(struct gemini_parser *p, BIO *f)
{
	p->f = f;
	p->bufln = 0;
	p->bufsz = BUFSIZ;
	// One spare byte so a completely full buffer can still be terminated.
	p->buf = malloc(p->bufsz + 1);
	assert(p->buf);
	p->buf[0] = 0;
	BIO_up_ref(p->f);
}
// Tears down a parser: frees its line buffer and drops the reference it
// holds on the input BIO. A NULL parser is accepted and ignored.
void
gemini_parser_finish(struct gemini_parser *p)
{
	if (p != NULL) {
		free(p->buf);
		BIO_free(p->f);
	}
}
// Reads one line from the parser's BIO and classifies it as a text/gemini
// token.
//
// Data is pulled from the BIO until the buffer holds a complete line or the
// input ends. The line is then matched against the text/gemini line prefixes
// ("=>", "```", '#', '*', '>') and the relevant parts are copied into tok with
// strdup; anything else becomes GEMINI_TEXT. Bytes following the line stay in
// the buffer for the next call.
//
// Returns 0 when a line was read, 1 when the end of input was reached (tok is
// still filled in from whatever data was left), and -1 if reading from the
// BIO fails or the buffer cannot be grown. tok is zeroed on entry, so it is
// safe to pass to gemini_token_finish whatever the outcome.
int
gemini_parser_next(struct gemini_parser *p, struct gemini_token *tok)
{
	memset(tok, 0, sizeof(*tok));

	int eof = 0;
	// Search only the bytes actually buffered rather than relying on the
	// terminator, so embedded NUL bytes cannot hide a newline.
	while (!memchr(p->buf, '\n', p->bufln)) {
		if (p->bufln == p->bufsz) {
			// Keep one byte beyond bufsz for the NUL terminator, the same
			// way the initial allocation does.
			size_t newsz = p->bufsz * 2;
			char *buf = realloc(p->buf, newsz + 1);
			if (!buf) {
				// p->buf is still valid; leave the parser untouched.
				return -1;
			}
			p->buf = buf;
			p->bufsz = newsz;
		}

		int n = BIO_read(p->f, &p->buf[p->bufln],
			(int)(p->bufsz - p->bufln));
		if (n < 0) {
			// Any negative value is an error; adding it to bufln would
			// corrupt the buffer length.
			return -1;
		} else if (n == 0) {
			eof = 1;
			break;
		}
		p->bufln += n;
		p->buf[p->bufln] = 0;
	}

	// TODO: Collapse multi-line text for the user-agent to wrap
	// Cut the buffer at the newline so the line can be handled as a string.
	char *end = memchr(p->buf, '\n', p->bufln);
	if (end) {
		*end = 0;
	}

	// TODO: Provide whitespace trimming helper function
	if (strncmp(p->buf, "=>", 2) == 0) {
		tok->token = GEMINI_LINK;
		size_t i = 2;
		// <ctype.h> functions require an unsigned char value.
		while (p->buf[i] && isspace((unsigned char)p->buf[i])) {
			++i;
		}
		const char *url = &p->buf[i];
		for (; p->buf[i]; ++i) {
			if (isspace((unsigned char)p->buf[i])) {
				// Terminate the URL in place, then skip to the label.
				p->buf[i++] = 0;
				while (isspace((unsigned char)p->buf[i])) {
					++i;
				}
				if (p->buf[i]) {
					tok->link.text = strdup(&p->buf[i]);
				}
				break;
			}
		}
		tok->link.url = strdup(url);
	} else if (strncmp(p->buf, "```", 3) == 0) {
		tok->token = GEMINI_PREFORMATTED; // TODO
		tok->preformatted.text = strdup("<text>");
		tok->preformatted.alt_text = strdup("<alt-text>");
	} else if (p->buf[0] == '#') {
		tok->token = GEMINI_HEADING;
		int level = 1;
		// Count at most three leading '#' characters.
		while (p->buf[level] == '#' && level < 3) {
			++level;
		}
		tok->heading.level = level;
		tok->heading.title = strdup(&p->buf[level]);
	} else if (p->buf[0] == '*') {
		tok->token = GEMINI_LIST_ITEM;
		tok->list_item = strdup(&p->buf[1]);
	} else if (p->buf[0] == '>') {
		tok->token = GEMINI_QUOTE;
		tok->quote_text = strdup(&p->buf[1]);
	} else {
		tok->token = GEMINI_TEXT;
		tok->text = strdup(p->buf);
	}

	if (end && end + 1 < p->buf + p->bufln) {
		// Move the unread remainder to the front and re-terminate it so
		// bytes of the line just consumed cannot leak into the next one.
		size_t len = end - p->buf + 1;
		p->bufln -= len;
		memmove(p->buf, end + 1, p->bufln);
		p->buf[p->bufln] = 0;
	} else {
		p->buf[0] = 0;
		p->bufln = 0;
	}

	return eof;
}
// Frees the strings owned by a token. Which union members are released is
// chosen by tok->token. A NULL token is accepted and ignored.
void
gemini_token_finish(struct gemini_token *tok)
{
	if (tok == NULL) {
		return;
	}

	switch (tok->token) {
	case GEMINI_LINK:
		free(tok->link.url);
		free(tok->link.text);
		break;
	case GEMINI_PREFORMATTED:
		free(tok->preformatted.alt_text);
		free(tok->preformatted.text);
		break;
	case GEMINI_HEADING:
		free(tok->heading.title);
		break;
	case GEMINI_LIST_ITEM:
		free(tok->list_item);
		break;
	case GEMINI_QUOTE:
		free(tok->quote_text);
		break;
	case GEMINI_TEXT:
		free(tok->text);
		break;
	}
}