|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211 |
- #include "parse/lex.h"
-
- #include <stdlib.h>
-
- const char *l2_token_kind_name(enum l2_token_kind kind) {
- switch (kind) {
- case L2_TOK_OPEN_PAREN:
- return "open-paren";
- case L2_TOK_CLOSE_PAREN:
- return "close-paren";
- case L2_TOK_OPEN_BRACE:
- return "open-brace";
- case L2_TOK_CLOSE_BRACE:
- return "close-brace";
- case L2_TOK_OPEN_BRACKET:
- return "open-bracket";
- case L2_TOK_CLOSE_BRACKET:
- return "close-bracket";
- case L2_TOK_COMMA:
- return "comma";
- case L2_TOK_PERIOD:
- return "period";
- case L2_TOK_EOF:
- return "end-of-file";
- case L2_TOK_NUMBER:
- return "number";
- case L2_TOK_STRING:
- return "string";
- case L2_TOK_ERROR:
- return "error";
- }
- }
-
- void l2_token_free(struct l2_token *tok) {
- if (tok->kind == L2_TOK_STRING) {
- free(tok->v.str);
- }
- }
-
- struct l2_token l2_token_move(struct l2_token *tok) {
- struct l2_token dup = *tok;
- if (tok->kind == L2_TOK_STRING) {
- tok->v.str = NULL;
- }
-
- return dup;
- }
-
- void l2_lexer_init(struct l2_lexer *lexer, struct l2_io_reader *r) {
- lexer->currtok.kind = L2_TOK_EOF,
- lexer->tokidx = 0;
- lexer->line = 1;
- lexer->ch = 1;
- l2_bufio_reader_init(&lexer->reader, r);
- }
-
- static int read_ch(struct l2_lexer *lexer) {
- int ch = l2_bufio_get(&lexer->reader);
- lexer->ch += 1;
- if (ch == '\n') {
- lexer->ch = 1;
- lexer->line += 1;
- }
-
- return ch;
- }
-
/*
 * Return 1 if `ch` is a space, carriage return, newline or horizontal tab,
 * and 0 for anything else (other control characters and EOF included).
 */
static int is_whitespace(int ch) {
    switch (ch) {
    case ' ':
    case '\r':
    case '\n':
    case '\t':
        return 1;
    default:
        return 0;
    }
}
-
- static void skip_whitespace(struct l2_lexer *lexer) {
- while (is_whitespace(l2_bufio_peek(&lexer->reader, 1))) read_ch(lexer);
- }
-
- static void read_string(struct l2_lexer *lexer, struct l2_token *tok) {
- tok->kind = L2_TOK_STRING;
- tok->v.str = malloc(16);
- if (tok->v.str == NULL) {
- tok->kind = L2_TOK_ERROR;
- tok->v.str = "Allocaton failure";
- return;
- }
-
- size_t size = 16;
- size_t idx = 0;
-
- while (1) {
- int ch = read_ch(lexer);
- if (ch == '"') {
- return;
- } else if (ch == EOF) {
- tok->kind = L2_TOK_EOF;
- free(tok->v.str);
- tok->v.str = "Unexpected EOF";
- return;
- } else if (ch == '\\') {
- int ch2 = read_ch(lexer);
- switch (ch2) {
- case 'n':
- ch = '\n';
- break;
-
- case 'r':
- ch = '\r';
- break;
-
- case 't':
- ch = '\t';
- break;
-
- case EOF:
- tok->kind = L2_TOK_EOF;
- free(tok->v.str);
- tok->v.str = "Unexpected EOF";
- return;
-
- default:
- ch = ch2;
- break;
- }
- }
-
- tok->v.str[idx++] = (char)ch;
- if (idx >= size) {
- size *= 2;
- char *newbuf = realloc(tok->v.str, size);
- if (newbuf == NULL) {
- free(tok->v.str);
- tok->kind = L2_TOK_ERROR;
- tok->v.str = "Allocation failure";
- return;
- }
-
- tok->v.str = newbuf;
- }
- }
- }
-
- static void read_tok(struct l2_lexer *lexer, struct l2_token *tok) {
- skip_whitespace(lexer);
-
- tok->line = lexer->line;
- tok->ch = lexer->ch;
-
- int ch = read_ch(lexer);
- switch (ch) {
- case '(':
- tok->kind = L2_TOK_OPEN_PAREN;
- break;
-
- case ')':
- tok->kind = L2_TOK_CLOSE_PAREN;
- break;
-
- case '{':
- tok->kind = L2_TOK_OPEN_BRACE;
- break;
-
- case '}':
- tok->kind = L2_TOK_CLOSE_BRACE;
- break;
-
- case '[':
- tok->kind = L2_TOK_OPEN_BRACKET;
- break;
-
- case ']':
- tok->kind = L2_TOK_CLOSE_BRACKET;
- break;
-
- case ',':
- tok->kind = L2_TOK_COMMA;
- break;
-
- case '.':
- tok->kind = L2_TOK_PERIOD;
- break;
-
- case EOF:
- tok->kind = L2_TOK_EOF;
- break;
-
- case '"':
- read_string(lexer, tok);
- break;
- }
- }
-
- struct l2_token *l2_lexer_peek(struct l2_lexer *lexer, int count) {
- int offset = count - 1;
-
- while (offset >= lexer->tokidx) {
- read_tok(lexer, &lexer->toks[lexer->tokidx++]);
- }
-
- return &lexer->toks[offset];
- }
-
- struct l2_token *l2_lexer_get(struct l2_lexer *lexer) {
- l2_token_free(&lexer->currtok);
-
- if (lexer->tokidx == 0) {
- read_tok(lexer, &lexer->currtok);
- } else {
- memmove(lexer->toks, lexer->toks + 1, lexer->tokidx - 1);
- lexer->tokidx -= 1;
- }
-
- return &lexer->currtok;
- }
|