@@ -15,27 +15,29 @@ struct l2_io_writer { | |||
}; | |||
struct l2_bufio_reader { | |||
size_t bufsiz; | |||
struct l2_io_reader *r; | |||
size_t len; | |||
size_t idx; | |||
char buf[]; | |||
char buf[L2_IO_BUFSIZ]; | |||
}; | |||
/* Attach reader r to b and mark the buffer as empty. */
void l2_bufio_reader_init(struct l2_bufio_reader *b, struct l2_io_reader *r);
/* Move unread bytes to the front of the buffer and refill from the reader. */
void l2_bufio_shift(struct l2_bufio_reader *b);
/* Slow path of l2_bufio_peek: refill, then peek. Returns EOF if out of data. */
int l2_bufio_shift_peek(struct l2_bufio_reader *b, size_t count);
/* Slow path of l2_bufio_get: refill, then consume. Returns EOF if out of data. */
int l2_bufio_shift_get(struct l2_bufio_reader *b);
/* Look at the count-th upcoming byte (1-based) without consuming it. */
static inline int l2_bufio_peek(struct l2_bufio_reader *b, size_t count);
/* Consume and return the next byte, or EOF. */
static inline int l2_bufio_get(struct l2_bufio_reader *b);
struct l2_bufio_writer { | |||
size_t bufsiz; | |||
struct l2_io_writer *w; | |||
size_t idx; | |||
char buf[]; | |||
char buf[L2_IO_BUFSIZ]; | |||
}; | |||
/* Attach writer w to b and mark the buffer as empty. */
void l2_bufio_writer_init(struct l2_bufio_writer *b, struct l2_io_writer *w);
/* Hand all pending bytes to the underlying writer and empty the buffer. */
void l2_bufio_flush(struct l2_bufio_writer *b);
/* Append one byte, flushing first if the buffer is full. */
static inline void l2_bufio_put(struct l2_bufio_writer *b, char ch);
/* Append len bytes from ptr, bypassing the buffer when they do not fit. */
static inline void l2_bufio_put_n(struct l2_bufio_writer *b, const char *ptr, size_t len);
/* | |||
* Useful readers and writers | |||
@@ -43,8 +45,8 @@ void l2_bufio_put_n(struct l2_bufio_writer *b, struct l2_io_writer *w, const cha | |||
struct l2_io_mem_reader { | |||
struct l2_io_reader r; | |||
size_t len; | |||
size_t idx; | |||
size_t len; | |||
const char *mem; | |||
}; | |||
/*
 * Read callback for struct l2_io_mem_reader: takes the generic reader
 * pointer, a destination buffer and its capacity, and returns a byte count.
 */
size_t l2_io_mem_read(struct l2_io_reader *self, char *buf, size_t len);
@@ -72,38 +74,38 @@ void l2_io_file_write(struct l2_io_writer *self, const char *buf, size_t len); | |||
* Defined in the header to let the compiler inline | |||
*/ | |||
inline int l2_bufio_peek(struct l2_bufio_reader *b, struct l2_io_reader *r, size_t count) {\ | |||
static inline int l2_bufio_peek(struct l2_bufio_reader *b, size_t count) {\ | |||
size_t offset = count - 1; | |||
if (b->idx + offset < b->len) { | |||
return b->buf[b->idx + offset]; | |||
} else { | |||
return l2_bufio_shift_peek(b, r, count); | |||
return l2_bufio_shift_peek(b, count); | |||
} | |||
} | |||
inline int l2_bufio_get(struct l2_bufio_reader *b, struct l2_io_reader *r) { | |||
if (b->idx < b->bufsiz) { | |||
static inline int l2_bufio_get(struct l2_bufio_reader *b) { | |||
if (b->idx < b->len) { | |||
return b->buf[b->idx++]; | |||
} else { | |||
return l2_bufio_shift_get(b, r); | |||
return l2_bufio_shift_get(b); | |||
} | |||
} | |||
inline void l2_bufio_put(struct l2_bufio_writer *b, struct l2_io_writer *w, char ch) { | |||
if (b->idx >= b->bufsiz) { | |||
l2_bufio_flush(b, w); | |||
static inline void l2_bufio_put(struct l2_bufio_writer *b, char ch) { | |||
if (b->idx >= sizeof(b->buf)) { | |||
l2_bufio_flush(b); | |||
} | |||
b->buf[b->idx++] = ch; | |||
} | |||
inline void l2_bufio_put_n(struct l2_bufio_writer *b, struct l2_io_writer *w, const char *ptr, size_t len) { | |||
size_t freespace = b->bufsiz - b->idx; | |||
static inline void l2_bufio_put_n(struct l2_bufio_writer *b, const char *ptr, size_t len) { | |||
size_t freespace = sizeof(b->buf) - b->idx; | |||
if (len < freespace) { | |||
memcpy(b->buf + b->idx, ptr, len); | |||
b->idx += len; | |||
} else { | |||
l2_bufio_flush(b, w); | |||
w->write(w, ptr, len); | |||
l2_bufio_flush(b); | |||
b->w->write(b->w, ptr, len); | |||
} | |||
} | |||
@@ -0,0 +1,49 @@ | |||
#ifndef L2_PARSE_LEX_H | |||
#define L2_PARSE_LEX_H | |||
#include "../io.h" | |||
/* Kinds of token produced by the lexer. */
enum l2_token_kind {
	L2_TOK_OPEN_PAREN,    /* ( */
	L2_TOK_CLOSE_PAREN,   /* ) */
	L2_TOK_OPEN_BRACE,    /* { */
	L2_TOK_CLOSE_BRACE,   /* } */
	L2_TOK_OPEN_BRACKET,  /* [ */
	L2_TOK_CLOSE_BRACKET, /* ] */
	L2_TOK_COMMA,         /* , */
	L2_TOK_PERIOD,        /* . */
	L2_TOK_NUMBER,
	L2_TOK_EOF,           /* end of input */
	L2_TOK_STRING,        /* double-quoted string literal */
	L2_TOK_ERROR,         /* lexing failed */
};
struct l2_token { | |||
enum l2_token_kind kind; | |||
int line; | |||
int ch; | |||
union { | |||
char *str; | |||
double num; | |||
} v; | |||
}; | |||
/* Release the heap string owned by an L2_TOK_STRING token, if any. */
void l2_token_free(struct l2_token *tok);
/* Return a copy of *tok, taking over ownership of its string payload. */
struct l2_token l2_token_move(struct l2_token *tok);
struct l2_lexer { | |||
struct l2_token currtok; | |||
struct l2_token toks[2]; | |||
int tokidx; | |||
int line; | |||
int ch; | |||
struct l2_bufio_reader reader; | |||
}; | |||
/* Prepare lexer to read tokens from r, starting at line 1, column 1. */
void l2_lexer_init(struct l2_lexer *lexer, struct l2_io_reader *r);
/* Return the count-th upcoming token (1-based) without consuming it. */
struct l2_token *l2_lexer_peek(struct l2_lexer *lexer, int count);
/* Consume the next token; the result stays valid until the next call. */
struct l2_token *l2_lexer_get(struct l2_lexer *lexer);
#endif |
@@ -2,18 +2,25 @@ | |||
#include <stdlib.h> | |||
void l2_bufio_shift(struct l2_bufio_reader *b, struct l2_io_reader *r) { | |||
void l2_bufio_reader_init(struct l2_bufio_reader *b, struct l2_io_reader *r) { | |||
b->r = r; | |||
b->len = 0; | |||
b->idx = 0; | |||
} | |||
void l2_bufio_shift(struct l2_bufio_reader *b) { | |||
if (b->idx > 0) { | |||
b->len -= b->idx; | |||
memmove(b->buf, b->buf + b->idx, b->len); | |||
b->len += r->read(r, b->buf + b->len, b->bufsiz - b->len); | |||
b->idx = 0; | |||
} | |||
b->len += b->r->read(b->r, b->buf + b->len, sizeof(b->buf) - b->len); | |||
b->idx = 0; | |||
} | |||
int l2_bufio_shift_peek(struct l2_bufio_reader *b, struct l2_io_reader *r, size_t count) { | |||
int l2_bufio_shift_peek(struct l2_bufio_reader *b, size_t count) { | |||
size_t offset = count - 1; | |||
l2_bufio_shift(b, r); | |||
l2_bufio_shift(b); | |||
if (b->len <= offset) { | |||
return EOF; | |||
} | |||
@@ -21,8 +28,8 @@ int l2_bufio_shift_peek(struct l2_bufio_reader *b, struct l2_io_reader *r, size_ | |||
return b->buf[offset]; | |||
} | |||
int l2_bufio_shift_get(struct l2_bufio_reader *b, struct l2_io_reader *r) { | |||
l2_bufio_shift(b, r); | |||
int l2_bufio_shift_get(struct l2_bufio_reader *b) { | |||
l2_bufio_shift(b); | |||
if (b->len == 0) { | |||
return EOF; | |||
} | |||
@@ -30,8 +37,13 @@ int l2_bufio_shift_get(struct l2_bufio_reader *b, struct l2_io_reader *r) { | |||
return b->buf[b->idx++]; | |||
} | |||
void l2_bufio_flush(struct l2_bufio_writer *b, struct l2_io_writer *w) { | |||
w->write(w, b->buf, b->idx); | |||
void l2_bufio_writer_init(struct l2_bufio_writer *b, struct l2_io_writer *w) { | |||
b->w = w; | |||
b->idx = 0; | |||
} | |||
void l2_bufio_flush(struct l2_bufio_writer *b) { | |||
b->w->write(b->w, b->buf, b->idx); | |||
b->idx = 0; | |||
} | |||
@@ -0,0 +1,17 @@ | |||
#include "io.h" | |||
int main() { | |||
struct l2_io_mem_reader r = { l2_io_mem_read }; | |||
r.mem = "Hello World"; | |||
r.len = strlen(r.mem); | |||
struct l2_bufio_reader rb; | |||
l2_bufio_reader_init(&rb, &r.r); | |||
while (1) { | |||
int ch = l2_bufio_get(&rb); | |||
if (ch == EOF) break; | |||
printf("%c", (char)ch); | |||
} | |||
printf("\n"); | |||
} |
@@ -0,0 +1,182 @@ | |||
#include "parse/lex.h" | |||
#include <stdlib.h> | |||
void l2_token_free(struct l2_token *tok) { | |||
if (tok->kind == L2_TOK_STRING) { | |||
free(tok->v.str); | |||
} | |||
} | |||
struct l2_token l2_token_move(struct l2_token *tok) { | |||
struct l2_token dup = *tok; | |||
if (tok->kind == L2_TOK_STRING) { | |||
tok->v.str = NULL; | |||
} | |||
return dup; | |||
} | |||
void l2_lexer_init(struct l2_lexer *lexer, struct l2_io_reader *r) { | |||
lexer->currtok.kind = L2_TOK_EOF, | |||
lexer->tokidx = 0; | |||
lexer->line = 1; | |||
lexer->ch = 1; | |||
l2_bufio_reader_init(&lexer->reader, r); | |||
} | |||
static int read_ch(struct l2_lexer *lexer) { | |||
int ch = l2_bufio_get(&lexer->reader); | |||
lexer->ch += 1; | |||
if (ch == '\n') { | |||
lexer->ch = 1; | |||
lexer->line += 1; | |||
} | |||
return ch; | |||
} | |||
/* Return 1 if ch is a space, carriage return, newline or tab; else 0. */
static int is_whitespace(int ch) {
	switch (ch) {
	case ' ':
	case '\r':
	case '\n':
	case '\t':
		return 1;
	default:
		return 0;
	}
}
static void skip_whitespace(struct l2_lexer *lexer) { | |||
while (is_whitespace(l2_bufio_peek(&lexer->reader, 1))) read_ch(lexer); | |||
} | |||
static void read_string(struct l2_lexer *lexer, struct l2_token *tok) { | |||
tok->kind = L2_TOK_STRING; | |||
tok->v.str = malloc(16); | |||
if (tok->v.str == NULL) { | |||
tok->kind = L2_TOK_ERROR; | |||
tok->v.str = "Allocaton failure"; | |||
return; | |||
} | |||
size_t size = 16; | |||
size_t idx = 0; | |||
while (1) { | |||
int ch = read_ch(lexer); | |||
if (ch == '"') { | |||
return; | |||
} else if (ch == EOF) { | |||
tok->kind = L2_TOK_EOF; | |||
free(tok->v.str); | |||
tok->v.str = "Unexpected EOF"; | |||
return; | |||
} else if (ch == '\\') { | |||
int ch2 = read_ch(lexer); | |||
switch (ch2) { | |||
case 'n': | |||
ch = '\n'; | |||
break; | |||
case 'r': | |||
ch = '\r'; | |||
break; | |||
case 't': | |||
ch = '\t'; | |||
break; | |||
case EOF: | |||
tok->kind = L2_TOK_EOF; | |||
free(tok->v.str); | |||
tok->v.str = "Unexpected EOF"; | |||
return; | |||
default: | |||
ch = ch2; | |||
break; | |||
} | |||
} | |||
tok->v.str[idx++] = (char)ch; | |||
if (idx >= size) { | |||
size *= 2; | |||
char *newbuf = realloc(tok->v.str, size); | |||
if (newbuf == NULL) { | |||
free(tok->v.str); | |||
tok->kind = L2_TOK_ERROR; | |||
tok->v.str = "Allocation failure"; | |||
return; | |||
} | |||
tok->v.str = newbuf; | |||
} | |||
} | |||
} | |||
static void read_tok(struct l2_lexer *lexer, struct l2_token *tok) { | |||
skip_whitespace(lexer); | |||
tok->line = lexer->line; | |||
tok->ch = lexer->ch; | |||
int ch = read_ch(lexer); | |||
switch (ch) { | |||
case '(': | |||
tok->kind = L2_TOK_OPEN_PAREN; | |||
break; | |||
case ')': | |||
tok->kind = L2_TOK_CLOSE_PAREN; | |||
break; | |||
case '{': | |||
tok->kind = L2_TOK_OPEN_BRACE; | |||
break; | |||
case '}': | |||
tok->kind = L2_TOK_CLOSE_BRACE; | |||
break; | |||
case '[': | |||
tok->kind = L2_TOK_OPEN_BRACKET; | |||
break; | |||
case ']': | |||
tok->kind = L2_TOK_CLOSE_BRACKET; | |||
break; | |||
case ',': | |||
tok->kind = L2_TOK_COMMA; | |||
break; | |||
case '.': | |||
tok->kind = L2_TOK_PERIOD; | |||
break; | |||
case EOF: | |||
tok->kind = L2_TOK_EOF; | |||
break; | |||
case '"': | |||
read_string(lexer, tok); | |||
break; | |||
} | |||
} | |||
struct l2_token *l2_lexer_peek(struct l2_lexer *lexer, int count) { | |||
int offset = count - 1; | |||
while (offset >= lexer->tokidx) { | |||
read_tok(lexer, &lexer->toks[lexer->tokidx++]); | |||
} | |||
return &lexer->toks[offset]; | |||
} | |||
struct l2_token *l2_lexer_get(struct l2_lexer *lexer) { | |||
l2_token_free(&lexer->currtok); | |||
if (lexer->tokidx == 0) { | |||
read_tok(lexer, &lexer->currtok); | |||
} else { | |||
memmove(lexer->toks, lexer->toks + 1, lexer->tokidx - 1); | |||
lexer->tokidx -= 1; | |||
} | |||
return &lexer->currtok; | |||
} |