Browse Source

short string optimization for tokens

master
Martin Dørum 3 years ago
parent
commit
ded80df7a8
9 changed files with 339 additions and 158 deletions
  1. 7
    0
      include/lang2/gen/gen.h
  2. 27
    7
      include/lang2/parse/lex.h
  3. 49
    0
      lib/gen/gen.c
  4. 1
    0
      lib/io.c
  5. 1
    1
      lib/parse/error.c
  6. 103
    75
      lib/parse/lex.c
  7. 112
    54
      lib/parse/parse.c
  8. 1
    0
      test/src/eval.t.c
  9. 38
    21
      test/src/lex.t.c

+ 7
- 0
include/lang2/gen/gen.h View File

@@ -30,19 +30,26 @@ void l2_gen_ret(struct l2_generator *gen);
/*
 * Emitters for values, lookups and assignments.
 *
 * Emitters that take a name or string come in two forms:
 *  - the `char **` form, which hands the pointer itself to the string set
 *    (presumably transferring ownership of the buffer -- confirm against
 *    l2_strset_put);
 *  - the `_copy` form, which takes a plain `char *` and goes through
 *    l2_strset_put_copy() (or strdup() for strings), so it can be given a
 *    buffer the caller does not want to give away, such as a token's inline
 *    strbuf.
 */
void l2_gen_none(struct l2_generator *gen);
void l2_gen_number(struct l2_generator *gen, double num);
void l2_gen_string(struct l2_generator *gen, char **str);
void l2_gen_string_copy(struct l2_generator *gen, char *str);
void l2_gen_atom(struct l2_generator *gen, char **ident);
void l2_gen_atom_copy(struct l2_generator *gen, char *ident);
void l2_gen_function(struct l2_generator *gen, l2_word pos);
void l2_gen_array(struct l2_generator *gen, l2_word count);
void l2_gen_namespace(struct l2_generator *gen);
/* Namespace member access by identifier. */
void l2_gen_namespace_set(struct l2_generator *gen, char **ident);
void l2_gen_namespace_set_copy(struct l2_generator *gen, char *ident);
void l2_gen_namespace_lookup(struct l2_generator *gen, char **ident);
void l2_gen_namespace_lookup_copy(struct l2_generator *gen, char *ident);
/* Array access by a literal index, and access by a computed key. */
void l2_gen_array_lookup(struct l2_generator *gen, int number);
void l2_gen_array_set(struct l2_generator *gen, int number);
void l2_gen_dynamic_lookup(struct l2_generator *gen);
void l2_gen_dynamic_set(struct l2_generator *gen);
/* Stack-frame variable access by identifier. */
void l2_gen_stack_frame_lookup(struct l2_generator *gen, char **ident);
void l2_gen_stack_frame_lookup_copy(struct l2_generator *gen, char *ident);
void l2_gen_stack_frame_set(struct l2_generator *gen, char **ident);
void l2_gen_stack_frame_set_copy(struct l2_generator *gen, char *ident);
void l2_gen_stack_frame_replace(struct l2_generator *gen, char **ident);
void l2_gen_stack_frame_replace_copy(struct l2_generator *gen, char *ident);
/* Emit a function call with `argc` arguments. */
void l2_gen_func_call(struct l2_generator *gen, l2_word argc);

#endif

+ 27
- 7
include/lang2/parse/lex.h View File

@@ -25,22 +25,42 @@ enum l2_token_kind {
L2_TOK_ERROR,
};

/*
 * Flag bits that share the `flags` byte of struct l2_token_value with the
 * token kind.
 */
enum l2_token_flags {
/* Set while the token's text lives inline in `strbuf` instead of behind `str`. */
L2_TOK_SMALL = 1 << 7,
};

const char *l2_token_kind_name(enum l2_token_kind kind);

/*
 * Payload of a token plus the byte that encodes its kind.
 *
 * Two anonymous structs overlay each other inside the outer union:
 *  - `flags` followed by a union of `str` / `num` / `integer`;
 *  - `padding`, which occupies the same first byte as `flags`, followed by
 *    the inline text buffer `strbuf`.
 *
 * `flags` holds an enum l2_token_kind value, OR-ed with L2_TOK_SMALL while
 * the text of a string/identifier token is stored in `strbuf`. Read the kind
 * through l2_token_get_kind() so the flag bit is masked off.
 */
struct l2_token_value {
union {
struct {
/* Token kind in the low bits, L2_TOK_SMALL in the top bit. */
unsigned char flags;
union {
/* Heap buffer for long string/identifier text, or a static message for error tokens. */
char *str;
/* Value of a number token. */
double num;
/* Index carried by a dot-number token. */
int integer;
};
};

struct {
/* Never accessed by name; keeps `strbuf` from overlapping `flags`. */
unsigned char padding;
/* NUL-terminated inline text, valid only while L2_TOK_SMALL is set. */
char strbuf[15];
};
};
};

struct l2_token {
enum l2_token_kind kind;
int line;
int ch;

union {
char *str;
double num;
int integer;
} v;
struct l2_token_value v;
};

/*
 * Accessors for the `flags` byte of a token, which packs the token kind
 * together with the L2_TOK_SMALL marker. The mask is spelled with the
 * L2_TOK_SMALL constant so it cannot drift from the flag's definition.
 * Each macro evaluates `tok` exactly once.
 */

/* Kind of the token, with the L2_TOK_SMALL marker masked off. */
#define l2_token_get_kind(tok) ((enum l2_token_kind)((tok)->v.flags & ~L2_TOK_SMALL))
/* Human-readable name of the token's kind. */
#define l2_token_get_name(tok) (l2_token_kind_name(l2_token_get_kind(tok)))
/* Non-zero when the token's text is stored inline in v.strbuf. */
#define l2_token_is_small(tok) ((tok)->v.flags & L2_TOK_SMALL)
void l2_token_free(struct l2_token *tok);
char *l2_token_extract_str(struct l2_token *tok);
struct l2_token_value l2_token_extract_val(struct l2_token *tok);
void l2_token_print(struct l2_token *tok, struct l2_io_writer *w);

struct l2_lexer {

+ 49
- 0
lib/gen/gen.c View File

@@ -70,6 +70,12 @@ void l2_gen_atom(struct l2_generator *gen, char **str) {
put(gen, id);
}

/*
 * Emit an atom-allocation instruction for the identifier `str`.
 *
 * The identifier is registered in the generator's atom set through
 * l2_strset_put_copy(), and the id it returns becomes the operand.
 */
void l2_gen_atom_copy(struct l2_generator *gen, char *str) {
	const size_t atom_id = l2_strset_put_copy(&gen->atomset, str);

	put(gen, L2_OP_ALLOC_ATOM);
	put(gen, atom_id);
}

void l2_gen_string(struct l2_generator *gen, char **str) {
size_t id = l2_strset_get(&gen->stringset, *str);
if (id == 0) {
@@ -106,6 +112,19 @@ void l2_gen_string(struct l2_generator *gen, char **str) {
}
}

/*
 * Emit a string literal without handing over the caller's buffer.
 *
 * If the string set already knows `str` (a non-zero id), emit a reference to
 * the previously recorded length/position. Otherwise duplicate the text and
 * let l2_gen_string() handle it.
 */
void l2_gen_string_copy(struct l2_generator *gen, char *str) {
	size_t known = l2_strset_get(&gen->stringset, str);

	if (known != 0) {
		// Ids are 1-based; 0 is the "not present" answer.
		struct l2_generator_string *entry = &gen->strings[known - 1];
		put(gen, L2_OP_ALLOC_BUFFER_STATIC);
		put(gen, entry->length);
		put(gen, entry->pos);
		return;
	}

	// NOTE(review): the strdup() result is passed on unchecked.
	char *dup = strdup(str);
	l2_gen_string(gen, &dup);
}

void l2_gen_function(struct l2_generator *gen, l2_word pos) {
put(gen, L2_OP_ALLOC_FUNCTION);
put(gen, pos);
@@ -126,12 +145,24 @@ void l2_gen_namespace_set(struct l2_generator *gen, char **ident) {
put(gen, atom_id);
}

/*
 * Emit a namespace-set instruction keyed by `ident`.
 *
 * The key is registered in the atom set through l2_strset_put_copy(), and
 * the resulting id is written as the operand.
 */
void l2_gen_namespace_set_copy(struct l2_generator *gen, char *ident) {
	const size_t key = l2_strset_put_copy(&gen->atomset, ident);

	put(gen, L2_OP_NAMESPACE_SET);
	put(gen, key);
}

/*
 * Emit a namespace-lookup instruction keyed by `*ident`.
 *
 * The pointer-to-pointer is passed straight to l2_strset_put(); presumably
 * the set takes over the buffer -- confirm against l2_strset_put.
 */
void l2_gen_namespace_lookup(struct l2_generator *gen, char **ident) {
	const size_t key = l2_strset_put(&gen->atomset, ident);

	put(gen, L2_OP_NAMESPACE_LOOKUP);
	put(gen, key);
}

/*
 * Emit a namespace-lookup instruction keyed by `ident`.
 *
 * The key is registered in the atom set through l2_strset_put_copy(), and
 * the resulting id is written as the operand.
 */
void l2_gen_namespace_lookup_copy(struct l2_generator *gen, char *ident) {
	const size_t key = l2_strset_put_copy(&gen->atomset, ident);

	put(gen, L2_OP_NAMESPACE_LOOKUP);
	put(gen, key);
}

void l2_gen_array_lookup(struct l2_generator *gen, int number) {
put(gen, L2_OP_ARRAY_LOOKUP);
put(gen, number);
@@ -156,18 +187,36 @@ void l2_gen_stack_frame_lookup(struct l2_generator *gen, char **ident) {
put(gen, atom_id);
}

/*
 * Emit a stack-frame lookup of the variable named `ident`.
 *
 * The name is registered in the atom set through l2_strset_put_copy(), and
 * the resulting id is written as the operand.
 */
void l2_gen_stack_frame_lookup_copy(struct l2_generator *gen, char *ident) {
	const size_t name = l2_strset_put_copy(&gen->atomset, ident);

	put(gen, L2_OP_STACK_FRAME_LOOKUP);
	put(gen, name);
}

/*
 * Emit a stack-frame set of the variable named `*ident`.
 *
 * The pointer-to-pointer is passed straight to l2_strset_put(); presumably
 * the set takes over the buffer -- confirm against l2_strset_put.
 */
void l2_gen_stack_frame_set(struct l2_generator *gen, char **ident) {
	const size_t name = l2_strset_put(&gen->atomset, ident);

	put(gen, L2_OP_STACK_FRAME_SET);
	put(gen, name);
}

/*
 * Emit a stack-frame set of the variable named `ident`.
 *
 * The name is registered in the atom set through l2_strset_put_copy(), and
 * the resulting id is written as the operand.
 */
void l2_gen_stack_frame_set_copy(struct l2_generator *gen, char *ident) {
	const size_t name = l2_strset_put_copy(&gen->atomset, ident);

	put(gen, L2_OP_STACK_FRAME_SET);
	put(gen, name);
}

/*
 * Emit a stack-frame replace of the variable named `*ident`.
 *
 * The pointer-to-pointer is passed straight to l2_strset_put(); presumably
 * the set takes over the buffer -- confirm against l2_strset_put.
 */
void l2_gen_stack_frame_replace(struct l2_generator *gen, char **ident) {
	const size_t name = l2_strset_put(&gen->atomset, ident);

	put(gen, L2_OP_STACK_FRAME_REPLACE);
	put(gen, name);
}

/*
 * Emit a stack-frame replace of the variable named `ident`.
 *
 * The name is registered in the atom set through l2_strset_put_copy(), and
 * the resulting id is written as the operand.
 */
void l2_gen_stack_frame_replace_copy(struct l2_generator *gen, char *ident) {
	const size_t name = l2_strset_put_copy(&gen->atomset, ident);

	put(gen, L2_OP_STACK_FRAME_REPLACE);
	put(gen, name);
}

void l2_gen_func_call(struct l2_generator *gen, l2_word argc) {
put(gen, L2_OP_FUNC_CALL);
put(gen, argc);

+ 1
- 0
lib/io.c View File

@@ -80,6 +80,7 @@ void l2_bufio_writer_init(struct l2_bufio_writer *b, struct l2_io_writer *w) {
}

/*
 * Push any buffered bytes to the underlying writer and reset the buffer.
 *
 * When nothing is buffered the underlying write callback is not invoked.
 */
void l2_bufio_flush(struct l2_bufio_writer *b) {
	if (b->idx != 0) {
		b->w->write(b->w, b->buf, b->idx);
		b->idx = 0;
	}
}

+ 1
- 1
lib/parse/error.c View File

@@ -10,7 +10,7 @@ void l2_parse_err(struct l2_parse_error *err, struct l2_token *tok, const char *
err->is_static = 0;
err->ch = tok->ch;

if (tok->kind == L2_TOK_ERROR) {
if (l2_token_get_kind(tok) == L2_TOK_ERROR) {
l2_trace("Error token: %s", tok->v.str);
err->message = tok->v.str;
err->is_static = 1;

+ 103
- 75
lib/parse/lex.c View File

@@ -3,22 +3,22 @@
#include <stdlib.h>

static void log_token(struct l2_token *tok) {
switch (tok->kind) {
switch (l2_token_get_kind(tok)) {
case L2_TOK_STRING:
case L2_TOK_IDENT:
case L2_TOK_ERROR:
printf("%i:%i\t%s '%s'\n", tok->line, tok->ch,
l2_token_kind_name(tok->kind), tok->v.str);
l2_token_get_name(tok), tok->v.str);
break;

case L2_TOK_NUMBER:
printf("%i:%i\t%s '%g'\n", tok->line, tok->ch,
l2_token_kind_name(tok->kind), tok->v.num);
l2_token_get_name(tok), tok->v.num);
break;

default:
printf("%i:%i\t%s\n", tok->line, tok->ch,
l2_token_kind_name(tok->kind));
l2_token_get_name(tok));
break;
}
}
@@ -69,19 +69,22 @@ const char *l2_token_kind_name(enum l2_token_kind kind) {
}

void l2_token_free(struct l2_token *tok) {
if (tok->kind == L2_TOK_STRING || tok->kind == L2_TOK_IDENT) {
enum l2_token_kind kind = l2_token_get_kind(tok);
if (
(kind == L2_TOK_STRING || kind == L2_TOK_IDENT) &&
!l2_token_is_small(tok)) {
free(tok->v.str);
}
}

char *l2_token_extract_str(struct l2_token *tok) {
char *str = tok->v.str;
struct l2_token_value l2_token_extract_val(struct l2_token *tok) {
struct l2_token_value v = tok->v;
tok->v.str = NULL;
return str;
return v;
}

void l2_lexer_init(struct l2_lexer *lexer, struct l2_io_reader *r) {
lexer->toks[0].kind = L2_TOK_EOF,
lexer->toks[0].v.flags = L2_TOK_EOF,
lexer->tokidx = 0;
lexer->line = 1;
lexer->ch = 1;
@@ -232,7 +235,7 @@ static int read_integer(struct l2_lexer *lexer, long long *num, long long *base,
}

static void read_number(struct l2_lexer *lexer, struct l2_token *tok) {
tok->kind = L2_TOK_NUMBER;
tok->v.flags = L2_TOK_NUMBER;

float sign = 1;
if (peek_ch(lexer) == '-') {
@@ -241,7 +244,7 @@ static void read_number(struct l2_lexer *lexer, struct l2_token *tok) {
}

if (!is_numeric(peek_ch(lexer))) {
tok->kind = L2_TOK_ERROR;
tok->v.flags = L2_TOK_ERROR;
tok->v.str = "No number in number literal";
return;
}
@@ -250,7 +253,7 @@ static void read_number(struct l2_lexer *lexer, struct l2_token *tok) {
long long base;
char *err;
if (read_integer(lexer, &integral, &base, &err) < 0) {
tok->kind = L2_TOK_ERROR;
tok->v.flags = L2_TOK_ERROR;
tok->v.str = err;
return;
}
@@ -283,7 +286,7 @@ static void read_number(struct l2_lexer *lexer, struct l2_token *tok) {
}

if (digit >= base) {
tok->kind = L2_TOK_ERROR;
tok->v.flags = L2_TOK_ERROR;
tok->v.str = "Number with digits too big for the base";
return;
}
@@ -293,7 +296,7 @@ static void read_number(struct l2_lexer *lexer, struct l2_token *tok) {
}

if (fraction_len < 1) {
tok->kind = L2_TOK_ERROR;
tok->v.flags = L2_TOK_ERROR;
tok->v.str = "Trailing dot in number literal";
return;
}
@@ -310,25 +313,22 @@ static void read_number(struct l2_lexer *lexer, struct l2_token *tok) {
}

static void read_string(struct l2_lexer *lexer, struct l2_token *tok) {
tok->kind = L2_TOK_STRING;
tok->v.str = malloc(16);
if (tok->v.str == NULL) {
tok->kind = L2_TOK_ERROR;
tok->v.str = "Allocaton failure";
return;
}
tok->v.flags = L2_TOK_STRING | L2_TOK_SMALL;

size_t size = 16;
char *dest = tok->v.strbuf;
size_t size = sizeof(tok->v.strbuf);
size_t idx = 0;

while (1) {
int ch = read_ch(lexer);
if (ch == '"') {
tok->v.str[idx] = '\0';
dest[idx] = '\0';
return;
} else if (ch == EOF) {
tok->kind = L2_TOK_EOF;
free(tok->v.str);
if (!l2_token_is_small(tok)) {
free(tok->v.str);
}
tok->v.flags = L2_TOK_ERROR;
tok->v.str = "Unexpected EOF";
return;
} else if (ch == '\\') {
@@ -347,8 +347,10 @@ static void read_string(struct l2_lexer *lexer, struct l2_token *tok) {
break;

case EOF:
tok->kind = L2_TOK_EOF;
free(tok->v.str);
if (!l2_token_is_small(tok)) {
free(tok->v.str);
}
tok->v.flags = L2_TOK_ERROR;
tok->v.str = "Unexpected EOF";
return;

@@ -358,39 +360,51 @@ static void read_string(struct l2_lexer *lexer, struct l2_token *tok) {
}
}

tok->v.str[idx++] = (char)ch;
if (idx >= size) {
size *= 2;
char *newbuf = realloc(tok->v.str, size);
if (newbuf == NULL) {
free(tok->v.str);
tok->kind = L2_TOK_ERROR;
tok->v.str = "Allocation failure";
return;
dest[idx++] = (char)ch;

// The first time we run out of space, we have to switch away from
// the small-string optimization and malloc memory.
if (idx + 1 >= size) {
char *newbuf;
if (l2_token_is_small(tok)) {
tok->v.flags &= ~L2_TOK_SMALL;
size = 32;
newbuf = malloc(size);
if (newbuf == NULL) {
tok->v.flags = L2_TOK_ERROR;
tok->v.str = "Allocation failure";
return;
}
memcpy(newbuf, tok->v.strbuf, idx);
} else {
size *= 2;
newbuf = realloc(tok->v.str, size);
if (newbuf == NULL) {
free(tok->v.str);
tok->v.flags = L2_TOK_ERROR;
tok->v.str = "Allocation failure";
return;
}
}

tok->v.str = newbuf;
dest = newbuf;
}
}
}

static void read_ident(struct l2_lexer *lexer, struct l2_token *tok) {
tok->kind = L2_TOK_IDENT;
tok->v.str = malloc(16);
if (tok->v.str == NULL) {
tok->kind = L2_TOK_ERROR;
tok->v.str = "Allocaton failure";
return;
}
tok->v.flags = L2_TOK_IDENT | L2_TOK_SMALL;

size_t size = 16;
char *dest = tok->v.strbuf;
size_t size = sizeof(tok->v.strbuf);
size_t idx = 0;

while (1) {
int ch = peek_ch(lexer);

if (is_whitespace(ch)) {
tok->v.str[idx] = '\0';
dest[idx] = '\0';
return;
}

@@ -408,22 +422,39 @@ static void read_ident(struct l2_lexer *lexer, struct l2_token *tok) {
case '=':
case ';':
case EOF:
tok->v.str[idx] = '\0';
dest[idx] = '\0';
return;
}

tok->v.str[idx++] = (char)read_ch(lexer);
dest[idx++] = (char)read_ch(lexer);

// The first time we run out of space, we have to switch away from
// the small-string optimization and malloc memory.
if (idx + 1 >= size) {
size *= 2;
char *newbuf = realloc(tok->v.str, size);
if (newbuf == NULL) {
free(tok->v.str);
tok->kind = L2_TOK_ERROR;
tok->v.str = "Allocation failure";
return;
char *newbuf;
if (l2_token_is_small(tok)) {
tok->v.flags &= ~L2_TOK_SMALL;
size = 32;
newbuf = malloc(size);
if (newbuf == NULL) {
tok->v.flags = L2_TOK_ERROR;
tok->v.str = "Allocation failure";
return;
}
memcpy(newbuf, tok->v.strbuf, idx);
} else {
size *= 2;
newbuf = realloc(tok->v.str, size);
if (newbuf == NULL) {
free(tok->v.str);
tok->v.flags = L2_TOK_ERROR;
tok->v.str = "Allocation failure";
return;
}
}

tok->v.str = newbuf;
dest = newbuf;
}
}
}
@@ -434,7 +465,7 @@ static void read_tok(struct l2_lexer *lexer, struct l2_token *tok) {
int nl = skip_whitespace(lexer);

if (nl && lexer->parens == 0) {
tok->kind = L2_TOK_EOL;
tok->v.flags = L2_TOK_EOL;
return;
}

@@ -442,38 +473,38 @@ static void read_tok(struct l2_lexer *lexer, struct l2_token *tok) {
switch (ch) {
case '(':
read_ch(lexer);
tok->kind = L2_TOK_OPEN_PAREN;
tok->v.flags = L2_TOK_OPEN_PAREN;
lexer->parens += 1;
break;

case ')':
read_ch(lexer);
tok->kind = L2_TOK_CLOSE_PAREN;
tok->v.flags = L2_TOK_CLOSE_PAREN;
lexer->parens -= 1;
break;

case '{':
read_ch(lexer);
tok->kind = L2_TOK_OPEN_BRACE;
tok->v.flags = L2_TOK_OPEN_BRACE;
break;

case '}':
read_ch(lexer);
tok->kind = L2_TOK_CLOSE_BRACE;
tok->v.flags = L2_TOK_CLOSE_BRACE;
break;

case '[':
read_ch(lexer);
tok->kind = L2_TOK_OPEN_BRACKET;
tok->v.flags = L2_TOK_OPEN_BRACKET;
break;

case ']':
read_ch(lexer);
tok->kind = L2_TOK_CLOSE_BRACKET;
tok->v.flags = L2_TOK_CLOSE_BRACKET;
break;

case ';':
tok->kind = L2_TOK_EOL;
tok->v.flags = L2_TOK_EOL;
do {
read_ch(lexer);
skip_whitespace(lexer);
@@ -482,28 +513,28 @@ static void read_tok(struct l2_lexer *lexer, struct l2_token *tok) {

case '\'':
read_ch(lexer);
tok->kind = L2_TOK_QUOT;
tok->v.flags = L2_TOK_QUOT;
break;

case ',':
read_ch(lexer);
tok->kind = L2_TOK_COMMA;
tok->v.flags = L2_TOK_COMMA;
break;

case '.':
read_ch(lexer);
if (is_numeric(peek_ch(lexer))) {
tok->kind = L2_TOK_DOT_NUMBER;
tok->v.flags = L2_TOK_DOT_NUMBER;
long long num, base;
char *err;
if (read_integer(lexer, &num, &base, &err) < 0) {
tok->kind = L2_TOK_ERROR;
tok->v.flags = L2_TOK_ERROR;
tok->v.str = err;
} else {
tok->v.integer = (int)num;
}
} else {
tok->kind = L2_TOK_PERIOD;
tok->v.flags = L2_TOK_PERIOD;
}
break;

@@ -513,22 +544,22 @@ static void read_tok(struct l2_lexer *lexer, struct l2_token *tok) {
switch (ch) {
case '=':
read_ch(lexer);
tok->kind = L2_TOK_COLON_EQ;
tok->v.flags = L2_TOK_COLON_EQ;
break;

default:
tok->kind = L2_TOK_COLON;
tok->v.flags = L2_TOK_COLON;
break;
}
break;

case '=':
read_ch(lexer);
tok->kind = L2_TOK_EQUALS;
tok->v.flags = L2_TOK_EQUALS;
break;

case EOF:
tok->kind = L2_TOK_EOF;
tok->v.flags = L2_TOK_EOF;
break;

case '"':
@@ -543,9 +574,6 @@ static void read_tok(struct l2_lexer *lexer, struct l2_token *tok) {
}

read_ident(lexer, tok);
if (tok->kind != L2_TOK_IDENT) {
break;
}
}
}

@@ -569,7 +597,7 @@ void l2_lexer_consume(struct l2_lexer *lexer) {
}

void l2_lexer_skip_opt(struct l2_lexer *lexer, enum l2_token_kind kind) {
if (l2_lexer_peek(lexer, 1)->kind == kind) {
if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == kind) {
l2_lexer_consume(lexer);
}
}

+ 112
- 54
lib/parse/parse.c View File

@@ -4,10 +4,11 @@
#include "gen/gen.h"

static int tok_is_end(struct l2_token *tok) {
enum l2_token_kind kind = l2_token_get_kind(tok);
return
tok->kind == L2_TOK_CLOSE_BRACE || tok->kind == L2_TOK_CLOSE_BRACKET ||
tok->kind == L2_TOK_CLOSE_PAREN || tok->kind == L2_TOK_EOF ||
tok->kind == L2_TOK_EOL;
kind == L2_TOK_CLOSE_BRACE || kind == L2_TOK_CLOSE_BRACKET ||
kind == L2_TOK_CLOSE_PAREN || kind == L2_TOK_EOF ||
kind == L2_TOK_EOL;
}

static int parse_expression(
@@ -25,43 +26,52 @@ static int parse_object_literal(

while (1) {
struct l2_token *tok = l2_lexer_peek(lexer, 1);
if (tok->kind == L2_TOK_CLOSE_BRACE) {
if (l2_token_get_kind(tok) == L2_TOK_CLOSE_BRACE) {
l2_lexer_consume(lexer); // '}'
break;
} else if (tok->kind != L2_TOK_IDENT) {
} else if (l2_token_get_kind(tok) != L2_TOK_IDENT) {
l2_parse_err(err, tok, "In object literal: Expected identifier, got %s",
l2_token_kind_name(tok->kind));
l2_token_get_name(tok));
return -1;
}

l2_trace("key: '%s'", tok->v.str);
char *key = l2_token_extract_str(tok);
struct l2_token_value key = l2_token_extract_val(tok);
l2_lexer_consume(lexer); // ident

tok = l2_lexer_peek(lexer, 1);
if (tok->kind != L2_TOK_COLON) {
if (l2_token_get_kind(tok) != L2_TOK_COLON) {
if (!(key.flags & L2_TOK_SMALL)) free(key.str);
l2_parse_err(err, tok, "In object literal: Expected ':', got %s",
l2_token_kind_name(tok->kind));
l2_token_get_name(tok));
return -1;
}

l2_lexer_consume(lexer); // ':'

if (parse_expression(lexer, gen, err) < 0) {
if (!(key.flags & L2_TOK_SMALL)) free(key.str);
return -1;
}

l2_gen_namespace_set(gen, &key);
if (key.flags & L2_TOK_SMALL) {
l2_gen_namespace_set_copy(gen, key.strbuf);
} else {
l2_gen_namespace_set_copy(gen, key.str);
}

l2_gen_discard(gen);

tok = l2_lexer_peek(lexer, 1);
if (tok->kind != L2_TOK_EOL && tok->kind != L2_TOK_CLOSE_BRACE) {
if (
l2_token_get_kind(tok) != L2_TOK_EOL &&
l2_token_get_kind(tok) != L2_TOK_CLOSE_BRACE) {
l2_parse_err(err, tok, "In object literal: Expected EOL or '}', got %s",
l2_token_kind_name(tok->kind));
l2_token_get_name(tok));
return -1;
}

if (tok->kind == L2_TOK_EOL) {
if (l2_token_get_kind(tok) == L2_TOK_EOL) {
l2_lexer_consume(lexer); // EOL
}
}
@@ -81,7 +91,7 @@ static int parse_function_literal_impl(

int first = 1;
while (1) {
if (l2_lexer_peek(lexer, 1)->kind == L2_TOK_CLOSE_BRACE) {
if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_CLOSE_BRACE) {
l2_lexer_consume(lexer); // '}'
break;
}
@@ -156,12 +166,14 @@ static int parse_object_or_function_literal(
struct l2_token *tok = l2_lexer_peek(lexer, 1);
struct l2_token *tok2 = l2_lexer_peek(lexer, 2);

if (tok->kind == L2_TOK_CLOSE_BRACE) {
if (l2_token_get_kind(tok) == L2_TOK_CLOSE_BRACE) {
l2_trace_scope("empty object literal");
l2_lexer_consume(lexer); // '}'

l2_gen_namespace(gen);
} else if (tok->kind == L2_TOK_IDENT && tok2->kind == L2_TOK_COLON) {
} else if (
l2_token_get_kind(tok) == L2_TOK_IDENT &&
l2_token_get_kind(tok2) == L2_TOK_COLON) {
if (parse_object_literal(lexer, gen, err) < 0) {
return -1;
}
@@ -182,7 +194,7 @@ static int parse_array_literal(

int count = 0;
while (1) {
if (l2_lexer_peek(lexer, 1)->kind == L2_TOK_CLOSE_BRACKET) {
if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_CLOSE_BRACKET) {
l2_lexer_consume(lexer); // ']'
break;
}
@@ -205,7 +217,7 @@ static int parse_arg_level_expression_base(
struct l2_token *tok = l2_lexer_peek(lexer, 1);
struct l2_token *tok2 = l2_lexer_peek(lexer, 2);

if (tok->kind == L2_TOK_OPEN_PAREN) {
if (l2_token_get_kind(tok) == L2_TOK_OPEN_PAREN) {
l2_trace_scope("group expr");
l2_lexer_consume(lexer); // '('

@@ -214,53 +226,67 @@ static int parse_arg_level_expression_base(
}

tok = l2_lexer_peek(lexer, 1);
if (tok->kind != L2_TOK_CLOSE_PAREN) {
if (l2_token_get_kind(tok) != L2_TOK_CLOSE_PAREN) {
l2_parse_err(err, tok, "Expected '(', got %s",
l2_token_kind_name(tok->kind));
l2_token_get_name(tok));
return -1;
}

l2_lexer_consume(lexer); // ')'
} else if (tok->kind == L2_TOK_IDENT) {
} else if (l2_token_get_kind(tok) == L2_TOK_IDENT) {
l2_trace_scope("ident");
l2_trace("ident '%s'", tok->v.str);
char *ident = l2_token_extract_str(tok);
struct l2_token_value ident = l2_token_extract_val(tok);
l2_lexer_consume(lexer); // ident

l2_gen_stack_frame_lookup(gen, &ident);
} else if (tok->kind == L2_TOK_NUMBER) {
if (ident.flags & L2_TOK_SMALL) {
l2_gen_stack_frame_lookup_copy(gen, ident.strbuf);
} else {
l2_gen_stack_frame_lookup(gen, &ident.str);
}
} else if (l2_token_get_kind(tok) == L2_TOK_NUMBER) {
l2_trace_scope("number literal");
l2_trace("number %g", tok->v.num);
double number = tok->v.num;
l2_lexer_consume(lexer); // number

l2_gen_number(gen, number);
} else if (tok->kind == L2_TOK_STRING) {
} else if (l2_token_get_kind(tok) == L2_TOK_STRING) {
l2_trace_scope("string literal");
l2_trace("string '%s'", tok->v.str);
char *str = l2_token_extract_str(tok);
struct l2_token_value str = l2_token_extract_val(tok);
l2_lexer_consume(lexer); // string

l2_gen_string(gen, &str);
} else if (tok->kind == L2_TOK_QUOT && tok2->kind == L2_TOK_IDENT) {
if (str.flags & L2_TOK_SMALL) {
l2_gen_string_copy(gen, str.strbuf);
} else {
l2_gen_string(gen, &str.str);
}
} else if (
l2_token_get_kind(tok) == L2_TOK_QUOT &&
l2_token_get_kind(tok2) == L2_TOK_IDENT) {
l2_trace_scope("atom literal");
l2_trace("atom '%s'", tok->v.str);
char *ident = l2_token_extract_str(tok2);
struct l2_token_value ident = l2_token_extract_val(tok2);
l2_lexer_consume(lexer); // "'"
l2_lexer_consume(lexer); // ident

l2_gen_atom(gen, &ident);
} else if (tok->kind == L2_TOK_OPEN_BRACE) {
if (ident.flags & L2_TOK_SMALL) {
l2_gen_atom_copy(gen, ident.strbuf);
} else {
l2_gen_atom(gen, &ident.str);
}
} else if (l2_token_get_kind(tok) == L2_TOK_OPEN_BRACE) {
if (parse_object_or_function_literal(lexer, gen, err) < 0) {
return -1;
}
} else if (tok->kind == L2_TOK_OPEN_BRACKET) {
} else if (l2_token_get_kind(tok) == L2_TOK_OPEN_BRACKET) {
if (parse_array_literal(lexer, gen, err) < 0) {
return -1;
}
} else {
l2_parse_err(err, tok, "Unexpected token %s",
l2_token_kind_name(tok->kind));
l2_token_get_name(tok));
return -1;
}

@@ -279,37 +305,53 @@ static int parse_arg_level_expression(
struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
struct l2_token *tok3 = l2_lexer_peek(lexer, 3);

if (tok->kind == L2_TOK_OPEN_PAREN && tok2->kind == L2_TOK_CLOSE_PAREN) {
if (
l2_token_get_kind(tok) == L2_TOK_OPEN_PAREN &&
l2_token_get_kind(tok2) == L2_TOK_CLOSE_PAREN) {
l2_trace_scope("niladic func call");
l2_lexer_consume(lexer); // '('
l2_lexer_consume(lexer); // ')'

l2_gen_func_call(gen, 0);
} else if (
tok->kind == L2_TOK_PERIOD && tok2->kind == L2_TOK_IDENT &&
tok3->kind == L2_TOK_EQUALS) {
l2_token_get_kind(tok) == L2_TOK_PERIOD &&
l2_token_get_kind(tok2) == L2_TOK_IDENT &&
l2_token_get_kind(tok3) == L2_TOK_EQUALS) {
l2_trace_scope("namespace assign");
l2_trace("ident '%s'", tok2->v.str);
char *ident = l2_token_extract_str(tok2);
struct l2_token_value ident = l2_token_extract_val(tok2);
l2_lexer_consume(lexer); // '.'
l2_lexer_consume(lexer); // ident
l2_lexer_consume(lexer); // '='

if (parse_expression(lexer, gen, err) < 0) {
if (!(ident.flags & L2_TOK_SMALL)) free(ident.str);
return -1;
}

l2_gen_namespace_set(gen, &ident);
if (ident.flags & L2_TOK_SMALL) {
l2_gen_namespace_set_copy(gen, ident.strbuf);
} else {
l2_gen_namespace_set(gen, &ident.str);
}
l2_gen_swap_discard(gen);
} else if (tok->kind == L2_TOK_PERIOD && tok2->kind == L2_TOK_IDENT) {
} else if (
l2_token_get_kind(tok) == L2_TOK_PERIOD &&
l2_token_get_kind(tok2) == L2_TOK_IDENT) {
l2_trace_scope("namespace lookup");
l2_trace("ident '%s'", tok2->v.str);
char *ident = l2_token_extract_str(tok2);
struct l2_token_value ident = l2_token_extract_val(tok2);
l2_lexer_consume(lexer); // '.'
l2_lexer_consume(lexer); // ident

l2_gen_namespace_lookup(gen, &ident);
} else if (tok->kind == L2_TOK_DOT_NUMBER && tok2->kind == L2_TOK_EQUALS) {
if (ident.flags & L2_TOK_SMALL) {
l2_gen_namespace_lookup_copy(gen, ident.strbuf);
} else {
l2_gen_namespace_lookup(gen, &ident.str);
}
} else if (
l2_token_get_kind(tok) == L2_TOK_DOT_NUMBER &&
l2_token_get_kind(tok2) == L2_TOK_EQUALS) {
l2_trace_scope("direct array assign");
int number = tok->v.integer;
l2_lexer_consume(lexer); // dot-number
@@ -321,13 +363,15 @@ static int parse_arg_level_expression(

l2_gen_array_set(gen, number);
l2_gen_swap_discard(gen);
} else if (tok->kind == L2_TOK_DOT_NUMBER) {
} else if (l2_token_get_kind(tok) == L2_TOK_DOT_NUMBER) {
l2_trace_scope("direct array lookup");
int number = tok->v.integer;
l2_lexer_consume(lexer); // dot-number

l2_gen_array_lookup(gen, number);
} else if (tok->kind == L2_TOK_PERIOD && tok2->kind == L2_TOK_OPEN_PAREN) {
} else if (
l2_token_get_kind(tok) == L2_TOK_PERIOD &&
l2_token_get_kind(tok2) == L2_TOK_OPEN_PAREN) {
l2_trace_scope("dynamic lookup");
l2_lexer_consume(lexer); // '.'
l2_lexer_consume(lexer); // '('
@@ -336,14 +380,14 @@ static int parse_arg_level_expression(
return -1;
}

if (l2_lexer_peek(lexer, 1)->kind != L2_TOK_CLOSE_PAREN) {
if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) != L2_TOK_CLOSE_PAREN) {
l2_parse_err(err, tok, "Expected '(', got %s",
l2_token_kind_name(tok->kind));
l2_token_get_name(tok));
return -1;
}
l2_lexer_consume(lexer); // ')'

if (l2_lexer_peek(lexer, 1)->kind == L2_TOK_EQUALS) {
if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_EQUALS) {
l2_lexer_consume(lexer); // '='
if (parse_expression(lexer, gen, err) < 0) {
return -1;
@@ -387,30 +431,44 @@ static int parse_expression(
struct l2_token *tok = l2_lexer_peek(lexer, 1);
struct l2_token *tok2 = l2_lexer_peek(lexer, 2);

if (tok->kind == L2_TOK_IDENT && tok2->kind == L2_TOK_COLON_EQ) {
if (
l2_token_get_kind(tok) == L2_TOK_IDENT &&
l2_token_get_kind(tok2) == L2_TOK_COLON_EQ) {
l2_trace_scope("assign expression");
l2_trace("ident '%s'", tok->v.str);
char *ident = l2_token_extract_str(tok);
struct l2_token_value ident = l2_token_extract_val(tok);
l2_lexer_consume(lexer); // ident
l2_lexer_consume(lexer); // :=

if (parse_expression(lexer, gen, err) < 0) {
if (!(ident.flags & L2_TOK_SMALL)) free(ident.str);
return -1;
}

l2_gen_stack_frame_set(gen, &ident);
} else if (tok->kind == L2_TOK_IDENT && tok2->kind == L2_TOK_EQUALS) {
if (ident.flags & L2_TOK_SMALL) {
l2_gen_stack_frame_set_copy(gen, ident.strbuf);
} else {
l2_gen_stack_frame_set(gen, &ident.str);
}
} else if (
l2_token_get_kind(tok) == L2_TOK_IDENT &&
l2_token_get_kind(tok2) == L2_TOK_EQUALS) {
l2_trace_scope("replacement assign expression");
l2_trace("ident '%s'", tok->v.str);
char *ident = l2_token_extract_str(tok);
struct l2_token_value ident = l2_token_extract_val(tok);
l2_lexer_consume(lexer); // ident
l2_lexer_consume(lexer); // =

if (parse_expression(lexer, gen, err) < 0) {
if (!(ident.flags & L2_TOK_SMALL)) free(ident.str);
return -1;
}

l2_gen_stack_frame_replace(gen, &ident);
if (ident.flags & L2_TOK_SMALL) {
l2_gen_stack_frame_replace_copy(gen, ident.strbuf);
} else {
l2_gen_stack_frame_replace(gen, &ident.str);
}
} else {
if (parse_arg_level_expression(lexer, gen, err) < 0) {
return -1;
@@ -431,7 +489,7 @@ int l2_parse_program(
l2_trace_scope("program");
while (1) {
l2_lexer_skip_opt(lexer, L2_TOK_EOL);
if (l2_lexer_peek(lexer, 1)->kind == L2_TOK_EOF) {
if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_EOF) {
break;
}


+ 1
- 0
test/src/eval.t.c View File

@@ -26,6 +26,7 @@ static int eval_impl(const char *str, struct l2_parse_error *err) {

w.w.write = l2_io_mem_write;
w.len = 0;
w.size = 0;
w.mem = NULL;
l2_gen_init(&gen, (struct l2_io_writer *)&w);


+ 38
- 21
test/src/lex.t.c View File

@@ -18,49 +18,66 @@ describe(lex) {
test("lex assignment") {
lex("foo := 10");

asserteq(l2_lexer_peek(&lexer, 1)->kind, L2_TOK_IDENT);
asserteq(l2_lexer_peek(&lexer, 1)->v.str, "foo");
asserteq(l2_token_get_kind(l2_lexer_peek(&lexer, 1)), L2_TOK_IDENT);
asserteq(l2_lexer_peek(&lexer, 1)->v.strbuf, "foo");
l2_lexer_consume(&lexer);

asserteq(l2_lexer_peek(&lexer, 1)->kind, L2_TOK_COLON_EQ);
asserteq(l2_token_get_kind(l2_lexer_peek(&lexer, 1)), L2_TOK_COLON_EQ);
l2_lexer_consume(&lexer);

asserteq(l2_lexer_peek(&lexer, 1)->kind, L2_TOK_NUMBER);
asserteq(l2_token_get_kind(l2_lexer_peek(&lexer, 1)), L2_TOK_NUMBER);
asserteq(l2_lexer_peek(&lexer, 1)->v.num, 10);
l2_lexer_consume(&lexer);

asserteq(l2_lexer_peek(&lexer, 1)->kind, L2_TOK_EOF);
asserteq(l2_token_get_kind(l2_lexer_peek(&lexer, 1)), L2_TOK_EOF);
}

// Same token sequence as "lex assignment", but with an identifier that is
// longer than the 15-byte inline strbuf, so the lexer must have switched to
// a heap buffer and the text is read through v.str rather than v.strbuf.
test("lex assignment, non-sso") {
lex("foo-very-long-name := 10");

// Identifier: stored out of line.
asserteq(l2_token_get_kind(l2_lexer_peek(&lexer, 1)), L2_TOK_IDENT);
asserteq(l2_lexer_peek(&lexer, 1)->v.str, "foo-very-long-name");
l2_lexer_consume(&lexer);

// ':='
asserteq(l2_token_get_kind(l2_lexer_peek(&lexer, 1)), L2_TOK_COLON_EQ);
l2_lexer_consume(&lexer);

// Number literal.
asserteq(l2_token_get_kind(l2_lexer_peek(&lexer, 1)), L2_TOK_NUMBER);
asserteq(l2_lexer_peek(&lexer, 1)->v.num, 10);
l2_lexer_consume(&lexer);

// Nothing left after the number.
asserteq(l2_token_get_kind(l2_lexer_peek(&lexer, 1)), L2_TOK_EOF);
}

test("lex var deref assignment") {
lex("foo := 10\nbar := foo");

asserteq(l2_lexer_peek(&lexer, 1)->kind, L2_TOK_IDENT);
asserteq(l2_lexer_peek(&lexer, 1)->v.str, "foo");
asserteq(l2_token_get_kind(l2_lexer_peek(&lexer, 1)), L2_TOK_IDENT);
asserteq(l2_lexer_peek(&lexer, 1)->v.strbuf, "foo");
l2_lexer_consume(&lexer);

asserteq(l2_lexer_peek(&lexer, 1)->kind, L2_TOK_COLON_EQ);
asserteq(l2_token_get_kind(l2_lexer_peek(&lexer, 1)), L2_TOK_COLON_EQ);
l2_lexer_consume(&lexer);

asserteq(l2_lexer_peek(&lexer, 1)->kind, L2_TOK_NUMBER);
asserteq(l2_token_get_kind(l2_lexer_peek(&lexer, 1)), L2_TOK_NUMBER);
asserteq(l2_lexer_peek(&lexer, 1)->v.num, 10);
l2_lexer_consume(&lexer);

asserteq(l2_lexer_peek(&lexer, 1)->kind, L2_TOK_EOL);
asserteq(l2_token_get_kind(l2_lexer_peek(&lexer, 1)), L2_TOK_EOL);
l2_lexer_consume(&lexer);

asserteq(l2_lexer_peek(&lexer, 1)->kind, L2_TOK_IDENT);
asserteq(l2_lexer_peek(&lexer, 1)->v.str, "bar");
asserteq(l2_token_get_kind(l2_lexer_peek(&lexer, 1)), L2_TOK_IDENT);
asserteq(l2_lexer_peek(&lexer, 1)->v.strbuf, "bar");
l2_lexer_consume(&lexer);

asserteq(l2_lexer_peek(&lexer, 1)->kind, L2_TOK_COLON_EQ);
asserteq(l2_token_get_kind(l2_lexer_peek(&lexer, 1)), L2_TOK_COLON_EQ);
l2_lexer_consume(&lexer);

asserteq(l2_lexer_peek(&lexer, 1)->kind, L2_TOK_IDENT);
asserteq(l2_lexer_peek(&lexer, 1)->v.str, "foo");
asserteq(l2_token_get_kind(l2_lexer_peek(&lexer, 1)), L2_TOK_IDENT);
asserteq(l2_lexer_peek(&lexer, 1)->v.strbuf, "foo");
l2_lexer_consume(&lexer);

asserteq(l2_lexer_peek(&lexer, 1)->kind, L2_TOK_EOF);
asserteq(l2_token_get_kind(l2_lexer_peek(&lexer, 1)), L2_TOK_EOF);
}

test("lex peek multiple") {
@@ -68,17 +85,17 @@ describe(lex) {

l2_lexer_peek(&lexer, 3);

asserteq(l2_lexer_peek(&lexer, 1)->kind, L2_TOK_IDENT);
asserteq(l2_lexer_peek(&lexer, 1)->v.str, "foo");
asserteq(l2_token_get_kind(l2_lexer_peek(&lexer, 1)), L2_TOK_IDENT);
asserteq(l2_lexer_peek(&lexer, 1)->v.strbuf, "foo");
l2_lexer_consume(&lexer);

asserteq(l2_lexer_peek(&lexer, 1)->kind, L2_TOK_COLON_EQ);
asserteq(l2_token_get_kind(l2_lexer_peek(&lexer, 1)), L2_TOK_COLON_EQ);
l2_lexer_consume(&lexer);

asserteq(l2_lexer_peek(&lexer, 1)->kind, L2_TOK_NUMBER);
asserteq(l2_token_get_kind(l2_lexer_peek(&lexer, 1)), L2_TOK_NUMBER);
asserteq(l2_lexer_peek(&lexer, 1)->v.num, 10);
l2_lexer_consume(&lexer);

asserteq(l2_lexer_peek(&lexer, 1)->kind, L2_TOK_EOF);
asserteq(l2_token_get_kind(l2_lexer_peek(&lexer, 1)), L2_TOK_EOF);
}
}

Loading…
Cancel
Save