Browse Source

basic programs can now execute correctly

master
Martin Dørum 3 years ago
parent
commit
2dffcd295d
11 changed files with 199 additions and 61 deletions
  1. 7
    10
      include/lang2/bytecode.h
  2. 2
    1
      include/lang2/gen/gen.h
  3. 3
    0
      include/lang2/parse/lex.h
  4. 6
    0
      include/lang2/vm/vm.h
  5. 9
    4
      lib/gen/gen.c
  6. 27
    4
      lib/parse/lex.c
  7. 65
    18
      lib/parse/parse.c
  8. 5
    1
      lib/vm/namespace.c
  9. 2
    6
      lib/vm/print.c
  10. 70
    17
      lib/vm/vm.c
  11. 3
    0
      test/src/lex.t.c

+ 7
- 10
include/lang2/bytecode.h View File

@@ -46,11 +46,14 @@ enum l2_opcode {

/*
* Call a function.
* Pop <word>
* Pop <argc>
* Pop <func>
* Pop argc times
* Push <iptr> + 1
* Jump to <word>
* Push array with args
* Call <func>
*/
L2_OP_CALL,
L2_OP_FUNC_CALL,

/*
* Jump relative.
@@ -59,13 +62,6 @@ enum l2_opcode {
*/
L2_OP_RJMP,

/*
* Generate a stack frame.
* Alloc namespace <var>
* NSPush <var>
*/
L2_OP_GEN_STACK_FRAME,

/*
* Look up a value from the current stack frame.
* Pop <word>
@@ -85,6 +81,7 @@ enum l2_opcode {
/*
* Return from a function.
* NSPop
* Pop (discard args array)
* Pop <word>
* Jump to <word>
*/

+ 2
- 1
include/lang2/gen/gen.h View File

@@ -23,14 +23,15 @@ void l2_gen_flush(struct l2_generator *gen);
void l2_gen_free(struct l2_generator *gen);

void l2_gen_halt(struct l2_generator *gen);
void l2_gen_stack_frame(struct l2_generator *gen);
void l2_gen_rjmp(struct l2_generator *gen, l2_word len);
void l2_gen_pop(struct l2_generator *gen);
void l2_gen_push(struct l2_generator *gen, l2_word word);
void l2_gen_ret(struct l2_generator *gen);
void l2_gen_assignment(struct l2_generator *gen, char **ident);
void l2_gen_number(struct l2_generator *gen, double num);
void l2_gen_string(struct l2_generator *gen, char **str);
void l2_gen_function(struct l2_generator *gen, l2_word pos);
void l2_gen_namespace_lookup(struct l2_generator *gen, char **ident);
void l2_gen_func_call(struct l2_generator *gen);

#endif

+ 3
- 0
include/lang2/parse/lex.h View File

@@ -13,6 +13,7 @@ enum l2_token_kind {
L2_TOK_COMMA,
L2_TOK_PERIOD,
L2_TOK_COLON_EQ,
L2_TOK_EOL,
L2_TOK_EOF,
L2_TOK_NUMBER,
L2_TOK_STRING,
@@ -42,6 +43,7 @@ struct l2_lexer {
int tokidx;
int line;
int ch;
int parens;

struct l2_bufio_reader reader;
};
@@ -49,5 +51,6 @@ struct l2_lexer {
void l2_lexer_init(struct l2_lexer *lexer, struct l2_io_reader *r);
struct l2_token *l2_lexer_peek(struct l2_lexer *lexer, int count);
void l2_lexer_consume(struct l2_lexer *lexer);
void l2_lexer_skip_opt(struct l2_lexer *lexer, enum l2_token_kind kind);

#endif

+ 6
- 0
include/lang2/vm/vm.h View File

@@ -6,6 +6,10 @@
#include "../bytecode.h"
#include "../bitset.h"

// Forward declarations: the function pointer type below only needs pointers
// to these structs, not their full definitions.
struct l2_vm;
struct l2_vm_array;
// Signature of a function implemented in C that can be stored in a value
// (see the `cfunc` member of struct l2_vm_value). It is called with the VM
// and an array holding the call's arguments, and returns an l2_word; the
// FUNC_CALL handler in vm.c pushes that returned word onto the VM stack.
typedef l2_word (*l2_vm_cfunction)(struct l2_vm *vm, struct l2_vm_array *args);

struct l2_vm_value {
enum l2_value_flags {
L2_VAL_TYPE_NONE,
@@ -15,6 +19,7 @@ struct l2_vm_value {
L2_VAL_TYPE_ARRAY,
L2_VAL_TYPE_NAMESPACE,
L2_VAL_TYPE_FUNCTION,
L2_VAL_TYPE_CFUNCTION,
L2_VAL_MARKED = 1 << 7,
L2_VAL_CONST = 1 << 8,
} flags;
@@ -25,6 +30,7 @@ struct l2_vm_value {
l2_word pos;
l2_word namespace;
} func;
l2_vm_cfunction cfunc;
void *data;
};
};

+ 9
- 4
lib/gen/gen.c View File

@@ -28,10 +28,6 @@ void l2_gen_halt(struct l2_generator *gen) {
put(gen, L2_OP_HALT);
}

void l2_gen_stack_frame(struct l2_generator *gen) {
put(gen, L2_OP_GEN_STACK_FRAME);
}

void l2_gen_rjmp(struct l2_generator *gen, l2_word len) {
put(gen, L2_OP_PUSH);
put(gen, len);
@@ -42,6 +38,11 @@ void l2_gen_pop(struct l2_generator *gen) {
put(gen, L2_OP_POP);
}

// Emit a push of a literal word: first the L2_OP_PUSH opcode, then `word`
// itself as the next item in the output. The order of the two put() calls
// is the instruction layout, so it must not be swapped.
void l2_gen_push(struct l2_generator *gen, l2_word word) {
put(gen, L2_OP_PUSH);
put(gen, word);
}

// Emit a single L2_OP_RET opcode (described in bytecode.h as
// "Return from a function"). The opcode takes no inline operand here.
void l2_gen_ret(struct l2_generator *gen) {
put(gen, L2_OP_RET);
}
@@ -111,3 +112,7 @@ void l2_gen_namespace_lookup(struct l2_generator *gen, char **ident) {
put(gen, atom_id);
put(gen, L2_OP_STACK_FRAME_LOOKUP);
}

// Emit a single L2_OP_FUNC_CALL opcode with no inline operand.
// The VM handler for this opcode pops the function id first, then the
// argument count, then that many arguments. Callers therefore have to emit
// the arguments, then the count, then the function lookup before calling
// this (parse_expression does it in exactly that order).
void l2_gen_func_call(struct l2_generator *gen) {
put(gen, L2_OP_FUNC_CALL);
}

+ 27
- 4
lib/parse/lex.c View File

@@ -40,6 +40,8 @@ const char *l2_token_kind_name(enum l2_token_kind kind) {
return "period";
case L2_TOK_COLON_EQ:
return "period";
case L2_TOK_EOL:
return "end-of-line";
case L2_TOK_EOF:
return "end-of-file";
case L2_TOK_NUMBER:
@@ -70,6 +72,7 @@ void l2_lexer_init(struct l2_lexer *lexer, struct l2_io_reader *r) {
lexer->tokidx = 0;
lexer->line = 1;
lexer->ch = 1;
lexer->parens = 0;
l2_bufio_reader_init(&lexer->reader, r);
}

@@ -93,8 +96,16 @@ static int is_whitespace(int ch) {
return ch == ' ' || ch == '\r' || ch == '\n' || ch == '\t';
}

static void skip_whitespace(struct l2_lexer *lexer) {
while (is_whitespace(l2_bufio_peek(&lexer->reader, 1))) read_ch(lexer);
// Consume every whitespace character at the front of the input.
// Returns 1 if at least one of the consumed characters was a newline,
// otherwise 0 (including when nothing was consumed).
static int skip_whitespace(struct l2_lexer *lexer) {
	int saw_newline = 0;

	for (;;) {
		// Peek first so the first non-whitespace character is left unread.
		if (!is_whitespace(l2_bufio_peek(&lexer->reader, 1))) {
			break;
		}

		if (read_ch(lexer) == '\n') {
			saw_newline = 1;
		}
	}

	return saw_newline;
}

static void read_string(struct l2_lexer *lexer, struct l2_token *tok) {
@@ -213,21 +224,27 @@ static void read_ident(struct l2_lexer *lexer, struct l2_token *tok) {
}

static void read_tok(struct l2_lexer *lexer, struct l2_token *tok) {
skip_whitespace(lexer);

tok->line = lexer->line;
tok->ch = lexer->ch;
int nl = skip_whitespace(lexer);

if (nl && lexer->parens == 0) {
tok->kind = L2_TOK_EOL;
return;
}

int ch = peek_ch(lexer);
switch (ch) {
case '(':
read_ch(lexer);
tok->kind = L2_TOK_OPEN_PAREN;
lexer->parens += 1;
break;

case ')':
read_ch(lexer);
tok->kind = L2_TOK_CLOSE_PAREN;
lexer->parens -= 1;
break;

case '{':
@@ -317,3 +334,9 @@ void l2_lexer_consume(struct l2_lexer *lexer) {
lexer->tokidx -= 1;
memmove(lexer->toks, lexer->toks + 1, lexer->tokidx * sizeof(*lexer->toks));
}

void l2_lexer_skip_opt(struct l2_lexer *lexer, enum l2_token_kind kind) {
if (l2_lexer_peek(lexer, 1)->kind == kind) {
l2_lexer_consume(lexer);
}
}

+ 65
- 18
lib/parse/parse.c View File

@@ -1,16 +1,21 @@
#include "parse/parse.h"

#include <stdbool.h>

#include "gen/gen.h"

// Returns 1 if `tok` terminates an expression or argument list: a closing
// paren, brace or bracket, an end-of-line, or end-of-file. Returns 0 for
// every other token kind.
static int is_end_tok(struct l2_token *tok) {
	switch (tok->kind) {
	case L2_TOK_CLOSE_PAREN:
	case L2_TOK_CLOSE_BRACE:
	case L2_TOK_CLOSE_BRACKET:
	case L2_TOK_EOL:
	case L2_TOK_EOF:
		return 1;

	default:
		return 0;
	}
}

static int parse_expression(
struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err);

static int parse_function_impl(
struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
l2_gen_stack_frame(gen);
l2_lexer_consume(lexer); // {
l2_lexer_skip_opt(lexer, L2_TOK_EOL);

while (1) {
struct l2_token *tok = l2_lexer_peek(lexer, 1);
@@ -25,6 +30,8 @@ static int parse_function_impl(
if (parse_expression(lexer, gen, err) < 0) {
return -1;
}

l2_lexer_skip_opt(lexer, L2_TOK_EOL);
}

l2_gen_ret(gen);
@@ -71,22 +78,24 @@ static int parse_function(
return 0;
}

static int parse_expression(
static int parse_sub_expression(
struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
struct l2_token *tok = l2_lexer_peek(lexer, 1);
struct l2_token *tok2 = l2_lexer_peek(lexer, 2);

if (tok->kind == L2_TOK_IDENT && tok2->kind == L2_TOK_COLON_EQ) {
char *ident = l2_token_extract_str(tok);
l2_lexer_consume(lexer); // ident
l2_lexer_consume(lexer); // :=

if (tok->kind == L2_TOK_OPEN_PAREN) {
l2_lexer_consume(lexer); // (
if (parse_expression(lexer, gen, err) < 0) {
free(ident);
return -1;
}

l2_gen_assignment(gen, &ident);
tok = l2_lexer_peek(lexer, 1);
if (tok->kind != L2_TOK_CLOSE_PAREN) {
l2_parse_err(err, tok, "In paren expression: Expected close paren, got %s",
l2_token_kind_name(tok->kind));
return -1;
}

l2_lexer_consume(lexer); // )
return 0;
} else if (tok->kind == L2_TOK_NUMBER) {
l2_gen_number(gen, tok->v.num);
@@ -106,16 +115,52 @@ static int parse_expression(
return parse_function(lexer, gen, err);
}

l2_parse_err(err, tok, "In expression: Unexpected tokens %s, %s",
l2_token_kind_name(tok->kind), l2_token_kind_name(tok2->kind));
l2_parse_err(err, tok, "In expression: Unexpected token %s",
l2_token_kind_name(tok->kind));
return -1;
}

int l2_parse_program(
/*
 * Parse one expression and emit bytecode for it.
 *
 * The form is chosen by peeking at the next two tokens:
 *   - `ident := expr`: parse the right-hand side, then emit an assignment
 *     to `ident`.
 *   - `ident arg...` (an identifier followed by anything that is not an end
 *     token): parse each argument as a sub-expression, then emit the
 *     argument count, a lookup of `ident`, and a function call.
 *   - anything else: delegate to parse_sub_expression.
 *
 * Returns 0 on success and -1 if a nested parse fails. `err` is only passed
 * through to the nested parsers.
 */
static int parse_expression(
		struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
	// NOTE(review): this call looks like a removed line carried over from the
	// diff view (l2_gen_stack_frame is deleted elsewhere in this commit).
	// It is kept unchanged here; confirm against the real file.
	l2_gen_stack_frame(gen);
	struct l2_token *tok = l2_lexer_peek(lexer, 1);
	struct l2_token *tok2 = l2_lexer_peek(lexer, 2);

	if (tok->kind == L2_TOK_IDENT && tok2->kind == L2_TOK_COLON_EQ) {
		// Extract the name before consuming the token that holds it.
		char *ident = l2_token_extract_str(tok);
		l2_lexer_consume(lexer); // ident
		l2_lexer_consume(lexer); // :=

		if (parse_expression(lexer, gen, err) < 0) {
			free(ident);
			return -1;
		}

		// Presumably hands ownership of ident to the generator (it takes
		// a char ** and the string is not freed afterwards) -- confirm.
		l2_gen_assignment(gen, &ident);
		return 0;
	} else if (tok->kind == L2_TOK_IDENT && !is_end_tok(tok2)) {
		char *ident = l2_token_extract_str(tok);
		l2_lexer_consume(lexer); // ident

		// Arguments are emitted first, in source order.
		l2_word count = 0;
		while (!is_end_tok(l2_lexer_peek(lexer, 1))) {
			count += 1;
			if (parse_sub_expression(lexer, gen, err) < 0) {
				// ident has not been handed to the generator yet, so it
				// must be released here, as the assignment branch does.
				free(ident);
				return -1;
			}
		}

		// The VM pops the function first, then the count, then the args,
		// so they are emitted in the reverse order: args, count, function.
		l2_gen_push(gen, count);
		l2_gen_namespace_lookup(gen, &ident);
		l2_gen_func_call(gen);
		return 0;
	}

	return parse_sub_expression(lexer, gen, err);
}

int l2_parse_program(
struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
int first = 1;

while (1) {
struct l2_token *tok = l2_lexer_peek(lexer, 1);
@@ -134,7 +179,9 @@ int l2_parse_program(
return -1;
}

first = false;
l2_lexer_skip_opt(lexer, L2_TOK_EOL);

first = 0;
}

l2_gen_halt(gen);

+ 5
- 1
lib/vm/namespace.c View File

@@ -91,7 +91,11 @@ static l2_word get(struct l2_vm_namespace *ns, l2_word key) {
l2_word hash = (key + i) & ns->mask;
l2_word k = ns->data[hash];
if (k == 0) {
return 0;
if (ns->parent == NULL) {
return 0;
} else {
return get(ns->parent->data, key);
}
} else if (k == key) {
return ns->data[ns->size + hash];
}

+ 2
- 6
lib/vm/print.c View File

@@ -137,18 +137,14 @@ void l2_vm_print_op(l2_word *ops, size_t opcount, size_t *ptr) {
printf("ADD\n");
break;

case L2_OP_CALL:
printf("CALL\n");
case L2_OP_FUNC_CALL:
printf("FUNC_CALL\n");
break;

case L2_OP_RJMP:
printf("RJMP\n");
break;

case L2_OP_GEN_STACK_FRAME:
printf("GEN_STACK_FRAME\n");
break;

case L2_OP_STACK_FRAME_LOOKUP:
printf("STACK_FRAME_LOOKUP\n");
break;

+ 70
- 17
lib/vm/vm.c View File

@@ -38,8 +38,12 @@ static void gc_mark_array(struct l2_vm *vm, struct l2_vm_value *val);
static void gc_mark_namespace(struct l2_vm *vm, struct l2_vm_value *val);

static void gc_mark(struct l2_vm *vm, l2_word id) {
printf("GC MARK %i\n", id);
struct l2_vm_value *val = &vm->values[id];
if (val->flags & L2_VAL_MARKED) {
return;
}

printf("GC MARK %i\n", id);
val->flags |= L2_VAL_MARKED;

int typ = l2_vm_value_type(val);
@@ -77,6 +81,10 @@ static void gc_mark_namespace(struct l2_vm *vm, struct l2_vm_value *val) {

gc_mark(vm, ns->data[ns->size + i]);
}

if (ns->parent != NULL) {
gc_mark_namespace(vm, ns->parent);
}
}

static void gc_free(struct l2_vm *vm, l2_word id) {
@@ -101,6 +109,7 @@ static size_t gc_sweep(struct l2_vm *vm) {

struct l2_vm_value *val = &vm->values[i];
if (!(val->flags & L2_VAL_MARKED)) {
printf("GC FREE %zi\n", i);
gc_free(vm, i);
freed += 1;
} else {
@@ -125,6 +134,13 @@ void l2_vm_init(struct l2_vm *vm, l2_word *ops, size_t opcount) {
// variable ID 0 should be the only 'none' variable in the system
l2_word none_id = alloc_val(vm);
vm->values[none_id].flags = L2_VAL_TYPE_NONE | L2_VAL_CONST;

// Need to allocate a root namespace
l2_word root = alloc_val(vm);
vm->values[root].flags = L2_VAL_TYPE_NAMESPACE;
vm->values[root].data = NULL; // Will be allocated on first insert
vm->nstack[vm->nsptr] = root;
vm->nsptr += 1;
}

void l2_vm_free(struct l2_vm *vm) {
@@ -192,10 +208,51 @@ void l2_vm_step(struct l2_vm *vm) {
vm->sptr -= 1;
break;

case L2_OP_CALL:
word = vm->stack[vm->sptr - 1];
vm->stack[vm->sptr - 1] = vm->iptr + 1;
vm->iptr = word;
case L2_OP_FUNC_CALL:
{
l2_word func_id = vm->stack[--vm->sptr];
l2_word argc = vm->stack[--vm->sptr];
struct l2_vm_value *func = &vm->values[func_id];

l2_word arr_id = alloc_val(vm);
vm->values[arr_id].flags = L2_VAL_TYPE_ARRAY;
vm->values[arr_id].data = malloc(
sizeof(struct l2_vm_array) + sizeof(l2_word) * argc);
struct l2_vm_array *arr = vm->values[arr_id].data;
arr->len = argc;
arr->size = argc;

vm->sptr -= argc;
for (l2_word i = 0; i < argc; ++i) {
arr->data[i] = vm->stack[vm->sptr + i];
}

enum l2_value_flags typ = l2_vm_value_type(func);

// C functions are called differently from language functions
if (typ == L2_VAL_TYPE_CFUNCTION) {
vm->stack[vm->sptr++] = func->cfunc(vm, arr);
break;
}

// Don't interpret a non-function as a function
if (typ != L2_VAL_TYPE_FUNCTION) {
// TODO: Error mechanism
break;
}

vm->stack[vm->sptr++] = vm->iptr;
vm->stack[vm->sptr++] = arr_id;

l2_word ns_id = alloc_val(vm);
vm->values[ns_id].flags = L2_VAL_TYPE_NAMESPACE;
vm->values[ns_id].data = calloc(1, sizeof(struct l2_vm_namespace));
struct l2_vm_namespace *ns = vm->values[ns_id].data;
ns->parent = &vm->values[func->func.namespace]; // TODO: This won't work if values is realloc'd
vm->nstack[vm->nsptr++] = ns_id;

vm->iptr = func->func.pos;
}
break;

case L2_OP_RJMP:
@@ -204,14 +261,6 @@ void l2_vm_step(struct l2_vm *vm) {
vm->iptr += word;
break;

case L2_OP_GEN_STACK_FRAME:
word = alloc_val(vm);
vm->values[word].flags = L2_VAL_TYPE_NAMESPACE;
vm->values[word].data = NULL; // Will be allocated on first insert
vm->nstack[vm->nsptr] = word;
vm->nsptr += 1;
break;

case L2_OP_STACK_FRAME_LOOKUP:
{
l2_word key = vm->stack[vm->sptr - 1];
@@ -231,10 +280,14 @@ void l2_vm_step(struct l2_vm *vm) {
break;

case L2_OP_RET:
vm->nsptr -= 1;
vm->iptr = vm->stack[vm->sptr - 1];
vm->sptr -= 1;
vm->nsptr -= 1;
{
l2_word retval = vm->stack[--vm->sptr];
vm->sptr -= 1; // Discard arguments array
l2_word retaddr = vm->stack[--vm->sptr];
vm->stack[vm->sptr++] = retval;
vm->nsptr -= 1; // Pop function stack frame
vm->iptr = retaddr;
}
break;

case L2_OP_ALLOC_INTEGER_32:

+ 3
- 0
test/src/lex.t.c View File

@@ -46,6 +46,9 @@ describe(lex) {
asserteq(l2_lexer_peek(&lexer, 1)->v.num, 10);
l2_lexer_consume(&lexer);

asserteq(l2_lexer_peek(&lexer, 1)->kind, L2_TOK_EOL);
l2_lexer_consume(&lexer);

asserteq(l2_lexer_peek(&lexer, 1)->kind, L2_TOK_IDENT);
asserteq(l2_lexer_peek(&lexer, 1)->v.str, "bar");
l2_lexer_consume(&lexer);

Loading…
Cancel
Save