You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

vm.c 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579
  1. #include "vm/vm.h"
  2. #include <string.h>
  3. #include <stdio.h>
  4. #include <stdarg.h>
  5. #include "vm/builtins.h"
  // Flag checked and set by l2_vm_init so the two writers below are only
  // wired up once, no matter how many VMs get initialized.
  6. static int stdio_inited = 0;
  // File writer that l2_vm_init points at stdout; every VM's std_output
  // refers to its embedded writer (&std_output.w).
  7. static struct l2_io_file_writer std_output;
  // File writer that l2_vm_init points at stderr; every VM's std_error
  // refers to its embedded writer (&std_error.w).
  8. static struct l2_io_file_writer std_error;
  9. static l2_word alloc_val(struct l2_vm *vm) {
  10. size_t id = l2_bitset_set_next(&vm->valueset);
  11. if (id + 16 >= vm->valuessize) {
  12. if (id >= vm->valuessize) {
  13. if (vm->valuessize == 0) {
  14. vm->valuessize = 64;
  15. }
  16. while (id >= vm->valuessize) {
  17. vm->valuessize *= 2;
  18. }
  19. vm->values = realloc(vm->values, sizeof(*vm->values) * vm->valuessize);
  20. } else {
  21. vm->gc_scheduled = 1;
  22. }
  23. }
  24. return (l2_word)id;
  25. }
  /*
   * Reassemble a double from two 32-bit halves of its 64-bit object
   * representation. 'high' supplies the upper 32 bits, 'low' the lower 32.
   * The bits are copied with memcpy rather than reinterpreted through a
   * pointer cast.
   */
  static double u32s_to_double(uint32_t high, uint32_t low) {
      uint64_t bits = ((uint64_t)high << 32) | (uint64_t)low;
      double result;
      memcpy(&result, &bits, sizeof(bits));
      return result;
  }
  32. static void gc_mark_array(struct l2_vm *vm, struct l2_vm_value *val);
  33. static void gc_mark_namespace(struct l2_vm *vm, struct l2_vm_value *val);
  34. static void gc_mark(struct l2_vm *vm, l2_word id) {
  35. struct l2_vm_value *val = &vm->values[id];
  36. if (val->flags & L2_VAL_MARKED) {
  37. return;
  38. }
  39. val->flags |= L2_VAL_MARKED;
  40. int typ = l2_vm_value_type(val);
  41. if (typ == L2_VAL_TYPE_ARRAY) {
  42. gc_mark_array(vm, val);
  43. } else if (typ == L2_VAL_TYPE_NAMESPACE) {
  44. gc_mark_namespace(vm, val);
  45. } else if (typ == L2_VAL_TYPE_FUNCTION) {
  46. gc_mark(vm, val->func.ns);
  47. }
  48. }
  49. static void gc_mark_array(struct l2_vm *vm, struct l2_vm_value *val) {
  50. if (val->array == NULL) {
  51. return;
  52. }
  53. for (size_t i = 0; i < val->array->len; ++i) {
  54. gc_mark(vm, val->array->data[i]);
  55. }
  56. }
  57. static void gc_mark_namespace(struct l2_vm *vm, struct l2_vm_value *val) {
  58. if (val->extra.ns_parent != 0) {
  59. gc_mark(vm, val->extra.ns_parent);
  60. }
  61. if (val->ns == NULL) {
  62. return;
  63. }
  64. for (size_t i = 0; i < val->ns->size; ++i) {
  65. l2_word key = val->ns->data[i];
  66. if (key == 0 || key == ~(l2_word)0) {
  67. continue;
  68. }
  69. gc_mark(vm, val->ns->data[val->ns->size + i]);
  70. }
  71. }
  72. static void gc_free(struct l2_vm *vm, l2_word id) {
  73. struct l2_vm_value *val = &vm->values[id];
  74. l2_bitset_unset(&vm->valueset, id);
  75. // Don't need to do anything more; the next round of GC will free
  76. // whichever values were only referenced by the array
  77. int typ = l2_vm_value_type(val);
  78. if (typ == L2_VAL_TYPE_ARRAY) {
  79. free(val->array);
  80. } else if (typ == L2_VAL_TYPE_BUFFER) {
  81. free(val->buffer);
  82. } else if (typ == L2_VAL_TYPE_NAMESPACE) {
  83. free(val->ns);
  84. } else if (typ == L2_VAL_TYPE_ERROR) {
  85. free(val->error);
  86. }
  87. }
  88. static size_t gc_sweep(struct l2_vm *vm) {
  89. size_t freed = 0;
  90. // Skip ID 0, because that should always exist
  91. for (size_t i = 1; i < vm->valuessize; ++i) {
  92. if (!l2_bitset_get(&vm->valueset, i)) {
  93. continue;
  94. }
  95. struct l2_vm_value *val = &vm->values[i];
  96. if (!(val->flags & L2_VAL_MARKED)) {
  97. l2_bitset_unset(&vm->valueset, i);
  98. gc_free(vm, i);
  99. freed += 1;
  100. } else {
  101. val->flags &= ~L2_VAL_MARKED;
  102. }
  103. }
  104. return freed;
  105. }
  106. const char *l2_value_type_name(enum l2_value_type typ) {
  107. switch (typ) {
  108. case L2_VAL_TYPE_NONE: return "NONE";
  109. case L2_VAL_TYPE_ATOM: return "ATOM";
  110. case L2_VAL_TYPE_REAL: return "REAL";
  111. case L2_VAL_TYPE_BUFFER: return "BUFFER";
  112. case L2_VAL_TYPE_ARRAY: return "ARRAY";
  113. case L2_VAL_TYPE_NAMESPACE: return "NAMESPACE";
  114. case L2_VAL_TYPE_FUNCTION: return "FUNCTION";
  115. case L2_VAL_TYPE_CFUNCTION: return "CFUNCTION";
  116. case L2_VAL_TYPE_ERROR: return "ERROR";
  117. }
  118. return "(unknown)";
  119. }
  120. void l2_vm_init(struct l2_vm *vm, l2_word *ops, size_t opcount) {
  121. if (!stdio_inited) {
  122. std_output.w.write = l2_io_file_write;
  123. std_output.f = stdout;
  124. std_error.w.write = l2_io_file_write;
  125. std_error.f = stderr;
  126. stdio_inited = 1;
  127. }
  128. vm->std_output = &std_output.w;
  129. vm->std_error = &std_error.w;
  130. vm->halted = 0;
  131. vm->gc_scheduled = 0;
  132. vm->ops = ops;
  133. vm->opcount = opcount;
  134. vm->iptr = 0;
  135. vm->sptr = 0;
  136. vm->fsptr = 0;
  137. vm->values = NULL;
  138. vm->valuessize = 0;
  139. l2_bitset_init(&vm->valueset);
  140. // It's wasteful to allocate new 'none' variables all the time,
  141. // variable ID 0 should be the only 'none' variable in the system
  142. l2_word none_id = alloc_val(vm);
  143. vm->values[none_id].flags = L2_VAL_TYPE_NONE | L2_VAL_CONST;
  144. // Need to allocate a builtins namespace
  145. l2_word builtins = alloc_val(vm);
  146. vm->values[builtins].extra.ns_parent = 0;
  147. vm->values[builtins].ns = NULL; // Will be allocated on first insert
  148. vm->values[builtins].flags = L2_VAL_TYPE_NAMESPACE;
  149. vm->fstack[vm->fsptr].ns = builtins;
  150. vm->fstack[vm->fsptr].retptr = 0;
  151. vm->fsptr += 1;
  152. // Need to allocate a root namespace
  153. l2_word root = alloc_val(vm);
  154. vm->values[root].extra.ns_parent = builtins;
  155. vm->values[root].ns = NULL;
  156. vm->values[root].flags = L2_VAL_TYPE_NAMESPACE;
  157. vm->fstack[vm->fsptr].ns = root;
  158. vm->fstack[vm->fsptr].retptr = 0;
  159. vm->fsptr += 1;
  160. // Define a C function variable for every builtin
  161. l2_word id;
  162. l2_word key = 1;
  163. #define X(name, f) \
  164. id = alloc_val(vm); \
  165. vm->values[id].flags = L2_VAL_TYPE_CFUNCTION; \
  166. vm->values[id].cfunc = f; \
  167. l2_vm_namespace_set(&vm->values[builtins], key++, id);
  168. #include "builtins.x.h"
  169. #undef X
  170. }
  171. l2_word l2_vm_alloc(struct l2_vm *vm, enum l2_value_type typ, enum l2_value_flags flags) {
  172. l2_word id = alloc_val(vm);
  173. memset(&vm->values[id], 0, sizeof(vm->values[id]));
  174. vm->values[id].flags = typ | flags;
  175. return id;
  176. }
  177. l2_word l2_vm_error(struct l2_vm *vm, const char *fmt, ...) {
  178. l2_word id = alloc_val(vm);
  179. struct l2_vm_value *val = &vm->values[id];
  180. val->flags = L2_VAL_CONST | L2_VAL_TYPE_ERROR;
  181. char buf[256];
  182. va_list va;
  183. va_start(va, fmt);
  184. int n = vsnprintf(buf, sizeof(buf), fmt, va);
  185. if (n < 0) {
  186. const char *message = "Failed to generate error message!";
  187. val->error = malloc(strlen(message) + 1);
  188. strcpy(val->error, message);
  189. va_end(va);
  190. return id;
  191. } else if ((size_t)n + 1 < sizeof(buf)) {
  192. val->error = malloc(n + 1);
  193. strcpy(val->error, buf);
  194. va_end(va);
  195. return id;
  196. }
  197. val->error = malloc(n + 1);
  198. vsnprintf(val->error, n + 1, fmt, va);
  199. va_end(va);
  200. return id;
  201. }
  202. l2_word l2_vm_type_error(struct l2_vm *vm, struct l2_vm_value *val) {
  203. return l2_vm_error(vm, "Unexpected type %s", l2_value_type_name(l2_vm_value_type(val)));
  204. }
  205. void l2_vm_free(struct l2_vm *vm) {
  206. // Skip ID 0, because that's always NONE
  207. for (size_t i = 1; i < vm->valuessize; ++i) {
  208. if (!l2_bitset_get(&vm->valueset, i)) {
  209. continue;
  210. }
  211. gc_free(vm, i);
  212. }
  213. free(vm->values);
  214. l2_bitset_free(&vm->valueset);
  215. }
  216. size_t l2_vm_gc(struct l2_vm *vm) {
  217. for (l2_word sptr = 0; sptr < vm->sptr; ++sptr) {
  218. gc_mark(vm, vm->stack[sptr]);
  219. }
  220. for (l2_word fsptr = 0; fsptr < vm->fsptr; ++fsptr) {
  221. gc_mark(vm, vm->fstack[fsptr].ns);
  222. }
  223. return gc_sweep(vm);
  224. }
  225. void l2_vm_run(struct l2_vm *vm) {
  226. while (!vm->halted) {
  227. l2_vm_step(vm);
  228. }
  229. }
  230. void l2_vm_step(struct l2_vm *vm) {
  231. enum l2_opcode opcode = (enum l2_opcode)vm->ops[vm->iptr++];
  232. l2_word word;
  233. switch (opcode) {
  234. case L2_OP_NOP:
  235. break;
  236. case L2_OP_DISCARD:
  237. vm->sptr -= 1;
  238. if (l2_vm_value_type(&vm->values[vm->stack[vm->sptr]]) == L2_VAL_TYPE_ERROR) {
  239. l2_io_printf(vm->std_error, "Error: %s\n", vm->values[vm->stack[vm->sptr]].error);
  240. vm->halted = 1;
  241. }
  242. break;
  243. case L2_OP_SWAP_DISCARD:
  244. vm->stack[vm->sptr - 2] = vm->stack[vm->sptr - 1];
  245. vm->sptr -= 1;
  246. if (l2_vm_value_type(&vm->values[vm->stack[vm->sptr]]) == L2_VAL_TYPE_ERROR) {
  247. l2_io_printf(vm->std_error, "Error: %s\n", vm->values[vm->stack[vm->sptr]].error);
  248. vm->halted = 1;
  249. }
  250. break;
  251. case L2_OP_DUP:
  252. vm->stack[vm->sptr] = vm->ops[vm->sptr - 1];
  253. vm->sptr += 1;
  254. break;
  255. case L2_OP_ADD:
  256. vm->stack[vm->sptr - 2] += vm->stack[vm->sptr - 1];
  257. vm->sptr -= 1;
  258. break;
  259. case L2_OP_FUNC_CALL:
  260. {
  261. l2_word argc = vm->ops[vm->iptr++];
  262. l2_word arr_id = alloc_val(vm);
  263. vm->values[arr_id].flags = L2_VAL_TYPE_ARRAY;
  264. vm->values[arr_id].array = malloc(
  265. sizeof(struct l2_vm_array) + sizeof(l2_word) * argc);
  266. struct l2_vm_array *arr = vm->values[arr_id].array;
  267. arr->len = argc;
  268. arr->size = argc;
  269. vm->sptr -= argc;
  270. for (l2_word i = 0; i < argc; ++i) {
  271. arr->data[i] = vm->stack[vm->sptr + i];
  272. }
  273. l2_word func_id = vm->stack[--vm->sptr];
  274. struct l2_vm_value *func = &vm->values[func_id];
  275. l2_word stack_base = vm->sptr;
  276. enum l2_value_type typ = l2_vm_value_type(func);
  277. // C functions are called differently from language functions
  278. if (typ == L2_VAL_TYPE_CFUNCTION) {
  279. vm->stack[vm->sptr++] = func->cfunc(vm, arr);
  280. break;
  281. }
  282. // Don't interpret a non-function as a function
  283. if (typ != L2_VAL_TYPE_FUNCTION) {
  284. vm->stack[vm->sptr++] = l2_vm_error(vm, "Attempt to call non-function");
  285. break;
  286. }
  287. vm->stack[vm->sptr++] = arr_id;
  288. l2_word ns_id = alloc_val(vm);
  289. func = &vm->values[func_id]; // func might be stale after alloc
  290. vm->values[ns_id].extra.ns_parent = func->func.ns;
  291. vm->values[ns_id].ns = NULL;
  292. vm->values[ns_id].flags = L2_VAL_TYPE_NAMESPACE;
  293. vm->fstack[vm->fsptr].ns = ns_id;
  294. vm->fstack[vm->fsptr].retptr = vm->iptr;
  295. vm->fstack[vm->fsptr].sptr = stack_base;
  296. vm->fsptr += 1;
  297. vm->iptr = func->func.pos;
  298. }
  299. break;
  300. case L2_OP_RJMP:
  301. vm->iptr += vm->ops[vm->iptr] + 1;
  302. break;
  303. case L2_OP_STACK_FRAME_LOOKUP:
  304. {
  305. l2_word key = vm->ops[vm->iptr++];
  306. struct l2_vm_value *ns = &vm->values[vm->fstack[vm->fsptr - 1].ns];
  307. vm->stack[vm->sptr++] = l2_vm_namespace_get(vm, ns, key);
  308. }
  309. break;
  310. case L2_OP_STACK_FRAME_SET:
  311. {
  312. l2_word key = vm->ops[vm->iptr++];
  313. l2_word val = vm->stack[vm->sptr - 1];
  314. struct l2_vm_value *ns = &vm->values[vm->fstack[vm->fsptr - 1].ns];
  315. l2_vm_namespace_set(ns, key, val);
  316. }
  317. break;
  318. case L2_OP_STACK_FRAME_REPLACE:
  319. {
  320. l2_word key = vm->ops[vm->iptr++];
  321. l2_word val = vm->stack[vm->sptr - 1];
  322. struct l2_vm_value *ns = &vm->values[vm->fstack[vm->fsptr - 1].ns];
  323. l2_vm_namespace_replace(vm, ns, key, val); // TODO: error if returns -1
  324. }
  325. break;
  326. case L2_OP_RET:
  327. {
  328. l2_word retval = vm->stack[--vm->sptr];
  329. l2_word retptr = vm->fstack[vm->fsptr - 1].retptr;
  330. l2_word sptr = vm->fstack[vm->fsptr - 1].sptr;
  331. vm->fsptr -= 1;
  332. vm->sptr = sptr;
  333. vm->stack[vm->sptr++] = retval;
  334. vm->iptr = retptr;
  335. }
  336. break;
  337. case L2_OP_ALLOC_NONE:
  338. vm->stack[vm->sptr++] = 0;
  339. break;
  340. case L2_OP_ALLOC_ATOM:
  341. word = alloc_val(vm);
  342. vm->values[word].flags = L2_VAL_TYPE_ATOM;
  343. vm->values[word].atom = vm->ops[vm->iptr++];
  344. vm->stack[vm->sptr++] = word;
  345. break;
  346. case L2_OP_ALLOC_REAL:
  347. {
  348. word = alloc_val(vm);
  349. l2_word high = vm->ops[vm->iptr++];
  350. l2_word low = vm->ops[vm->iptr++];
  351. vm->values[word].flags = L2_VAL_TYPE_REAL;
  352. vm->values[word].real = u32s_to_double(high, low);
  353. vm->stack[vm->sptr++] = word;
  354. }
  355. break;
  356. case L2_OP_ALLOC_BUFFER_STATIC:
  357. {
  358. word = alloc_val(vm);
  359. l2_word length = vm->ops[vm->iptr++];
  360. l2_word offset = vm->ops[vm->iptr++];
  361. vm->values[word].flags = L2_VAL_TYPE_BUFFER;
  362. vm->values[word].buffer = malloc(sizeof(struct l2_vm_buffer) + length);
  363. vm->values[word].buffer->len = length;
  364. memcpy(
  365. (unsigned char *)vm->values[word].buffer + sizeof(struct l2_vm_buffer),
  366. vm->ops + offset, length);
  367. vm->stack[vm->sptr] = word;
  368. vm->sptr += 1;
  369. }
  370. break;
  371. case L2_OP_ALLOC_ARRAY:
  372. {
  373. l2_word count = vm->ops[vm->iptr++];
  374. l2_word arr_id = alloc_val(vm);
  375. struct l2_vm_value *arr = &vm->values[arr_id];
  376. arr->flags = L2_VAL_TYPE_ARRAY;
  377. if (count == 0) {
  378. arr->array = NULL;
  379. vm->stack[vm->sptr++] = arr_id;
  380. break;
  381. }
  382. arr->array = malloc(sizeof(struct l2_vm_array) + count * sizeof(l2_word));
  383. arr->array->len = count;
  384. arr->array->size = count;
  385. for (l2_word i = 0; i < count; ++i) {
  386. arr->array->data[count - 1 - i] = vm->stack[--vm->sptr];
  387. }
  388. vm->stack[vm->sptr++] = arr_id;
  389. }
  390. break;
  391. case L2_OP_ALLOC_NAMESPACE:
  392. word = alloc_val(vm);
  393. vm->values[word].flags = L2_VAL_TYPE_NAMESPACE;
  394. vm->values[word].extra.ns_parent = 0;
  395. vm->values[word].ns = NULL; // Will be allocated on first insert
  396. vm->stack[vm->sptr] = word;
  397. vm->sptr += 1;
  398. break;
  399. case L2_OP_ALLOC_FUNCTION:
  400. word = alloc_val(vm);
  401. vm->values[word].flags = L2_VAL_TYPE_FUNCTION;
  402. vm->values[word].func.pos = vm->ops[vm->iptr++];
  403. vm->values[word].func.ns = vm->fstack[vm->fsptr - 1].ns;
  404. vm->stack[vm->sptr] = word;
  405. vm->sptr += 1;
  406. break;
  407. case L2_OP_NAMESPACE_SET:
  408. {
  409. l2_word key = vm->ops[vm->iptr++];
  410. l2_word val = vm->stack[vm->sptr - 1];
  411. l2_word ns = vm->stack[vm->sptr - 2];
  412. l2_vm_namespace_set(&vm->values[ns], key, val);
  413. }
  414. break;
  415. case L2_OP_NAMESPACE_LOOKUP:
  416. {
  417. l2_word key = vm->ops[vm->iptr++];
  418. l2_word ns = vm->stack[--vm->sptr];
  419. vm->stack[vm->sptr++] = l2_vm_namespace_get(vm, &vm->values[ns], key);
  420. }
  421. break;
  422. case L2_OP_ARRAY_LOOKUP:
  423. {
  424. l2_word key = vm->ops[vm->iptr++];
  425. l2_word arr = vm->stack[--vm->sptr];
  426. // TODO: Error if out of bounds or incorrect type
  427. vm->stack[vm->sptr++] = vm->values[arr].array->data[key];
  428. }
  429. break;
  430. case L2_OP_ARRAY_SET:
  431. {
  432. l2_word key = vm->ops[vm->iptr++];
  433. l2_word val = vm->stack[vm->sptr - 1];
  434. l2_word arr = vm->stack[vm->sptr - 2];
  435. // TODO: Error if out of bounds or incorrect type
  436. vm->values[arr].array->data[key] = val;
  437. }
  438. break;
  439. case L2_OP_DYNAMIC_LOOKUP:
  440. {
  441. l2_word key_id = vm->stack[--vm->sptr];
  442. l2_word container_id = vm->stack[--vm->sptr];
  443. struct l2_vm_value *key = &vm->values[key_id];
  444. struct l2_vm_value *container = &vm->values[container_id];
  445. if (
  446. l2_vm_value_type(key) == L2_VAL_TYPE_REAL &&
  447. l2_vm_value_type(container) == L2_VAL_TYPE_ARRAY) {
  448. // TODO: Error if out of bounds
  449. vm->stack[vm->sptr++] = container->array->data[(size_t)key->real];
  450. } else if (
  451. l2_vm_value_type(key) == L2_VAL_TYPE_ATOM &&
  452. l2_vm_value_type(container) == L2_VAL_TYPE_NAMESPACE) {
  453. // TODO: Error if out of bounds
  454. vm->stack[vm->sptr++] = l2_vm_namespace_get(vm, container, key->atom);
  455. } else {
  456. // TODO: error
  457. }
  458. }
  459. break;
  460. case L2_OP_DYNAMIC_SET:
  461. {
  462. l2_word val = vm->stack[--vm->sptr];
  463. l2_word key_id = vm->stack[--vm->sptr];
  464. l2_word container_id = vm->stack[--vm->sptr];
  465. vm->stack[vm->sptr++] = val;
  466. struct l2_vm_value *key = &vm->values[key_id];
  467. struct l2_vm_value *container = &vm->values[container_id];
  468. if (
  469. l2_vm_value_type(key) == L2_VAL_TYPE_REAL &&
  470. l2_vm_value_type(container) == L2_VAL_TYPE_ARRAY) {
  471. // TODO: Error if out of bounds
  472. container->array->data[(size_t)key->real] = val;
  473. } else if (
  474. l2_vm_value_type(key) == L2_VAL_TYPE_ATOM &&
  475. l2_vm_value_type(container) == L2_VAL_TYPE_NAMESPACE) {
  476. // TODO: Error if out of bounds
  477. l2_vm_namespace_set(container, key->atom, val);
  478. } else {
  479. // TODO: error
  480. }
  481. }
  482. break;
  483. case L2_OP_HALT:
  484. vm->halted = 1;
  485. break;
  486. }
  487. if (vm->gc_scheduled) {
  488. l2_vm_gc(vm);
  489. vm->gc_scheduled = 0;
  490. }
  491. }