You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

parse.c 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450
  1. #include "parse/parse.h"
  2. #include "trace.h"
  3. #include "gen/gen.h"
  4. static int tok_is_end(struct l2_token *tok) {
  5. return
  6. tok->kind == L2_TOK_CLOSE_BRACE || tok->kind == L2_TOK_CLOSE_BRACKET ||
  7. tok->kind == L2_TOK_CLOSE_PAREN || tok->kind == L2_TOK_EOF ||
  8. tok->kind == L2_TOK_EOL;
  9. }
  // Forward declarations. The literal parsers that follow call these two
  // functions, whose definitions appear further down in this file.
  static int parse_expression(struct l2_lexer *lexer,
                              struct l2_generator *gen,
                              struct l2_parse_error *err);
  static int parse_arg_level_expression(struct l2_lexer *lexer,
                                        struct l2_generator *gen,
                                        struct l2_parse_error *err);
  14. static int parse_object_literal(
  15. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  16. l2_trace_scope("object literal");
  17. // '{' and EOL already skipped by parse_object_or_function_literal
  18. l2_gen_namespace(gen);
  19. while (1) {
  20. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  21. if (tok->kind == L2_TOK_CLOSE_BRACE) {
  22. l2_lexer_consume(lexer); // '}'
  23. break;
  24. } else if (tok->kind != L2_TOK_IDENT) {
  25. l2_parse_err(err, tok, "In object literal: Expected identifier, got %s",
  26. l2_token_kind_name(tok->kind));
  27. return -1;
  28. }
  29. l2_trace("key: '%s'", tok->v.str);
  30. char *key = l2_token_extract_str(tok);
  31. l2_lexer_consume(lexer); // ident
  32. tok = l2_lexer_peek(lexer, 1);
  33. if (tok->kind != L2_TOK_COLON) {
  34. l2_parse_err(err, tok, "In object literal: Expected ':', got %s",
  35. l2_token_kind_name(tok->kind));
  36. return -1;
  37. }
  38. l2_lexer_consume(lexer); // ':'
  39. if (parse_expression(lexer, gen, err) < 0) {
  40. return -1;
  41. }
  42. l2_gen_namespace_set(gen, &key);
  43. l2_gen_pop(gen);
  44. tok = l2_lexer_peek(lexer, 1);
  45. if (tok->kind != L2_TOK_EOL && tok->kind != L2_TOK_CLOSE_BRACE) {
  46. l2_parse_err(err, tok, "In object literal: Expected EOL or '}', got %s",
  47. l2_token_kind_name(tok->kind));
  48. return -1;
  49. }
  50. if (tok->kind == L2_TOK_EOL) {
  51. l2_lexer_consume(lexer); // EOL
  52. }
  53. }
  54. return 0;
  55. }
  56. static int parse_function_literal_impl(
  57. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  58. l2_trace_scope("function literal");
  59. // '{' and EOL already skipped by parse_object_or_function_literal
  60. // The arguments array will be at the top of the stack
  61. char *ident = malloc(2);
  62. ident[0] = '$'; ident[1] = '\0';
  63. l2_gen_stack_frame_set(gen, &ident);
  64. int first = 1;
  65. while (1) {
  66. if (l2_lexer_peek(lexer, 1)->kind == L2_TOK_CLOSE_BRACE) {
  67. l2_lexer_consume(lexer); // '}'
  68. break;
  69. }
  70. if (!first) {
  71. l2_gen_pop(gen);
  72. }
  73. l2_trace_scope("function literal expression");
  74. if (parse_expression(lexer, gen, err) < 0) {
  75. return -1;
  76. }
  77. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  78. first = 0;
  79. }
  80. // All functions must put _something_ on the stack
  81. if (first) {
  82. l2_gen_none(gen);
  83. }
  84. l2_gen_ret(gen);
  85. return 0;
  86. }
  87. static int parse_function_literal(
  88. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  89. l2_gen_flush(gen);
  90. struct l2_io_writer *prev_writer = gen->writer.w;
  91. // Generate the function to a buffer in memory
  92. struct l2_io_mem_writer w = {0};
  93. w.w.write = l2_io_mem_write;
  94. gen->writer.w = &w.w;
  95. // Generates two words; RJMP, 0
  96. l2_gen_rjmp(gen, 0);
  97. l2_word pos = gen->pos;
  98. // Generate the function body itself
  99. int ret = parse_function_literal_impl(lexer, gen, err);
  100. l2_gen_flush(gen);
  101. gen->writer.w = prev_writer;
  102. if (ret < 0) {
  103. free(w.mem);
  104. return -1;
  105. }
  106. l2_word *ops = w.mem;
  107. l2_word opcount = w.len / sizeof(l2_word);
  108. // Due to the earlier gen_rjmp, the second word will be the argument to RJMP.
  109. // Need to set it properly to skip the function body.
  110. // The '- 2' is because we don't skip the RJMP, <count> sequence.
  111. ops[1] = opcount - 2;
  112. l2_bufio_put_n(&gen->writer, ops, opcount * sizeof(l2_word));
  113. free(w.mem);
  114. l2_gen_function(gen, pos);
  115. return 0;
  116. }
  117. static int parse_object_or_function_literal(
  118. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  119. l2_trace_scope("object or function literal");
  120. l2_lexer_consume(lexer); // '{'
  121. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  122. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  123. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  124. if (tok->kind == L2_TOK_CLOSE_BRACE) {
  125. l2_trace_scope("empty object literal");
  126. l2_lexer_consume(lexer); // '}'
  127. l2_gen_namespace(gen);
  128. } else if (tok->kind == L2_TOK_IDENT && tok2->kind == L2_TOK_COLON) {
  129. if (parse_object_literal(lexer, gen, err) < 0) {
  130. return -1;
  131. }
  132. } else {
  133. if (parse_function_literal(lexer, gen, err) < 0) {
  134. return -1;
  135. }
  136. }
  137. return 0;
  138. }
  139. static int parse_array_literal(
  140. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  141. l2_trace_scope("array literal");
  142. l2_lexer_consume(lexer); // '['
  143. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  144. int count = 0;
  145. while (1) {
  146. if (l2_lexer_peek(lexer, 1)->kind == L2_TOK_CLOSE_BRACKET) {
  147. l2_lexer_consume(lexer); // ']'
  148. break;
  149. }
  150. count += 1;
  151. if (parse_arg_level_expression(lexer, gen, err) < 0) {
  152. return -1;
  153. }
  154. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  155. }
  156. l2_gen_array(gen, count);
  157. return 0;
  158. }
  159. static int parse_arg_level_expression_base(
  160. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  161. l2_trace_scope("arg level expression base");
  162. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  163. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  164. if (tok->kind == L2_TOK_OPEN_PAREN) {
  165. l2_trace_scope("group expr");
  166. l2_lexer_consume(lexer); // '('
  167. if (parse_expression(lexer, gen, err) < 0) {
  168. return -1;
  169. }
  170. tok = l2_lexer_peek(lexer, 1);
  171. if (tok->kind != L2_TOK_CLOSE_PAREN) {
  172. l2_parse_err(err, tok, "Expected '(', got %s",
  173. l2_token_kind_name(tok->kind));
  174. return -1;
  175. }
  176. l2_lexer_consume(lexer); // ')'
  177. } else if (tok->kind == L2_TOK_IDENT) {
  178. l2_trace_scope("ident");
  179. l2_trace("ident '%s'", tok->v.str);
  180. char *ident = l2_token_extract_str(tok);
  181. l2_lexer_consume(lexer); // ident
  182. l2_gen_stack_frame_lookup(gen, &ident);
  183. } else if (tok->kind == L2_TOK_NUMBER) {
  184. l2_trace_scope("number literal");
  185. l2_trace("number %g", tok->v.num);
  186. double number = tok->v.num;
  187. l2_lexer_consume(lexer); // number
  188. l2_gen_number(gen, number);
  189. } else if (tok->kind == L2_TOK_STRING) {
  190. l2_trace_scope("string literal");
  191. l2_trace("string '%s'", tok->v.str);
  192. char *str = l2_token_extract_str(tok);
  193. l2_lexer_consume(lexer); // string
  194. l2_gen_string(gen, &str);
  195. } else if (tok->kind == L2_TOK_QUOT && tok2->kind == L2_TOK_IDENT) {
  196. l2_trace_scope("atom literal");
  197. l2_trace("atom '%s'", tok->v.str);
  198. char *ident = l2_token_extract_str(tok2);
  199. l2_lexer_consume(lexer); // "'"
  200. l2_lexer_consume(lexer); // ident
  201. l2_gen_atom(gen, &ident);
  202. } else if (tok->kind == L2_TOK_OPEN_BRACE) {
  203. if (parse_object_or_function_literal(lexer, gen, err) < 0) {
  204. return -1;
  205. }
  206. } else if (tok->kind == L2_TOK_OPEN_BRACKET) {
  207. if (parse_array_literal(lexer, gen, err) < 0) {
  208. return -1;
  209. }
  210. } else {
  211. l2_parse_err(err, tok, "Unexpected token %s",
  212. l2_token_kind_name(tok->kind));
  213. return -1;
  214. }
  215. return 0;
  216. }
  217. static int parse_arg_level_expression(
  218. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  219. l2_trace_scope("arg level expression");
  220. if (parse_arg_level_expression_base(lexer, gen, err) < 0) {
  221. return -1;
  222. }
  223. while (1) {
  224. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  225. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  226. struct l2_token *tok3 = l2_lexer_peek(lexer, 3);
  227. if (tok->kind == L2_TOK_OPEN_PAREN && tok2->kind == L2_TOK_CLOSE_PAREN) {
  228. l2_trace_scope("niladic func call");
  229. l2_lexer_consume(lexer); // '('
  230. l2_lexer_consume(lexer); // ')'
  231. l2_gen_func_call(gen, 0);
  232. } else if (
  233. tok->kind == L2_TOK_PERIOD && tok2->kind == L2_TOK_IDENT &&
  234. tok3->kind == L2_TOK_EQUALS) {
  235. l2_trace_scope("namespace assign");
  236. l2_trace("ident '%s'", tok2->v.str);
  237. char *ident = l2_token_extract_str(tok2);
  238. l2_lexer_consume(lexer); // '.'
  239. l2_lexer_consume(lexer); // ident
  240. l2_lexer_consume(lexer); // '='
  241. if (parse_expression(lexer, gen, err) < 0) {
  242. return -1;
  243. }
  244. l2_gen_namespace_set(gen, &ident);
  245. l2_gen_swap_pop(gen);
  246. } else if (tok->kind == L2_TOK_PERIOD && tok2->kind == L2_TOK_IDENT) {
  247. l2_trace_scope("namespace lookup");
  248. l2_trace("ident '%s'", tok2->v.str);
  249. char *ident = l2_token_extract_str(tok2);
  250. l2_lexer_consume(lexer); // '.'
  251. l2_lexer_consume(lexer); // ident
  252. l2_gen_namespace_lookup(gen, &ident);
  253. } else if (tok->kind == L2_TOK_DOT_NUMBER && tok2->kind == L2_TOK_EQUALS) {
  254. l2_trace_scope("direct array assign");
  255. int number = tok->v.integer;
  256. l2_lexer_consume(lexer); // dot-number
  257. l2_lexer_consume(lexer); // '='
  258. if (parse_expression(lexer, gen, err) < 0) {
  259. return -1;
  260. }
  261. l2_gen_array_set(gen, number);
  262. l2_gen_swap_pop(gen);
  263. } else if (tok->kind == L2_TOK_DOT_NUMBER) {
  264. l2_trace_scope("direct array lookup");
  265. int number = tok->v.integer;
  266. l2_lexer_consume(lexer); // dot-number
  267. l2_gen_array_lookup(gen, number);
  268. } else if (tok->kind == L2_TOK_PERIOD && tok2->kind == L2_TOK_OPEN_PAREN) {
  269. l2_trace_scope("dynamic lookup");
  270. l2_lexer_consume(lexer); // '.'
  271. l2_lexer_consume(lexer); // '('
  272. if (parse_expression(lexer, gen, err) < 0) {
  273. return -1;
  274. }
  275. if (l2_lexer_peek(lexer, 1)->kind != L2_TOK_CLOSE_PAREN) {
  276. l2_parse_err(err, tok, "Expected '(', got %s",
  277. l2_token_kind_name(tok->kind));
  278. return -1;
  279. }
  280. l2_lexer_consume(lexer); // ')'
  281. if (l2_lexer_peek(lexer, 1)->kind == L2_TOK_EQUALS) {
  282. l2_lexer_consume(lexer); // '='
  283. if (parse_expression(lexer, gen, err) < 0) {
  284. return -1;
  285. }
  286. l2_gen_dynamic_set(gen);
  287. } else {
  288. l2_gen_dynamic_lookup(gen);
  289. }
  290. } else {
  291. break;
  292. }
  293. }
  294. return 0;
  295. }
  // Parses the arguments of a function call whose callee has already been
  // parsed by the caller, then emits the call.
  //
  // At least one argument is always parsed; further arguments are parsed until
  // tok_is_end() accepts the next token. Emits a function call with the number
  // of arguments parsed.
  //
  // Returns 0 on success, -1 if an argument fails to parse.
  static int parse_func_call_after_base(
          struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
      l2_trace_scope("func call after base");

      size_t arg_count = 0;
      do {
          l2_trace_scope("func call param");
          ++arg_count;
          if (parse_arg_level_expression(lexer, gen, err) < 0) {
              return -1;
          }
      } while (!tok_is_end(l2_lexer_peek(lexer, 1)));

      // The callee was generated before the `arg_count` argument expressions.
      l2_gen_func_call(gen, arg_count);
      return 0;
  }
  311. static int parse_expression(
  312. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  313. l2_trace_scope("expression");
  314. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  315. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  316. if (tok->kind == L2_TOK_IDENT && tok2->kind == L2_TOK_COLON_EQ) {
  317. l2_trace_scope("assign expression");
  318. l2_trace("ident '%s'", tok->v.str);
  319. char *ident = l2_token_extract_str(tok);
  320. l2_lexer_consume(lexer); // ident
  321. l2_lexer_consume(lexer); // :=
  322. if (parse_expression(lexer, gen, err) < 0) {
  323. return -1;
  324. }
  325. l2_gen_stack_frame_set(gen, &ident);
  326. } else if (tok->kind == L2_TOK_IDENT && tok2->kind == L2_TOK_EQUALS) {
  327. l2_trace_scope("replacement assign expression");
  328. l2_trace("ident '%s'", tok->v.str);
  329. char *ident = l2_token_extract_str(tok);
  330. l2_lexer_consume(lexer); // ident
  331. l2_lexer_consume(lexer); // =
  332. if (parse_expression(lexer, gen, err) < 0) {
  333. return -1;
  334. }
  335. l2_gen_stack_frame_replace(gen, &ident);
  336. } else {
  337. if (parse_arg_level_expression(lexer, gen, err) < 0) {
  338. return -1;
  339. }
  340. if (!tok_is_end(l2_lexer_peek(lexer, 1))) {
  341. if (parse_func_call_after_base(lexer, gen, err) < 0) {
  342. return -1;
  343. }
  344. }
  345. }
  346. return 0;
  347. }
  348. int l2_parse_program(
  349. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  350. l2_trace_scope("program");
  351. while (1) {
  352. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  353. if (l2_lexer_peek(lexer, 1)->kind == L2_TOK_EOF) {
  354. break;
  355. }
  356. if (parse_expression(lexer, gen, err) < 0) {
  357. l2_gen_halt(gen);
  358. l2_gen_flush(gen);
  359. return -1;
  360. }
  361. l2_gen_pop(gen);
  362. }
  363. l2_gen_halt(gen);
  364. l2_gen_flush(gen);
  365. return 0;
  366. }