You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

parse.c 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508
  1. #include "parse/parse.h"
  2. #include "trace.h"
  3. #include "gen/gen.h"
  4. static int tok_is_end(struct l2_token *tok) {
  5. enum l2_token_kind kind = l2_token_get_kind(tok);
  6. return
  7. kind == L2_TOK_CLOSE_BRACE || kind == L2_TOK_CLOSE_BRACKET ||
  8. kind == L2_TOK_CLOSE_PAREN || kind == L2_TOK_EOF ||
  9. kind == L2_TOK_EOL;
  10. }
  // Forward declarations: the literal parsers below call these two functions
  // before their definitions appear further down in the file.
  static int parse_expression(
          struct l2_lexer *lexer,
          struct l2_generator *gen,
          struct l2_parse_error *err);

  static int parse_arg_level_expression(
          struct l2_lexer *lexer,
          struct l2_generator *gen,
          struct l2_parse_error *err);
  15. static int parse_object_literal(
  16. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  17. l2_trace_scope("object literal");
  18. // '{' and EOL already skipped by parse_object_or_function_literal
  19. l2_gen_namespace(gen);
  20. while (1) {
  21. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  22. if (l2_token_get_kind(tok) == L2_TOK_CLOSE_BRACE) {
  23. l2_lexer_consume(lexer); // '}'
  24. break;
  25. } else if (l2_token_get_kind(tok) != L2_TOK_IDENT) {
  26. l2_parse_err(err, tok, "In object literal: Expected identifier, got %s",
  27. l2_token_get_name(tok));
  28. return -1;
  29. }
  30. l2_trace("key: '%s'", tok->v.str);
  31. struct l2_token_value key = l2_token_extract_val(tok);
  32. l2_lexer_consume(lexer); // ident
  33. tok = l2_lexer_peek(lexer, 1);
  34. if (l2_token_get_kind(tok) != L2_TOK_COLON) {
  35. if (!(key.flags & L2_TOK_SMALL)) free(key.str);
  36. l2_parse_err(err, tok, "In object literal: Expected ':', got %s",
  37. l2_token_get_name(tok));
  38. return -1;
  39. }
  40. l2_lexer_consume(lexer); // ':'
  41. if (parse_expression(lexer, gen, err) < 0) {
  42. if (!(key.flags & L2_TOK_SMALL)) free(key.str);
  43. return -1;
  44. }
  45. if (key.flags & L2_TOK_SMALL) {
  46. l2_gen_namespace_set_copy(gen, key.strbuf);
  47. } else {
  48. l2_gen_namespace_set_copy(gen, key.str);
  49. }
  50. l2_gen_discard(gen);
  51. tok = l2_lexer_peek(lexer, 1);
  52. if (
  53. l2_token_get_kind(tok) != L2_TOK_EOL &&
  54. l2_token_get_kind(tok) != L2_TOK_CLOSE_BRACE) {
  55. l2_parse_err(err, tok, "In object literal: Expected EOL or '}', got %s",
  56. l2_token_get_name(tok));
  57. return -1;
  58. }
  59. if (l2_token_get_kind(tok) == L2_TOK_EOL) {
  60. l2_lexer_consume(lexer); // EOL
  61. }
  62. }
  63. return 0;
  64. }
  65. static int parse_function_literal_impl(
  66. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  67. l2_trace_scope("function literal");
  68. // '{' and EOL already skipped by parse_object_or_function_literal
  69. // The arguments array will be at the top of the stack
  70. char *ident = malloc(2);
  71. ident[0] = '$'; ident[1] = '\0';
  72. l2_gen_stack_frame_set(gen, &ident);
  73. int first = 1;
  74. while (1) {
  75. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_CLOSE_BRACE) {
  76. l2_lexer_consume(lexer); // '}'
  77. break;
  78. }
  79. if (!first) {
  80. l2_gen_discard(gen);
  81. }
  82. l2_trace_scope("function literal expression");
  83. if (parse_expression(lexer, gen, err) < 0) {
  84. return -1;
  85. }
  86. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  87. first = 0;
  88. }
  89. // All functions must put _something_ on the stack
  90. if (first) {
  91. l2_gen_none(gen);
  92. }
  93. l2_gen_ret(gen);
  94. return 0;
  95. }
  96. static int parse_function_literal(
  97. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  98. l2_gen_flush(gen);
  99. struct l2_io_writer *prev_writer = gen->writer.w;
  100. // Generate the function to a buffer in memory
  101. struct l2_io_mem_writer w = {0};
  102. w.w.write = l2_io_mem_write;
  103. gen->writer.w = &w.w;
  104. // Generates two words; RJMP, 0
  105. l2_gen_rjmp(gen, 0);
  106. l2_word pos = gen->pos;
  107. // Generate the function body itself
  108. int ret = parse_function_literal_impl(lexer, gen, err);
  109. l2_gen_flush(gen);
  110. gen->writer.w = prev_writer;
  111. if (ret < 0) {
  112. free(w.mem);
  113. return -1;
  114. }
  115. l2_word *ops = w.mem;
  116. l2_word opcount = w.len / sizeof(l2_word);
  117. // Due to the earlier gen_rjmp, the second word will be the argument to RJMP.
  118. // Need to set it properly to skip the function body.
  119. // The '- 2' is because we don't skip the RJMP, <count> sequence.
  120. ops[1] = opcount - 2;
  121. l2_bufio_put_n(&gen->writer, ops, opcount * sizeof(l2_word));
  122. free(w.mem);
  123. l2_gen_function(gen, pos);
  124. return 0;
  125. }
  126. static int parse_object_or_function_literal(
  127. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  128. l2_trace_scope("object or function literal");
  129. l2_lexer_consume(lexer); // '{'
  130. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  131. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  132. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  133. if (l2_token_get_kind(tok) == L2_TOK_CLOSE_BRACE) {
  134. l2_trace_scope("empty object literal");
  135. l2_lexer_consume(lexer); // '}'
  136. l2_gen_namespace(gen);
  137. } else if (
  138. l2_token_get_kind(tok) == L2_TOK_IDENT &&
  139. l2_token_get_kind(tok2) == L2_TOK_COLON) {
  140. if (parse_object_literal(lexer, gen, err) < 0) {
  141. return -1;
  142. }
  143. } else {
  144. if (parse_function_literal(lexer, gen, err) < 0) {
  145. return -1;
  146. }
  147. }
  148. return 0;
  149. }
  150. static int parse_array_literal(
  151. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  152. l2_trace_scope("array literal");
  153. l2_lexer_consume(lexer); // '['
  154. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  155. int count = 0;
  156. while (1) {
  157. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_CLOSE_BRACKET) {
  158. l2_lexer_consume(lexer); // ']'
  159. break;
  160. }
  161. count += 1;
  162. if (parse_arg_level_expression(lexer, gen, err) < 0) {
  163. return -1;
  164. }
  165. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  166. }
  167. l2_gen_array(gen, count);
  168. return 0;
  169. }
  170. static int parse_arg_level_expression_base(
  171. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  172. l2_trace_scope("arg level expression base");
  173. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  174. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  175. if (l2_token_get_kind(tok) == L2_TOK_OPEN_PAREN) {
  176. l2_trace_scope("group expr");
  177. l2_lexer_consume(lexer); // '('
  178. if (parse_expression(lexer, gen, err) < 0) {
  179. return -1;
  180. }
  181. tok = l2_lexer_peek(lexer, 1);
  182. if (l2_token_get_kind(tok) != L2_TOK_CLOSE_PAREN) {
  183. l2_parse_err(err, tok, "Expected '(', got %s",
  184. l2_token_get_name(tok));
  185. return -1;
  186. }
  187. l2_lexer_consume(lexer); // ')'
  188. } else if (l2_token_get_kind(tok) == L2_TOK_IDENT) {
  189. l2_trace_scope("ident");
  190. l2_trace("ident '%s'", tok->v.str);
  191. struct l2_token_value ident = l2_token_extract_val(tok);
  192. l2_lexer_consume(lexer); // ident
  193. if (ident.flags & L2_TOK_SMALL) {
  194. l2_gen_stack_frame_lookup_copy(gen, ident.strbuf);
  195. } else {
  196. l2_gen_stack_frame_lookup(gen, &ident.str);
  197. }
  198. } else if (l2_token_get_kind(tok) == L2_TOK_NUMBER) {
  199. l2_trace_scope("number literal");
  200. l2_trace("number %g", tok->v.num);
  201. double number = tok->v.num;
  202. l2_lexer_consume(lexer); // number
  203. l2_gen_number(gen, number);
  204. } else if (l2_token_get_kind(tok) == L2_TOK_STRING) {
  205. l2_trace_scope("string literal");
  206. l2_trace("string '%s'", tok->v.str);
  207. struct l2_token_value str = l2_token_extract_val(tok);
  208. l2_lexer_consume(lexer); // string
  209. if (str.flags & L2_TOK_SMALL) {
  210. l2_gen_string_copy(gen, str.strbuf);
  211. } else {
  212. l2_gen_string(gen, &str.str);
  213. }
  214. } else if (
  215. l2_token_get_kind(tok) == L2_TOK_QUOT &&
  216. l2_token_get_kind(tok2) == L2_TOK_IDENT) {
  217. l2_trace_scope("atom literal");
  218. l2_trace("atom '%s'", tok->v.str);
  219. struct l2_token_value ident = l2_token_extract_val(tok2);
  220. l2_lexer_consume(lexer); // "'"
  221. l2_lexer_consume(lexer); // ident
  222. if (ident.flags & L2_TOK_SMALL) {
  223. l2_gen_atom_copy(gen, ident.strbuf);
  224. } else {
  225. l2_gen_atom(gen, &ident.str);
  226. }
  227. } else if (l2_token_get_kind(tok) == L2_TOK_OPEN_BRACE) {
  228. if (parse_object_or_function_literal(lexer, gen, err) < 0) {
  229. return -1;
  230. }
  231. } else if (l2_token_get_kind(tok) == L2_TOK_OPEN_BRACKET) {
  232. if (parse_array_literal(lexer, gen, err) < 0) {
  233. return -1;
  234. }
  235. } else {
  236. l2_parse_err(err, tok, "Unexpected token %s",
  237. l2_token_get_name(tok));
  238. return -1;
  239. }
  240. return 0;
  241. }
  242. static int parse_arg_level_expression(
  243. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  244. l2_trace_scope("arg level expression");
  245. if (parse_arg_level_expression_base(lexer, gen, err) < 0) {
  246. return -1;
  247. }
  248. while (1) {
  249. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  250. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  251. struct l2_token *tok3 = l2_lexer_peek(lexer, 3);
  252. if (
  253. l2_token_get_kind(tok) == L2_TOK_OPEN_PAREN &&
  254. l2_token_get_kind(tok2) == L2_TOK_CLOSE_PAREN) {
  255. l2_trace_scope("niladic func call");
  256. l2_lexer_consume(lexer); // '('
  257. l2_lexer_consume(lexer); // ')'
  258. l2_gen_func_call(gen, 0);
  259. } else if (
  260. l2_token_get_kind(tok) == L2_TOK_PERIOD &&
  261. l2_token_get_kind(tok2) == L2_TOK_IDENT &&
  262. l2_token_get_kind(tok3) == L2_TOK_EQUALS) {
  263. l2_trace_scope("namespace assign");
  264. l2_trace("ident '%s'", tok2->v.str);
  265. struct l2_token_value ident = l2_token_extract_val(tok2);
  266. l2_lexer_consume(lexer); // '.'
  267. l2_lexer_consume(lexer); // ident
  268. l2_lexer_consume(lexer); // '='
  269. if (parse_expression(lexer, gen, err) < 0) {
  270. if (!(ident.flags & L2_TOK_SMALL)) free(ident.str);
  271. return -1;
  272. }
  273. if (ident.flags & L2_TOK_SMALL) {
  274. l2_gen_namespace_set_copy(gen, ident.strbuf);
  275. } else {
  276. l2_gen_namespace_set(gen, &ident.str);
  277. }
  278. l2_gen_swap_discard(gen);
  279. } else if (
  280. l2_token_get_kind(tok) == L2_TOK_PERIOD &&
  281. l2_token_get_kind(tok2) == L2_TOK_IDENT) {
  282. l2_trace_scope("namespace lookup");
  283. l2_trace("ident '%s'", tok2->v.str);
  284. struct l2_token_value ident = l2_token_extract_val(tok2);
  285. l2_lexer_consume(lexer); // '.'
  286. l2_lexer_consume(lexer); // ident
  287. if (ident.flags & L2_TOK_SMALL) {
  288. l2_gen_namespace_lookup_copy(gen, ident.strbuf);
  289. } else {
  290. l2_gen_namespace_lookup(gen, &ident.str);
  291. }
  292. } else if (
  293. l2_token_get_kind(tok) == L2_TOK_DOT_NUMBER &&
  294. l2_token_get_kind(tok2) == L2_TOK_EQUALS) {
  295. l2_trace_scope("direct array assign");
  296. int number = tok->v.integer;
  297. l2_lexer_consume(lexer); // dot-number
  298. l2_lexer_consume(lexer); // '='
  299. if (parse_expression(lexer, gen, err) < 0) {
  300. return -1;
  301. }
  302. l2_gen_array_set(gen, number);
  303. l2_gen_swap_discard(gen);
  304. } else if (l2_token_get_kind(tok) == L2_TOK_DOT_NUMBER) {
  305. l2_trace_scope("direct array lookup");
  306. int number = tok->v.integer;
  307. l2_lexer_consume(lexer); // dot-number
  308. l2_gen_array_lookup(gen, number);
  309. } else if (
  310. l2_token_get_kind(tok) == L2_TOK_PERIOD &&
  311. l2_token_get_kind(tok2) == L2_TOK_OPEN_PAREN) {
  312. l2_trace_scope("dynamic lookup");
  313. l2_lexer_consume(lexer); // '.'
  314. l2_lexer_consume(lexer); // '('
  315. if (parse_expression(lexer, gen, err) < 0) {
  316. return -1;
  317. }
  318. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) != L2_TOK_CLOSE_PAREN) {
  319. l2_parse_err(err, tok, "Expected '(', got %s",
  320. l2_token_get_name(tok));
  321. return -1;
  322. }
  323. l2_lexer_consume(lexer); // ')'
  324. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_EQUALS) {
  325. l2_lexer_consume(lexer); // '='
  326. if (parse_expression(lexer, gen, err) < 0) {
  327. return -1;
  328. }
  329. l2_gen_dynamic_set(gen);
  330. } else {
  331. l2_gen_dynamic_lookup(gen);
  332. }
  333. } else {
  334. break;
  335. }
  336. }
  337. return 0;
  338. }
  339. static int parse_func_call_after_base(
  340. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  341. l2_trace_scope("func call after base");
  342. size_t argc = 0;
  343. do {
  344. argc += 1;
  345. l2_trace_scope("func call param");
  346. if (parse_arg_level_expression(lexer, gen, err) < 0) {
  347. return -1;
  348. }
  349. } while (!tok_is_end(l2_lexer_peek(lexer, 1)));
  350. // The 'argc' previous expressions were arguments, the one before that was the function
  351. l2_gen_func_call(gen, argc);
  352. return 0;
  353. }
  354. static int parse_expression(
  355. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  356. l2_trace_scope("expression");
  357. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  358. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  359. if (
  360. l2_token_get_kind(tok) == L2_TOK_IDENT &&
  361. l2_token_get_kind(tok2) == L2_TOK_COLON_EQ) {
  362. l2_trace_scope("assign expression");
  363. l2_trace("ident '%s'", tok->v.str);
  364. struct l2_token_value ident = l2_token_extract_val(tok);
  365. l2_lexer_consume(lexer); // ident
  366. l2_lexer_consume(lexer); // :=
  367. if (parse_expression(lexer, gen, err) < 0) {
  368. if (!(ident.flags & L2_TOK_SMALL)) free(ident.str);
  369. return -1;
  370. }
  371. if (ident.flags & L2_TOK_SMALL) {
  372. l2_gen_stack_frame_set_copy(gen, ident.strbuf);
  373. } else {
  374. l2_gen_stack_frame_set(gen, &ident.str);
  375. }
  376. } else if (
  377. l2_token_get_kind(tok) == L2_TOK_IDENT &&
  378. l2_token_get_kind(tok2) == L2_TOK_EQUALS) {
  379. l2_trace_scope("replacement assign expression");
  380. l2_trace("ident '%s'", tok->v.str);
  381. struct l2_token_value ident = l2_token_extract_val(tok);
  382. l2_lexer_consume(lexer); // ident
  383. l2_lexer_consume(lexer); // =
  384. if (parse_expression(lexer, gen, err) < 0) {
  385. if (!(ident.flags & L2_TOK_SMALL)) free(ident.str);
  386. return -1;
  387. }
  388. if (ident.flags & L2_TOK_SMALL) {
  389. l2_gen_stack_frame_replace_copy(gen, ident.strbuf);
  390. } else {
  391. l2_gen_stack_frame_replace(gen, &ident.str);
  392. }
  393. } else {
  394. if (parse_arg_level_expression(lexer, gen, err) < 0) {
  395. return -1;
  396. }
  397. if (!tok_is_end(l2_lexer_peek(lexer, 1))) {
  398. if (parse_func_call_after_base(lexer, gen, err) < 0) {
  399. return -1;
  400. }
  401. }
  402. }
  403. return 0;
  404. }
  405. int l2_parse_program(
  406. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  407. l2_trace_scope("program");
  408. while (1) {
  409. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  410. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_EOF) {
  411. break;
  412. }
  413. if (parse_expression(lexer, gen, err) < 0) {
  414. l2_gen_halt(gen);
  415. l2_gen_flush(gen);
  416. return -1;
  417. }
  418. l2_gen_discard(gen);
  419. }
  420. l2_gen_halt(gen);
  421. l2_gen_flush(gen);
  422. return 0;
  423. }