You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

parse.c 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554
  1. #include "parse/parse.h"
  2. #include "trace.h"
  3. #include "gen/gen.h"
  4. static int tok_is_end(struct l2_token *tok) {
  5. enum l2_token_kind kind = l2_token_get_kind(tok);
  6. return
  7. kind == L2_TOK_CLOSE_BRACE || kind == L2_TOK_CLOSE_BRACKET ||
  8. kind == L2_TOK_CLOSE_PAREN || kind == L2_TOK_EOF ||
  9. kind == L2_TOK_EOL;
  10. }
  11. static int tok_is_infix(struct l2_token *tok) {
  12. if (l2_token_get_kind(tok) != L2_TOK_IDENT) return 0;
  13. char *str;
  14. if (l2_token_is_small(tok)) {
  15. str = tok->v.strbuf;
  16. } else {
  17. str = tok->v.str;
  18. }
  19. return
  20. (str[0] == '$' && str[1] != '\0') ||
  21. strcmp(str, "+") == 0 ||
  22. strcmp(str, "-") == 0 ||
  23. strcmp(str, "*") == 0 ||
  24. strcmp(str, "/") == 0;
  25. }
  26. static int parse_expression(
  27. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err);
  28. static int parse_arg_level_expression(
  29. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err);
  30. static int parse_object_literal(
  31. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  32. l2_trace_scope("object literal");
  33. // '{' and EOL already skipped by parse_object_or_function_literal
  34. l2_gen_namespace(gen);
  35. while (1) {
  36. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  37. if (l2_token_get_kind(tok) == L2_TOK_CLOSE_BRACE) {
  38. l2_lexer_consume(lexer); // '}'
  39. break;
  40. } else if (l2_token_get_kind(tok) != L2_TOK_IDENT) {
  41. l2_parse_err(err, tok, "In object literal: Expected identifier, got %s",
  42. l2_token_get_name(tok));
  43. return -1;
  44. }
  45. l2_trace("key: '%s'", tok->v.str);
  46. struct l2_token_value key = l2_token_extract_val(tok);
  47. l2_lexer_consume(lexer); // ident
  48. tok = l2_lexer_peek(lexer, 1);
  49. if (l2_token_get_kind(tok) != L2_TOK_COLON) {
  50. if (!(key.flags & L2_TOK_SMALL)) free(key.str);
  51. l2_parse_err(err, tok, "In object literal: Expected ':', got %s",
  52. l2_token_get_name(tok));
  53. return -1;
  54. }
  55. l2_lexer_consume(lexer); // ':'
  56. if (parse_expression(lexer, gen, err) < 0) {
  57. if (!(key.flags & L2_TOK_SMALL)) free(key.str);
  58. return -1;
  59. }
  60. if (key.flags & L2_TOK_SMALL) {
  61. l2_gen_namespace_set_copy(gen, key.strbuf);
  62. } else {
  63. l2_gen_namespace_set_copy(gen, key.str);
  64. }
  65. l2_gen_discard(gen);
  66. tok = l2_lexer_peek(lexer, 1);
  67. if (
  68. l2_token_get_kind(tok) != L2_TOK_EOL &&
  69. l2_token_get_kind(tok) != L2_TOK_CLOSE_BRACE) {
  70. l2_parse_err(err, tok, "In object literal: Expected EOL or '}', got %s",
  71. l2_token_get_name(tok));
  72. return -1;
  73. }
  74. if (l2_token_get_kind(tok) == L2_TOK_EOL) {
  75. l2_lexer_consume(lexer); // EOL
  76. }
  77. }
  78. return 0;
  79. }
  80. static int parse_function_literal_impl(
  81. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  82. l2_trace_scope("function literal");
  83. // '{' and EOL already skipped by parse_object_or_function_literal
  84. // The arguments array will be at the top of the stack
  85. char *ident = "$";
  86. l2_gen_stack_frame_set_copy(gen, ident);
  87. int first = 1;
  88. while (1) {
  89. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_CLOSE_BRACE) {
  90. l2_lexer_consume(lexer); // '}'
  91. break;
  92. }
  93. if (!first) {
  94. l2_gen_discard(gen);
  95. }
  96. l2_trace_scope("function literal expression");
  97. if (parse_expression(lexer, gen, err) < 0) {
  98. return -1;
  99. }
  100. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  101. first = 0;
  102. }
  103. // All functions must put _something_ on the stack
  104. if (first) {
  105. l2_gen_none(gen);
  106. }
  107. l2_gen_ret(gen);
  108. return 0;
  109. }
  110. static int parse_function_literal(
  111. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  112. l2_gen_flush(gen);
  113. struct l2_io_writer *prev_writer = gen->writer.w;
  114. // Generate the function to a buffer in memory
  115. struct l2_io_mem_writer w = {0};
  116. w.w.write = l2_io_mem_write;
  117. gen->writer.w = &w.w;
  118. // Generates two words; RJMP, 0
  119. l2_gen_rjmp(gen, 0);
  120. l2_word pos = gen->pos;
  121. // Generate the function body itself
  122. int ret = parse_function_literal_impl(lexer, gen, err);
  123. l2_gen_flush(gen);
  124. gen->writer.w = prev_writer;
  125. if (ret < 0) {
  126. free(w.mem);
  127. return -1;
  128. }
  129. l2_word *ops = w.mem;
  130. l2_word opcount = w.len / sizeof(l2_word);
  131. // Due to the earlier gen_rjmp, the second word will be the argument to RJMP.
  132. // Need to set it properly to skip the function body.
  133. // The '- 2' is because we don't skip the RJMP, <count> sequence.
  134. ops[1] = opcount - 2;
  135. l2_bufio_put_n(&gen->writer, ops, opcount * sizeof(l2_word));
  136. free(w.mem);
  137. l2_gen_function(gen, pos);
  138. return 0;
  139. }
  140. static int parse_object_or_function_literal(
  141. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  142. l2_trace_scope("object or function literal");
  143. l2_lexer_consume(lexer); // '{'
  144. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  145. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  146. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  147. if (l2_token_get_kind(tok) == L2_TOK_CLOSE_BRACE) {
  148. l2_trace_scope("empty object literal");
  149. l2_lexer_consume(lexer); // '}'
  150. l2_gen_namespace(gen);
  151. } else if (
  152. l2_token_get_kind(tok) == L2_TOK_IDENT &&
  153. l2_token_get_kind(tok2) == L2_TOK_COLON) {
  154. if (parse_object_literal(lexer, gen, err) < 0) {
  155. return -1;
  156. }
  157. } else {
  158. if (parse_function_literal(lexer, gen, err) < 0) {
  159. return -1;
  160. }
  161. }
  162. return 0;
  163. }
  164. static int parse_array_literal(
  165. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  166. l2_trace_scope("array literal");
  167. l2_lexer_consume(lexer); // '['
  168. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  169. int count = 0;
  170. while (1) {
  171. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_CLOSE_BRACKET) {
  172. l2_lexer_consume(lexer); // ']'
  173. break;
  174. }
  175. count += 1;
  176. if (parse_arg_level_expression(lexer, gen, err) < 0) {
  177. return -1;
  178. }
  179. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  180. }
  181. l2_gen_array(gen, count);
  182. return 0;
  183. }
  184. static int parse_arg_level_expression_base(
  185. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  186. l2_trace_scope("arg level expression base");
  187. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  188. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  189. if (l2_token_get_kind(tok) == L2_TOK_OPEN_PAREN) {
  190. l2_trace_scope("group expr");
  191. l2_lexer_consume(lexer); // '('
  192. if (parse_expression(lexer, gen, err) < 0) {
  193. return -1;
  194. }
  195. tok = l2_lexer_peek(lexer, 1);
  196. if (l2_token_get_kind(tok) != L2_TOK_CLOSE_PAREN) {
  197. l2_parse_err(err, tok, "Expected '(', got %s",
  198. l2_token_get_name(tok));
  199. return -1;
  200. }
  201. l2_lexer_consume(lexer); // ')'
  202. } else if (l2_token_get_kind(tok) == L2_TOK_IDENT) {
  203. l2_trace_scope("ident");
  204. l2_trace("ident '%s'", tok->v.str);
  205. struct l2_token_value ident = l2_token_extract_val(tok);
  206. l2_lexer_consume(lexer); // ident
  207. if (ident.flags & L2_TOK_SMALL) {
  208. l2_gen_stack_frame_lookup_copy(gen, ident.strbuf);
  209. } else {
  210. l2_gen_stack_frame_lookup(gen, &ident.str);
  211. }
  212. } else if (l2_token_get_kind(tok) == L2_TOK_NUMBER) {
  213. l2_trace_scope("number literal");
  214. l2_trace("number %g", tok->v.num);
  215. double number = tok->v.num;
  216. l2_lexer_consume(lexer); // number
  217. l2_gen_number(gen, number);
  218. } else if (l2_token_get_kind(tok) == L2_TOK_STRING) {
  219. l2_trace_scope("string literal");
  220. l2_trace("string '%s'", tok->v.str);
  221. struct l2_token_value str = l2_token_extract_val(tok);
  222. l2_lexer_consume(lexer); // string
  223. if (str.flags & L2_TOK_SMALL) {
  224. l2_gen_string_copy(gen, str.strbuf);
  225. } else {
  226. l2_gen_string(gen, &str.str);
  227. }
  228. } else if (
  229. l2_token_get_kind(tok) == L2_TOK_QUOT &&
  230. l2_token_get_kind(tok2) == L2_TOK_IDENT) {
  231. l2_trace_scope("atom literal");
  232. l2_trace("atom '%s'", tok->v.str);
  233. struct l2_token_value ident = l2_token_extract_val(tok2);
  234. l2_lexer_consume(lexer); // "'"
  235. l2_lexer_consume(lexer); // ident
  236. if (ident.flags & L2_TOK_SMALL) {
  237. l2_gen_atom_copy(gen, ident.strbuf);
  238. } else {
  239. l2_gen_atom(gen, &ident.str);
  240. }
  241. } else if (l2_token_get_kind(tok) == L2_TOK_OPEN_BRACE) {
  242. if (parse_object_or_function_literal(lexer, gen, err) < 0) {
  243. return -1;
  244. }
  245. } else if (l2_token_get_kind(tok) == L2_TOK_OPEN_BRACKET) {
  246. if (parse_array_literal(lexer, gen, err) < 0) {
  247. return -1;
  248. }
  249. } else {
  250. l2_parse_err(err, tok, "Unexpected token %s",
  251. l2_token_get_name(tok));
  252. return -1;
  253. }
  254. return 0;
  255. }
  256. static int parse_arg_level_expression(
  257. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  258. l2_trace_scope("arg level expression");
  259. if (parse_arg_level_expression_base(lexer, gen, err) < 0) {
  260. return -1;
  261. }
  262. while (1) {
  263. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  264. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  265. struct l2_token *tok3 = l2_lexer_peek(lexer, 3);
  266. if (
  267. l2_token_get_kind(tok) == L2_TOK_OPEN_PAREN &&
  268. l2_token_get_kind(tok2) == L2_TOK_CLOSE_PAREN) {
  269. l2_trace_scope("niladic func call");
  270. l2_lexer_consume(lexer); // '('
  271. l2_lexer_consume(lexer); // ')'
  272. l2_gen_func_call(gen, 0);
  273. } else if (
  274. l2_token_get_kind(tok) == L2_TOK_PERIOD &&
  275. l2_token_get_kind(tok2) == L2_TOK_IDENT &&
  276. l2_token_get_kind(tok3) == L2_TOK_EQUALS) {
  277. l2_trace_scope("namespace assign");
  278. l2_trace("ident '%s'", tok2->v.str);
  279. struct l2_token_value ident = l2_token_extract_val(tok2);
  280. l2_lexer_consume(lexer); // '.'
  281. l2_lexer_consume(lexer); // ident
  282. l2_lexer_consume(lexer); // '='
  283. if (parse_expression(lexer, gen, err) < 0) {
  284. if (!(ident.flags & L2_TOK_SMALL)) free(ident.str);
  285. return -1;
  286. }
  287. if (ident.flags & L2_TOK_SMALL) {
  288. l2_gen_namespace_set_copy(gen, ident.strbuf);
  289. } else {
  290. l2_gen_namespace_set(gen, &ident.str);
  291. }
  292. l2_gen_swap_discard(gen);
  293. } else if (
  294. l2_token_get_kind(tok) == L2_TOK_PERIOD &&
  295. l2_token_get_kind(tok2) == L2_TOK_IDENT) {
  296. l2_trace_scope("namespace lookup");
  297. l2_trace("ident '%s'", tok2->v.str);
  298. struct l2_token_value ident = l2_token_extract_val(tok2);
  299. l2_lexer_consume(lexer); // '.'
  300. l2_lexer_consume(lexer); // ident
  301. if (ident.flags & L2_TOK_SMALL) {
  302. l2_gen_namespace_lookup_copy(gen, ident.strbuf);
  303. } else {
  304. l2_gen_namespace_lookup(gen, &ident.str);
  305. }
  306. } else if (
  307. l2_token_get_kind(tok) == L2_TOK_DOT_NUMBER &&
  308. l2_token_get_kind(tok2) == L2_TOK_EQUALS) {
  309. l2_trace_scope("direct array assign");
  310. int number = tok->v.integer;
  311. l2_lexer_consume(lexer); // dot-number
  312. l2_lexer_consume(lexer); // '='
  313. if (parse_expression(lexer, gen, err) < 0) {
  314. return -1;
  315. }
  316. l2_gen_array_set(gen, number);
  317. l2_gen_swap_discard(gen);
  318. } else if (l2_token_get_kind(tok) == L2_TOK_DOT_NUMBER) {
  319. l2_trace_scope("direct array lookup");
  320. int number = tok->v.integer;
  321. l2_lexer_consume(lexer); // dot-number
  322. l2_gen_array_lookup(gen, number);
  323. } else if (
  324. l2_token_get_kind(tok) == L2_TOK_PERIOD &&
  325. l2_token_get_kind(tok2) == L2_TOK_OPEN_PAREN) {
  326. l2_trace_scope("dynamic lookup");
  327. l2_lexer_consume(lexer); // '.'
  328. l2_lexer_consume(lexer); // '('
  329. if (parse_expression(lexer, gen, err) < 0) {
  330. return -1;
  331. }
  332. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) != L2_TOK_CLOSE_PAREN) {
  333. l2_parse_err(err, tok, "Expected '(', got %s",
  334. l2_token_get_name(tok));
  335. return -1;
  336. }
  337. l2_lexer_consume(lexer); // ')'
  338. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_EQUALS) {
  339. l2_lexer_consume(lexer); // '='
  340. if (parse_expression(lexer, gen, err) < 0) {
  341. return -1;
  342. }
  343. l2_gen_dynamic_set(gen);
  344. } else {
  345. l2_gen_dynamic_lookup(gen);
  346. }
  347. } else {
  348. break;
  349. }
  350. }
  351. return 0;
  352. }
  353. static int parse_func_call_after_base(
  354. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  355. l2_trace_scope("func call after base");
  356. size_t argc = 0;
  357. do {
  358. if (tok_is_infix(l2_lexer_peek(lexer, 1))) {
  359. do {
  360. // We already have one value (the lhs) on the stack,
  361. // so we need to parse the operator, then the rhs
  362. // Operator
  363. if (parse_arg_level_expression(lexer, gen, err) < 0) {
  364. return -1;
  365. }
  366. // RHS
  367. if (parse_arg_level_expression(lexer, gen, err) < 0) {
  368. return -1;
  369. }
  370. l2_gen_func_call_infix(gen);
  371. } while (tok_is_infix(l2_lexer_peek(lexer, 1)));
  372. // If this was the "first argument", this wasn't a function call
  373. // after all, it was just a (series of?) infix calls.
  374. if (argc == 0) {
  375. return 0;
  376. }
  377. // Don't increment argc here, because after an infix, we have
  378. // neither added nor removed an arguemnt, just transformed one
  379. } else {
  380. l2_trace_scope("func call param");
  381. if (parse_arg_level_expression(lexer, gen, err) < 0) {
  382. return -1;
  383. }
  384. argc += 1;
  385. }
  386. } while (!tok_is_end(l2_lexer_peek(lexer, 1)));
  387. // The 'argc' previous expressions were arguments, the one before that was the function
  388. l2_gen_func_call(gen, argc);
  389. return 0;
  390. }
  391. static int parse_expression(
  392. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  393. l2_trace_scope("expression");
  394. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  395. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  396. if (
  397. l2_token_get_kind(tok) == L2_TOK_IDENT &&
  398. l2_token_get_kind(tok2) == L2_TOK_COLON_EQ) {
  399. l2_trace_scope("assign expression");
  400. l2_trace("ident '%s'", tok->v.str);
  401. struct l2_token_value ident = l2_token_extract_val(tok);
  402. l2_lexer_consume(lexer); // ident
  403. l2_lexer_consume(lexer); // :=
  404. if (parse_expression(lexer, gen, err) < 0) {
  405. if (!(ident.flags & L2_TOK_SMALL)) free(ident.str);
  406. return -1;
  407. }
  408. if (ident.flags & L2_TOK_SMALL) {
  409. l2_gen_stack_frame_set_copy(gen, ident.strbuf);
  410. } else {
  411. l2_gen_stack_frame_set(gen, &ident.str);
  412. }
  413. } else if (
  414. l2_token_get_kind(tok) == L2_TOK_IDENT &&
  415. l2_token_get_kind(tok2) == L2_TOK_EQUALS) {
  416. l2_trace_scope("replacement assign expression");
  417. l2_trace("ident '%s'", tok->v.str);
  418. struct l2_token_value ident = l2_token_extract_val(tok);
  419. l2_lexer_consume(lexer); // ident
  420. l2_lexer_consume(lexer); // =
  421. if (parse_expression(lexer, gen, err) < 0) {
  422. if (!(ident.flags & L2_TOK_SMALL)) free(ident.str);
  423. return -1;
  424. }
  425. if (ident.flags & L2_TOK_SMALL) {
  426. l2_gen_stack_frame_replace_copy(gen, ident.strbuf);
  427. } else {
  428. l2_gen_stack_frame_replace(gen, &ident.str);
  429. }
  430. } else {
  431. if (parse_arg_level_expression(lexer, gen, err) < 0) {
  432. return -1;
  433. }
  434. if (!tok_is_end(l2_lexer_peek(lexer, 1))) {
  435. if (parse_func_call_after_base(lexer, gen, err) < 0) {
  436. return -1;
  437. }
  438. }
  439. }
  440. return 0;
  441. }
  442. int l2_parse_program(
  443. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  444. l2_trace_scope("program");
  445. while (1) {
  446. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  447. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_EOF) {
  448. break;
  449. }
  450. if (parse_expression(lexer, gen, err) < 0) {
  451. l2_gen_halt(gen);
  452. l2_gen_flush(gen);
  453. return -1;
  454. }
  455. l2_gen_discard(gen);
  456. }
  457. l2_gen_halt(gen);
  458. l2_gen_flush(gen);
  459. return 0;
  460. }