You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

parse.c 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583
  1. #include "parse/parse.h"
  2. #include "trace.h"
  3. #include "gen/gen.h"
  4. static int tok_is_end(struct l2_token *tok) {
  5. enum l2_token_kind kind = l2_token_get_kind(tok);
  6. return
  7. kind == L2_TOK_CLOSE_BRACE || kind == L2_TOK_CLOSE_BRACKET ||
  8. kind == L2_TOK_CLOSE_PAREN || kind == L2_TOK_EOF ||
  9. kind == L2_TOK_EOL;
  10. }
  11. static int tok_is_infix(struct l2_token *tok) {
  12. if (l2_token_get_kind(tok) != L2_TOK_IDENT) return 0;
  13. const char *str = l2_token_get_str(tok);
  14. return
  15. (str[0] == '$' && str[1] != '\0') ||
  16. strcmp(str, "+") == 0 ||
  17. strcmp(str, "-") == 0 ||
  18. strcmp(str, "*") == 0 ||
  19. strcmp(str, "/") == 0 ||
  20. strcmp(str, "==") == 0 ||
  21. strcmp(str, "!=") == 0 ||
  22. strcmp(str, "<") == 0 ||
  23. strcmp(str, "<=") == 0 ||
  24. strcmp(str, ">") == 0 ||
  25. strcmp(str, ">=") == 0 ||
  26. strcmp(str, "&&") == 0 ||
  27. strcmp(str, "||") == 0 ||
  28. strcmp(str, "??") == 0;
  29. }
  30. static int parse_expression(
  31. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err);
  32. static int parse_arg_level_expression(
  33. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err);
  34. static int parse_object_literal(
  35. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  36. l2_trace_scope("object literal");
  37. // '{' and EOL already skipped by parse_object_or_function_literal
  38. l2_gen_namespace(gen);
  39. while (1) {
  40. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  41. if (l2_token_get_kind(tok) == L2_TOK_CLOSE_BRACE) {
  42. l2_lexer_consume(lexer); // '}'
  43. break;
  44. } else if (l2_token_get_kind(tok) != L2_TOK_IDENT) {
  45. l2_parse_err(err, tok, "In object literal: Expected identifier, got %s",
  46. l2_token_get_name(tok));
  47. return -1;
  48. }
  49. l2_trace("key: '%s'", l2_token_get_str(tok));
  50. struct l2_token_value key = l2_token_extract_val(tok);
  51. l2_lexer_consume(lexer); // ident
  52. tok = l2_lexer_peek(lexer, 1);
  53. if (l2_token_get_kind(tok) != L2_TOK_COLON) {
  54. if (!(key.flags & L2_TOK_SMALL)) free(key.str);
  55. l2_parse_err(err, tok, "In object literal: Expected ':', got %s",
  56. l2_token_get_name(tok));
  57. return -1;
  58. }
  59. l2_lexer_consume(lexer); // ':'
  60. if (parse_expression(lexer, gen, err) < 0) {
  61. if (!(key.flags & L2_TOK_SMALL)) free(key.str);
  62. return -1;
  63. }
  64. if (key.flags & L2_TOK_SMALL) {
  65. l2_gen_namespace_set_copy(gen, key.strbuf);
  66. } else {
  67. l2_gen_namespace_set_copy(gen, key.str);
  68. }
  69. l2_gen_discard(gen);
  70. tok = l2_lexer_peek(lexer, 1);
  71. if (
  72. l2_token_get_kind(tok) != L2_TOK_EOL &&
  73. l2_token_get_kind(tok) != L2_TOK_CLOSE_BRACE) {
  74. l2_parse_err(err, tok, "In object literal: Expected EOL or '}', got %s",
  75. l2_token_get_name(tok));
  76. return -1;
  77. }
  78. if (l2_token_get_kind(tok) == L2_TOK_EOL) {
  79. l2_lexer_consume(lexer); // EOL
  80. }
  81. }
  82. return 0;
  83. }
  84. static int parse_function_literal_impl(
  85. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  86. l2_trace_scope("function literal");
  87. // '{' and EOL already skipped by parse_object_or_function_literal
  88. // The arguments array will be at the top of the stack
  89. char *ident = "$";
  90. l2_gen_stack_frame_set_copy(gen, ident);
  91. int first = 1;
  92. while (1) {
  93. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_CLOSE_BRACE) {
  94. l2_lexer_consume(lexer); // '}'
  95. break;
  96. }
  97. if (!first) {
  98. l2_gen_discard(gen);
  99. }
  100. l2_trace_scope("function literal expression");
  101. if (parse_expression(lexer, gen, err) < 0) {
  102. return -1;
  103. }
  104. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  105. first = 0;
  106. }
  107. // All functions must put _something_ on the stack
  108. if (first) {
  109. l2_gen_none(gen);
  110. }
  111. l2_gen_ret(gen);
  112. return 0;
  113. }
  114. static int parse_function_literal(
  115. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  116. l2_gen_flush(gen);
  117. struct l2_io_writer *prev_writer = gen->writer.w;
  118. // Generate the function to a buffer in memory
  119. struct l2_io_mem_writer w = {0};
  120. w.w.write = l2_io_mem_write;
  121. gen->writer.w = &w.w;
  122. // Generates five bytes; RJMP, then 4 byte counter
  123. l2_gen_rjmp_placeholder(gen);
  124. l2_word pos = gen->pos;
  125. // Generate the function body itself
  126. int ret = parse_function_literal_impl(lexer, gen, err);
  127. l2_gen_flush(gen);
  128. gen->writer.w = prev_writer;
  129. if (ret < 0) {
  130. free(w.mem);
  131. return -1;
  132. }
  133. unsigned char *bc = w.mem;
  134. l2_word jdist = w.len - 5;
  135. // Write the jump distance (little endian)
  136. bc[1] = (jdist >> 0) & 0xff;
  137. bc[2] = (jdist >> 8) & 0xff;
  138. bc[3] = (jdist >> 16) & 0xff;
  139. bc[4] = (jdist >> 24) & 0xff;
  140. l2_bufio_put_n(&gen->writer, bc, w.len);
  141. free(w.mem);
  142. l2_gen_function(gen, pos);
  143. return 0;
  144. }
  145. static int parse_object_or_function_literal(
  146. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  147. l2_trace_scope("object or function literal");
  148. l2_lexer_consume(lexer); // '{'
  149. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  150. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  151. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  152. if (l2_token_get_kind(tok) == L2_TOK_CLOSE_BRACE) {
  153. l2_trace_scope("empty object literal");
  154. l2_lexer_consume(lexer); // '}'
  155. l2_gen_namespace(gen);
  156. } else if (
  157. l2_token_get_kind(tok) == L2_TOK_IDENT &&
  158. l2_token_get_kind(tok2) == L2_TOK_COLON) {
  159. if (parse_object_literal(lexer, gen, err) < 0) {
  160. return -1;
  161. }
  162. } else {
  163. if (parse_function_literal(lexer, gen, err) < 0) {
  164. return -1;
  165. }
  166. }
  167. return 0;
  168. }
  169. static int parse_array_literal(
  170. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  171. l2_trace_scope("array literal");
  172. l2_lexer_consume(lexer); // '['
  173. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  174. int count = 0;
  175. while (1) {
  176. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_CLOSE_BRACKET) {
  177. l2_lexer_consume(lexer); // ']'
  178. break;
  179. }
  180. count += 1;
  181. if (parse_arg_level_expression(lexer, gen, err) < 0) {
  182. return -1;
  183. }
  184. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  185. }
  186. l2_gen_array(gen, count);
  187. return 0;
  188. }
  189. static int parse_arg_level_expression_base(
  190. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  191. l2_trace_scope("arg level expression base");
  192. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  193. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  194. if (l2_token_get_kind(tok) == L2_TOK_OPEN_PAREN) {
  195. l2_trace_scope("group expr");
  196. l2_lexer_consume(lexer); // '('
  197. if (parse_expression(lexer, gen, err) < 0) {
  198. return -1;
  199. }
  200. tok = l2_lexer_peek(lexer, 1);
  201. if (l2_token_get_kind(tok) != L2_TOK_CLOSE_PAREN) {
  202. l2_parse_err(err, tok, "Expected ')', got %s",
  203. l2_token_get_name(tok));
  204. return -1;
  205. }
  206. l2_lexer_consume(lexer); // ')'
  207. } else if (l2_token_get_kind(tok) == L2_TOK_IDENT) {
  208. l2_trace_scope("ident");
  209. l2_trace("ident '%s'", l2_token_get_str(tok));
  210. struct l2_token_value ident = l2_token_extract_val(tok);
  211. l2_lexer_consume(lexer); // ident
  212. if (ident.flags & L2_TOK_SMALL) {
  213. l2_gen_stack_frame_lookup_copy(gen, ident.strbuf);
  214. } else {
  215. l2_gen_stack_frame_lookup(gen, &ident.str);
  216. }
  217. } else if (l2_token_get_kind(tok) == L2_TOK_NUMBER) {
  218. l2_trace_scope("number literal");
  219. l2_trace("number %g", tok->v.num);
  220. double number = tok->v.num;
  221. l2_lexer_consume(lexer); // number
  222. l2_gen_number(gen, number);
  223. } else if (l2_token_get_kind(tok) == L2_TOK_STRING) {
  224. l2_trace_scope("string literal");
  225. l2_trace("string '%s'", l2_token_get_str(tok));
  226. struct l2_token_value str = l2_token_extract_val(tok);
  227. l2_lexer_consume(lexer); // string
  228. if (str.flags & L2_TOK_SMALL) {
  229. l2_gen_string_copy(gen, str.strbuf);
  230. } else {
  231. l2_gen_string(gen, &str.str);
  232. }
  233. } else if (
  234. l2_token_get_kind(tok) == L2_TOK_QUOT &&
  235. l2_token_get_kind(tok2) == L2_TOK_IDENT) {
  236. l2_trace_scope("atom literal");
  237. l2_trace("atom '%s'", l2_token_get_str(tok2));
  238. struct l2_token_value ident = l2_token_extract_val(tok2);
  239. l2_lexer_consume(lexer); // "'"
  240. l2_lexer_consume(lexer); // ident
  241. if (ident.flags & L2_TOK_SMALL) {
  242. l2_gen_atom_copy(gen, ident.strbuf);
  243. } else {
  244. l2_gen_atom(gen, &ident.str);
  245. }
  246. } else if (l2_token_get_kind(tok) == L2_TOK_OPEN_BRACE) {
  247. if (parse_object_or_function_literal(lexer, gen, err) < 0) {
  248. return -1;
  249. }
  250. } else if (l2_token_get_kind(tok) == L2_TOK_OPEN_BRACKET) {
  251. if (parse_array_literal(lexer, gen, err) < 0) {
  252. return -1;
  253. }
  254. } else {
  255. l2_parse_err(err, tok, "Unexpected token %s",
  256. l2_token_get_name(tok));
  257. return -1;
  258. }
  259. return 0;
  260. }
  // Parse what follows the base of an expression: either a chain of infix
  // calls, or the arguments of a function call (which may themselves contain
  // infix calls).
  //
  // `infix_start` is the number of arguments which must have been parsed
  // before an infix identifier is treated as an infix operator rather than as
  // an ordinary argument.
  //
  // Returns 0 on success, -1 if parsing a sub-expression failed. When the
  // input turns out to be nothing but infix calls applied to the base, no
  // function call is generated.
  static int parse_func_call_after_base(
          struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err,
          size_t infix_start) {
      l2_trace_scope("func call after base");

      size_t argc = 0;

      do {
          int at_infix =
              argc >= infix_start && tok_is_infix(l2_lexer_peek(lexer, 1));

          if (!at_infix) {
              l2_trace_scope("func call param");
              if (parse_arg_level_expression(lexer, gen, err) < 0) {
                  return -1;
              }

              argc += 1;
              continue;
          }

          do {
              // We already have one value (the lhs) on the stack,
              // so we need to parse the operator, then the rhs

              // Operator
              int op_status = parse_arg_level_expression(lexer, gen, err);
              if (op_status < 0) {
                  return -1;
              }

              // If the operator wasn't just the one base expression,
              // abort; we're not doing the infix call.
              // What we parsed counts as an ordinary argument instead.
              if (op_status == 1) {
                  argc += 1;
                  break;
              }

              // RHS
              if (parse_arg_level_expression(lexer, gen, err) < 0) {
                  return -1;
              }

              l2_gen_func_call_infix(gen);
          } while (tok_is_infix(l2_lexer_peek(lexer, 1)));

          // If this was the "first argument", this wasn't a function call
          // after all, it was just a (series of?) infix calls.
          if (argc == 0) {
              return 0;
          }

          // Don't increment argc here, because after an infix, we have
          // neither added nor removed an argument, just transformed one
      } while (!tok_is_end(l2_lexer_peek(lexer, 1)));

      // The 'argc' previous expressions were arguments, the one before that was the function
      l2_gen_func_call(gen, argc);
      return 0;
  }
  307. static int parse_arg_level_expression(
  308. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  309. l2_trace_scope("arg level expression");
  310. if (parse_arg_level_expression_base(lexer, gen, err) < 0) {
  311. return -1;
  312. }
  313. int ret = 0;
  314. while (1) {
  315. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  316. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  317. struct l2_token *tok3 = l2_lexer_peek(lexer, 3);
  318. if (l2_token_get_kind(tok) == L2_TOK_OPEN_PAREN_NS) {
  319. l2_trace_scope("parenthesized func call");
  320. l2_lexer_consume(lexer); // '('
  321. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_CLOSE_PAREN) {
  322. l2_lexer_consume(lexer); // ')'
  323. l2_gen_func_call(gen, 0);
  324. } else {
  325. if (parse_func_call_after_base(lexer, gen, err, 1) < 0) {
  326. return -1;
  327. }
  328. tok = l2_lexer_peek(lexer, 1);
  329. if (l2_token_get_kind(tok) != L2_TOK_CLOSE_PAREN) {
  330. l2_parse_err(err, tok, "Expected ')', got %s",
  331. l2_token_get_name(tok));
  332. return -1;
  333. }
  334. l2_lexer_consume(lexer); // ')'
  335. }
  336. } else if (
  337. l2_token_get_kind(tok) == L2_TOK_PERIOD &&
  338. l2_token_get_kind(tok2) == L2_TOK_IDENT &&
  339. l2_token_get_kind(tok3) == L2_TOK_EQUALS) {
  340. l2_trace_scope("namespace assign");
  341. l2_trace("ident '%s'", l2_token_get_str(tok2));
  342. struct l2_token_value ident = l2_token_extract_val(tok2);
  343. l2_lexer_consume(lexer); // '.'
  344. l2_lexer_consume(lexer); // ident
  345. l2_lexer_consume(lexer); // '='
  346. if (parse_expression(lexer, gen, err) < 0) {
  347. if (!(ident.flags & L2_TOK_SMALL)) free(ident.str);
  348. return -1;
  349. }
  350. if (ident.flags & L2_TOK_SMALL) {
  351. l2_gen_namespace_set_copy(gen, ident.strbuf);
  352. } else {
  353. l2_gen_namespace_set(gen, &ident.str);
  354. }
  355. l2_gen_swap_discard(gen);
  356. } else if (
  357. l2_token_get_kind(tok) == L2_TOK_PERIOD &&
  358. l2_token_get_kind(tok2) == L2_TOK_IDENT) {
  359. l2_trace_scope("namespace lookup");
  360. l2_trace("ident '%s'", l2_token_get_str(tok2));
  361. struct l2_token_value ident = l2_token_extract_val(tok2);
  362. l2_lexer_consume(lexer); // '.'
  363. l2_lexer_consume(lexer); // ident
  364. if (ident.flags & L2_TOK_SMALL) {
  365. l2_gen_namespace_lookup_copy(gen, ident.strbuf);
  366. } else {
  367. l2_gen_namespace_lookup(gen, &ident.str);
  368. }
  369. } else if (
  370. l2_token_get_kind(tok) == L2_TOK_DOT_NUMBER &&
  371. l2_token_get_kind(tok2) == L2_TOK_EQUALS) {
  372. l2_trace_scope("direct array assign");
  373. int number = tok->v.integer;
  374. l2_lexer_consume(lexer); // dot-number
  375. l2_lexer_consume(lexer); // '='
  376. if (parse_expression(lexer, gen, err) < 0) {
  377. return -1;
  378. }
  379. l2_gen_array_set(gen, number);
  380. l2_gen_swap_discard(gen);
  381. } else if (l2_token_get_kind(tok) == L2_TOK_DOT_NUMBER) {
  382. l2_trace_scope("direct array lookup");
  383. int number = tok->v.integer;
  384. l2_lexer_consume(lexer); // dot-number
  385. l2_gen_array_lookup(gen, number);
  386. } else if (
  387. l2_token_get_kind(tok) == L2_TOK_PERIOD &&
  388. l2_token_get_kind(tok2) == L2_TOK_OPEN_PAREN_NS) {
  389. l2_trace_scope("dynamic lookup");
  390. l2_lexer_consume(lexer); // '.'
  391. l2_lexer_consume(lexer); // '('
  392. if (parse_expression(lexer, gen, err) < 0) {
  393. return -1;
  394. }
  395. tok = l2_lexer_peek(lexer, 1);
  396. if (l2_token_get_kind(tok) != L2_TOK_CLOSE_PAREN) {
  397. l2_parse_err(err, tok, "Expected ')', got %s",
  398. l2_token_get_name(tok));
  399. return -1;
  400. }
  401. l2_lexer_consume(lexer); // ')'
  402. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_EQUALS) {
  403. l2_lexer_consume(lexer); // '='
  404. if (parse_expression(lexer, gen, err) < 0) {
  405. return -1;
  406. }
  407. l2_gen_dynamic_set(gen);
  408. } else {
  409. l2_gen_dynamic_lookup(gen);
  410. }
  411. } else {
  412. break;
  413. }
  414. ret = 1;
  415. }
  416. return ret;
  417. }
  418. static int parse_expression(
  419. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  420. l2_trace_scope("expression");
  421. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  422. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  423. if (
  424. l2_token_get_kind(tok) == L2_TOK_IDENT &&
  425. l2_token_get_kind(tok2) == L2_TOK_COLON_EQ) {
  426. l2_trace_scope("assign expression");
  427. l2_trace("ident '%s'", l2_token_get_str(tok));
  428. struct l2_token_value ident = l2_token_extract_val(tok);
  429. l2_lexer_consume(lexer); // ident
  430. l2_lexer_consume(lexer); // :=
  431. if (parse_expression(lexer, gen, err) < 0) {
  432. if (!(ident.flags & L2_TOK_SMALL)) free(ident.str);
  433. return -1;
  434. }
  435. if (ident.flags & L2_TOK_SMALL) {
  436. l2_gen_stack_frame_set_copy(gen, ident.strbuf);
  437. } else {
  438. l2_gen_stack_frame_set(gen, &ident.str);
  439. }
  440. } else if (
  441. l2_token_get_kind(tok) == L2_TOK_IDENT &&
  442. l2_token_get_kind(tok2) == L2_TOK_EQUALS) {
  443. l2_trace_scope("replacement assign expression");
  444. l2_trace("ident '%s'", l2_token_get_str(tok));
  445. struct l2_token_value ident = l2_token_extract_val(tok);
  446. l2_lexer_consume(lexer); // ident
  447. l2_lexer_consume(lexer); // =
  448. if (parse_expression(lexer, gen, err) < 0) {
  449. if (!(ident.flags & L2_TOK_SMALL)) free(ident.str);
  450. return -1;
  451. }
  452. if (ident.flags & L2_TOK_SMALL) {
  453. l2_gen_stack_frame_replace_copy(gen, ident.strbuf);
  454. } else {
  455. l2_gen_stack_frame_replace(gen, &ident.str);
  456. }
  457. } else {
  458. if (parse_arg_level_expression(lexer, gen, err) < 0) {
  459. return -1;
  460. }
  461. if (!tok_is_end(l2_lexer_peek(lexer, 1))) {
  462. if (parse_func_call_after_base(lexer, gen, err, 0) < 0) {
  463. return -1;
  464. }
  465. }
  466. }
  467. return 0;
  468. }
  469. int l2_parse_program(
  470. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  471. l2_trace_scope("program");
  472. while (1) {
  473. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  474. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_EOF) {
  475. break;
  476. }
  477. if (parse_expression(lexer, gen, err) < 0) {
  478. l2_gen_halt(gen);
  479. l2_gen_flush(gen);
  480. return -1;
  481. }
  482. l2_gen_discard(gen);
  483. }
  484. l2_gen_halt(gen);
  485. l2_gen_flush(gen);
  486. return 0;
  487. }