You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

parse.c 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580
  1. #include "parse/parse.h"
  2. #include "trace.h"
  3. #include "gen/gen.h"
  4. static int tok_is_end(struct l2_token *tok) {
  5. enum l2_token_kind kind = l2_token_get_kind(tok);
  6. return
  7. kind == L2_TOK_CLOSE_BRACE || kind == L2_TOK_CLOSE_BRACKET ||
  8. kind == L2_TOK_CLOSE_PAREN || kind == L2_TOK_EOF ||
  9. kind == L2_TOK_EOL;
  10. }
  11. static int tok_is_infix(struct l2_token *tok) {
  12. if (l2_token_get_kind(tok) != L2_TOK_IDENT) return 0;
  13. const char *str = l2_token_get_str(tok);
  14. return
  15. (str[0] == '$' && str[1] != '\0') ||
  16. strcmp(str, "+") == 0 ||
  17. strcmp(str, "-") == 0 ||
  18. strcmp(str, "*") == 0 ||
  19. strcmp(str, "/") == 0 ||
  20. strcmp(str, "==") == 0 ||
  21. strcmp(str, "!=") == 0 ||
  22. strcmp(str, "<") == 0 ||
  23. strcmp(str, "<=") == 0 ||
  24. strcmp(str, ">") == 0 ||
  25. strcmp(str, ">=") == 0;
  26. }
  // Forward declarations: both functions are defined further down in this
  // file but are called by the literal parsers that come before them.
  static int parse_expression(
          struct l2_lexer *lexer,
          struct l2_generator *gen,
          struct l2_parse_error *err);

  static int parse_arg_level_expression(
          struct l2_lexer *lexer,
          struct l2_generator *gen,
          struct l2_parse_error *err);
  31. static int parse_object_literal(
  32. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  33. l2_trace_scope("object literal");
  34. // '{' and EOL already skipped by parse_object_or_function_literal
  35. l2_gen_namespace(gen);
  36. while (1) {
  37. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  38. if (l2_token_get_kind(tok) == L2_TOK_CLOSE_BRACE) {
  39. l2_lexer_consume(lexer); // '}'
  40. break;
  41. } else if (l2_token_get_kind(tok) != L2_TOK_IDENT) {
  42. l2_parse_err(err, tok, "In object literal: Expected identifier, got %s",
  43. l2_token_get_name(tok));
  44. return -1;
  45. }
  46. l2_trace("key: '%s'", l2_token_get_str(tok));
  47. struct l2_token_value key = l2_token_extract_val(tok);
  48. l2_lexer_consume(lexer); // ident
  49. tok = l2_lexer_peek(lexer, 1);
  50. if (l2_token_get_kind(tok) != L2_TOK_COLON) {
  51. if (!(key.flags & L2_TOK_SMALL)) free(key.str);
  52. l2_parse_err(err, tok, "In object literal: Expected ':', got %s",
  53. l2_token_get_name(tok));
  54. return -1;
  55. }
  56. l2_lexer_consume(lexer); // ':'
  57. if (parse_expression(lexer, gen, err) < 0) {
  58. if (!(key.flags & L2_TOK_SMALL)) free(key.str);
  59. return -1;
  60. }
  61. if (key.flags & L2_TOK_SMALL) {
  62. l2_gen_namespace_set_copy(gen, key.strbuf);
  63. } else {
  64. l2_gen_namespace_set_copy(gen, key.str);
  65. }
  66. l2_gen_discard(gen);
  67. tok = l2_lexer_peek(lexer, 1);
  68. if (
  69. l2_token_get_kind(tok) != L2_TOK_EOL &&
  70. l2_token_get_kind(tok) != L2_TOK_CLOSE_BRACE) {
  71. l2_parse_err(err, tok, "In object literal: Expected EOL or '}', got %s",
  72. l2_token_get_name(tok));
  73. return -1;
  74. }
  75. if (l2_token_get_kind(tok) == L2_TOK_EOL) {
  76. l2_lexer_consume(lexer); // EOL
  77. }
  78. }
  79. return 0;
  80. }
  81. static int parse_function_literal_impl(
  82. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  83. l2_trace_scope("function literal");
  84. // '{' and EOL already skipped by parse_object_or_function_literal
  85. // The arguments array will be at the top of the stack
  86. char *ident = "$";
  87. l2_gen_stack_frame_set_copy(gen, ident);
  88. int first = 1;
  89. while (1) {
  90. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_CLOSE_BRACE) {
  91. l2_lexer_consume(lexer); // '}'
  92. break;
  93. }
  94. if (!first) {
  95. l2_gen_discard(gen);
  96. }
  97. l2_trace_scope("function literal expression");
  98. if (parse_expression(lexer, gen, err) < 0) {
  99. return -1;
  100. }
  101. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  102. first = 0;
  103. }
  104. // All functions must put _something_ on the stack
  105. if (first) {
  106. l2_gen_none(gen);
  107. }
  108. l2_gen_ret(gen);
  109. return 0;
  110. }
  111. static int parse_function_literal(
  112. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  113. l2_gen_flush(gen);
  114. struct l2_io_writer *prev_writer = gen->writer.w;
  115. // Generate the function to a buffer in memory
  116. struct l2_io_mem_writer w = {0};
  117. w.w.write = l2_io_mem_write;
  118. gen->writer.w = &w.w;
  119. // Generates five bytes; RJMP, then 4 byte counter
  120. l2_gen_rjmp_placeholder(gen);
  121. l2_word pos = gen->pos;
  122. // Generate the function body itself
  123. int ret = parse_function_literal_impl(lexer, gen, err);
  124. l2_gen_flush(gen);
  125. gen->writer.w = prev_writer;
  126. if (ret < 0) {
  127. free(w.mem);
  128. return -1;
  129. }
  130. unsigned char *bc = w.mem;
  131. l2_word jdist = w.len - 5;
  132. // Write the jump distance (little endian)
  133. bc[1] = (jdist >> 0) & 0xff;
  134. bc[2] = (jdist >> 8) & 0xff;
  135. bc[3] = (jdist >> 16) & 0xff;
  136. bc[4] = (jdist >> 24) & 0xff;
  137. l2_bufio_put_n(&gen->writer, bc, w.len);
  138. free(w.mem);
  139. l2_gen_function(gen, pos);
  140. return 0;
  141. }
  142. static int parse_object_or_function_literal(
  143. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  144. l2_trace_scope("object or function literal");
  145. l2_lexer_consume(lexer); // '{'
  146. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  147. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  148. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  149. if (l2_token_get_kind(tok) == L2_TOK_CLOSE_BRACE) {
  150. l2_trace_scope("empty object literal");
  151. l2_lexer_consume(lexer); // '}'
  152. l2_gen_namespace(gen);
  153. } else if (
  154. l2_token_get_kind(tok) == L2_TOK_IDENT &&
  155. l2_token_get_kind(tok2) == L2_TOK_COLON) {
  156. if (parse_object_literal(lexer, gen, err) < 0) {
  157. return -1;
  158. }
  159. } else {
  160. if (parse_function_literal(lexer, gen, err) < 0) {
  161. return -1;
  162. }
  163. }
  164. return 0;
  165. }
  166. static int parse_array_literal(
  167. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  168. l2_trace_scope("array literal");
  169. l2_lexer_consume(lexer); // '['
  170. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  171. int count = 0;
  172. while (1) {
  173. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_CLOSE_BRACKET) {
  174. l2_lexer_consume(lexer); // ']'
  175. break;
  176. }
  177. count += 1;
  178. if (parse_arg_level_expression(lexer, gen, err) < 0) {
  179. return -1;
  180. }
  181. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  182. }
  183. l2_gen_array(gen, count);
  184. return 0;
  185. }
  186. static int parse_arg_level_expression_base(
  187. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  188. l2_trace_scope("arg level expression base");
  189. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  190. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  191. if (l2_token_get_kind(tok) == L2_TOK_OPEN_PAREN) {
  192. l2_trace_scope("group expr");
  193. l2_lexer_consume(lexer); // '('
  194. if (parse_expression(lexer, gen, err) < 0) {
  195. return -1;
  196. }
  197. tok = l2_lexer_peek(lexer, 1);
  198. if (l2_token_get_kind(tok) != L2_TOK_CLOSE_PAREN) {
  199. l2_parse_err(err, tok, "Expected ')', got %s",
  200. l2_token_get_name(tok));
  201. return -1;
  202. }
  203. l2_lexer_consume(lexer); // ')'
  204. } else if (l2_token_get_kind(tok) == L2_TOK_IDENT) {
  205. l2_trace_scope("ident");
  206. l2_trace("ident '%s'", l2_token_get_str(tok));
  207. struct l2_token_value ident = l2_token_extract_val(tok);
  208. l2_lexer_consume(lexer); // ident
  209. if (ident.flags & L2_TOK_SMALL) {
  210. l2_gen_stack_frame_lookup_copy(gen, ident.strbuf);
  211. } else {
  212. l2_gen_stack_frame_lookup(gen, &ident.str);
  213. }
  214. } else if (l2_token_get_kind(tok) == L2_TOK_NUMBER) {
  215. l2_trace_scope("number literal");
  216. l2_trace("number %g", tok->v.num);
  217. double number = tok->v.num;
  218. l2_lexer_consume(lexer); // number
  219. l2_gen_number(gen, number);
  220. } else if (l2_token_get_kind(tok) == L2_TOK_STRING) {
  221. l2_trace_scope("string literal");
  222. l2_trace("string '%s'", l2_token_get_str(tok));
  223. struct l2_token_value str = l2_token_extract_val(tok);
  224. l2_lexer_consume(lexer); // string
  225. if (str.flags & L2_TOK_SMALL) {
  226. l2_gen_string_copy(gen, str.strbuf);
  227. } else {
  228. l2_gen_string(gen, &str.str);
  229. }
  230. } else if (
  231. l2_token_get_kind(tok) == L2_TOK_QUOT &&
  232. l2_token_get_kind(tok2) == L2_TOK_IDENT) {
  233. l2_trace_scope("atom literal");
  234. l2_trace("atom '%s'", l2_token_get_str(tok2));
  235. struct l2_token_value ident = l2_token_extract_val(tok2);
  236. l2_lexer_consume(lexer); // "'"
  237. l2_lexer_consume(lexer); // ident
  238. if (ident.flags & L2_TOK_SMALL) {
  239. l2_gen_atom_copy(gen, ident.strbuf);
  240. } else {
  241. l2_gen_atom(gen, &ident.str);
  242. }
  243. } else if (l2_token_get_kind(tok) == L2_TOK_OPEN_BRACE) {
  244. if (parse_object_or_function_literal(lexer, gen, err) < 0) {
  245. return -1;
  246. }
  247. } else if (l2_token_get_kind(tok) == L2_TOK_OPEN_BRACKET) {
  248. if (parse_array_literal(lexer, gen, err) < 0) {
  249. return -1;
  250. }
  251. } else {
  252. l2_parse_err(err, tok, "Unexpected token %s",
  253. l2_token_get_name(tok));
  254. return -1;
  255. }
  256. return 0;
  257. }
  // Parses what follows a base expression that is already on the stack:
  // either a chain of infix calls or the arguments of a function call.
  //
  // `infix_start` is the number of arguments that must already have been
  // parsed before an infix-looking token is treated as an infix operator
  // rather than as a plain argument.
  //
  // Returns 0 on success and -1 if any sub-expression fails to parse. When
  // the very first thing parsed is a chain of infix calls, the function
  // returns right after that chain without emitting a regular call.
  static int parse_func_call_after_base(
          struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err,
          size_t infix_start) {
      l2_trace_scope("func call after base");

      size_t argc = 0;

      do {
          int infix_candidate =
              argc >= infix_start && tok_is_infix(l2_lexer_peek(lexer, 1));

          if (!infix_candidate) {
              l2_trace_scope("func call param");
              if (parse_arg_level_expression(lexer, gen, err) < 0) {
                  return -1;
              }

              argc += 1;
              continue;
          }

          do {
              // We already have one value (the lhs) on the stack,
              // so we need to parse the operator, then the rhs

              // Operator
              int op_result = parse_arg_level_expression(lexer, gen, err);
              if (op_result < 0) {
                  return -1;
              }

              // If the operator wasn't just the one base expression,
              // abort; we're not doing the infix call.
              // What was parsed counts as an ordinary argument instead.
              if (op_result == 1) {
                  argc += 1;
                  break;
              }

              // RHS
              if (parse_arg_level_expression(lexer, gen, err) < 0) {
                  return -1;
              }

              l2_gen_func_call_infix(gen);
          } while (tok_is_infix(l2_lexer_peek(lexer, 1)));

          // If this was the "first argument", this wasn't a function call
          // after all, it was just a (series of?) infix calls.
          if (argc == 0) {
              return 0;
          }

          // Don't increment argc here, because after an infix, we have
          // neither added nor removed an argument, just transformed one
      } while (!tok_is_end(l2_lexer_peek(lexer, 1)));

      // The 'argc' previous expressions were arguments, the one before that was the function
      l2_gen_func_call(gen, argc);

      return 0;
  }
  304. static int parse_arg_level_expression(
  305. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  306. l2_trace_scope("arg level expression");
  307. if (parse_arg_level_expression_base(lexer, gen, err) < 0) {
  308. return -1;
  309. }
  310. int ret = 0;
  311. while (1) {
  312. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  313. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  314. struct l2_token *tok3 = l2_lexer_peek(lexer, 3);
  315. if (l2_token_get_kind(tok) == L2_TOK_OPEN_PAREN_NS) {
  316. l2_trace_scope("parenthesized func call");
  317. l2_lexer_consume(lexer); // '('
  318. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_CLOSE_PAREN) {
  319. l2_lexer_consume(lexer); // ')'
  320. l2_gen_func_call(gen, 0);
  321. } else {
  322. if (parse_func_call_after_base(lexer, gen, err, 1) < 0) {
  323. return -1;
  324. }
  325. tok = l2_lexer_peek(lexer, 1);
  326. if (l2_token_get_kind(tok) != L2_TOK_CLOSE_PAREN) {
  327. l2_parse_err(err, tok, "Expected ')', got %s",
  328. l2_token_get_name(tok));
  329. return -1;
  330. }
  331. l2_lexer_consume(lexer); // ')'
  332. }
  333. } else if (
  334. l2_token_get_kind(tok) == L2_TOK_PERIOD &&
  335. l2_token_get_kind(tok2) == L2_TOK_IDENT &&
  336. l2_token_get_kind(tok3) == L2_TOK_EQUALS) {
  337. l2_trace_scope("namespace assign");
  338. l2_trace("ident '%s'", l2_token_get_str(tok2));
  339. struct l2_token_value ident = l2_token_extract_val(tok2);
  340. l2_lexer_consume(lexer); // '.'
  341. l2_lexer_consume(lexer); // ident
  342. l2_lexer_consume(lexer); // '='
  343. if (parse_expression(lexer, gen, err) < 0) {
  344. if (!(ident.flags & L2_TOK_SMALL)) free(ident.str);
  345. return -1;
  346. }
  347. if (ident.flags & L2_TOK_SMALL) {
  348. l2_gen_namespace_set_copy(gen, ident.strbuf);
  349. } else {
  350. l2_gen_namespace_set(gen, &ident.str);
  351. }
  352. l2_gen_swap_discard(gen);
  353. } else if (
  354. l2_token_get_kind(tok) == L2_TOK_PERIOD &&
  355. l2_token_get_kind(tok2) == L2_TOK_IDENT) {
  356. l2_trace_scope("namespace lookup");
  357. l2_trace("ident '%s'", l2_token_get_str(tok2));
  358. struct l2_token_value ident = l2_token_extract_val(tok2);
  359. l2_lexer_consume(lexer); // '.'
  360. l2_lexer_consume(lexer); // ident
  361. if (ident.flags & L2_TOK_SMALL) {
  362. l2_gen_namespace_lookup_copy(gen, ident.strbuf);
  363. } else {
  364. l2_gen_namespace_lookup(gen, &ident.str);
  365. }
  366. } else if (
  367. l2_token_get_kind(tok) == L2_TOK_DOT_NUMBER &&
  368. l2_token_get_kind(tok2) == L2_TOK_EQUALS) {
  369. l2_trace_scope("direct array assign");
  370. int number = tok->v.integer;
  371. l2_lexer_consume(lexer); // dot-number
  372. l2_lexer_consume(lexer); // '='
  373. if (parse_expression(lexer, gen, err) < 0) {
  374. return -1;
  375. }
  376. l2_gen_array_set(gen, number);
  377. l2_gen_swap_discard(gen);
  378. } else if (l2_token_get_kind(tok) == L2_TOK_DOT_NUMBER) {
  379. l2_trace_scope("direct array lookup");
  380. int number = tok->v.integer;
  381. l2_lexer_consume(lexer); // dot-number
  382. l2_gen_array_lookup(gen, number);
  383. } else if (
  384. l2_token_get_kind(tok) == L2_TOK_PERIOD &&
  385. l2_token_get_kind(tok2) == L2_TOK_OPEN_PAREN_NS) {
  386. l2_trace_scope("dynamic lookup");
  387. l2_lexer_consume(lexer); // '.'
  388. l2_lexer_consume(lexer); // '('
  389. if (parse_expression(lexer, gen, err) < 0) {
  390. return -1;
  391. }
  392. tok = l2_lexer_peek(lexer, 1);
  393. if (l2_token_get_kind(tok) != L2_TOK_CLOSE_PAREN) {
  394. l2_parse_err(err, tok, "Expected ')', got %s",
  395. l2_token_get_name(tok));
  396. return -1;
  397. }
  398. l2_lexer_consume(lexer); // ')'
  399. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_EQUALS) {
  400. l2_lexer_consume(lexer); // '='
  401. if (parse_expression(lexer, gen, err) < 0) {
  402. return -1;
  403. }
  404. l2_gen_dynamic_set(gen);
  405. } else {
  406. l2_gen_dynamic_lookup(gen);
  407. }
  408. } else {
  409. break;
  410. }
  411. ret = 1;
  412. }
  413. return ret;
  414. }
  415. static int parse_expression(
  416. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  417. l2_trace_scope("expression");
  418. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  419. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  420. if (
  421. l2_token_get_kind(tok) == L2_TOK_IDENT &&
  422. l2_token_get_kind(tok2) == L2_TOK_COLON_EQ) {
  423. l2_trace_scope("assign expression");
  424. l2_trace("ident '%s'", l2_token_get_str(tok));
  425. struct l2_token_value ident = l2_token_extract_val(tok);
  426. l2_lexer_consume(lexer); // ident
  427. l2_lexer_consume(lexer); // :=
  428. if (parse_expression(lexer, gen, err) < 0) {
  429. if (!(ident.flags & L2_TOK_SMALL)) free(ident.str);
  430. return -1;
  431. }
  432. if (ident.flags & L2_TOK_SMALL) {
  433. l2_gen_stack_frame_set_copy(gen, ident.strbuf);
  434. } else {
  435. l2_gen_stack_frame_set(gen, &ident.str);
  436. }
  437. } else if (
  438. l2_token_get_kind(tok) == L2_TOK_IDENT &&
  439. l2_token_get_kind(tok2) == L2_TOK_EQUALS) {
  440. l2_trace_scope("replacement assign expression");
  441. l2_trace("ident '%s'", l2_token_get_str(tok));
  442. struct l2_token_value ident = l2_token_extract_val(tok);
  443. l2_lexer_consume(lexer); // ident
  444. l2_lexer_consume(lexer); // =
  445. if (parse_expression(lexer, gen, err) < 0) {
  446. if (!(ident.flags & L2_TOK_SMALL)) free(ident.str);
  447. return -1;
  448. }
  449. if (ident.flags & L2_TOK_SMALL) {
  450. l2_gen_stack_frame_replace_copy(gen, ident.strbuf);
  451. } else {
  452. l2_gen_stack_frame_replace(gen, &ident.str);
  453. }
  454. } else {
  455. if (parse_arg_level_expression(lexer, gen, err) < 0) {
  456. return -1;
  457. }
  458. if (!tok_is_end(l2_lexer_peek(lexer, 1))) {
  459. if (parse_func_call_after_base(lexer, gen, err, 0) < 0) {
  460. return -1;
  461. }
  462. }
  463. }
  464. return 0;
  465. }
  466. int l2_parse_program(
  467. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  468. l2_trace_scope("program");
  469. while (1) {
  470. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  471. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_EOF) {
  472. break;
  473. }
  474. if (parse_expression(lexer, gen, err) < 0) {
  475. l2_gen_halt(gen);
  476. l2_gen_flush(gen);
  477. return -1;
  478. }
  479. l2_gen_discard(gen);
  480. }
  481. l2_gen_halt(gen);
  482. l2_gen_flush(gen);
  483. return 0;
  484. }