You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

parse.c 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594
  1. #include "parse/parse.h"
  2. #include "trace.h"
  3. #include "gen/gen.h"
  4. static int tok_is_end(struct l2_token *tok) {
  5. enum l2_token_kind kind = l2_token_get_kind(tok);
  6. return
  7. kind == L2_TOK_CLOSE_BRACE || kind == L2_TOK_CLOSE_BRACKET ||
  8. kind == L2_TOK_CLOSE_PAREN || kind == L2_TOK_EOF ||
  9. kind == L2_TOK_EOL;
  10. }
  11. static int tok_is_infix(struct l2_token *tok) {
  12. if (l2_token_get_kind(tok) != L2_TOK_IDENT) return 0;
  13. char *str;
  14. if (l2_token_is_small(tok)) {
  15. str = tok->v.strbuf;
  16. } else {
  17. str = tok->v.str;
  18. }
  19. return
  20. (str[0] == '$' && str[1] != '\0') ||
  21. strcmp(str, "+") == 0 ||
  22. strcmp(str, "-") == 0 ||
  23. strcmp(str, "*") == 0 ||
  24. strcmp(str, "/") == 0 ||
  25. strcmp(str, "==") == 0 ||
  26. strcmp(str, "!=") == 0 ||
  27. strcmp(str, "<") == 0 ||
  28. strcmp(str, "<=") == 0 ||
  29. strcmp(str, ">") == 0 ||
  30. strcmp(str, ">=") == 0;
  31. }
  // Forward declarations: the literal parsers below call these two functions
  // before their definitions appear further down in this file.
  static int parse_expression(
          struct l2_lexer *lexer, struct l2_generator *gen,
          struct l2_parse_error *err);
  static int parse_arg_level_expression(
          struct l2_lexer *lexer, struct l2_generator *gen,
          struct l2_parse_error *err);
  36. static int parse_object_literal(
  37. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  38. l2_trace_scope("object literal");
  39. // '{' and EOL already skipped by parse_object_or_function_literal
  40. l2_gen_namespace(gen);
  41. while (1) {
  42. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  43. if (l2_token_get_kind(tok) == L2_TOK_CLOSE_BRACE) {
  44. l2_lexer_consume(lexer); // '}'
  45. break;
  46. } else if (l2_token_get_kind(tok) != L2_TOK_IDENT) {
  47. l2_parse_err(err, tok, "In object literal: Expected identifier, got %s",
  48. l2_token_get_name(tok));
  49. return -1;
  50. }
  51. l2_trace("key: '%s'", tok->v.str);
  52. struct l2_token_value key = l2_token_extract_val(tok);
  53. l2_lexer_consume(lexer); // ident
  54. tok = l2_lexer_peek(lexer, 1);
  55. if (l2_token_get_kind(tok) != L2_TOK_COLON) {
  56. if (!(key.flags & L2_TOK_SMALL)) free(key.str);
  57. l2_parse_err(err, tok, "In object literal: Expected ':', got %s",
  58. l2_token_get_name(tok));
  59. return -1;
  60. }
  61. l2_lexer_consume(lexer); // ':'
  62. if (parse_expression(lexer, gen, err) < 0) {
  63. if (!(key.flags & L2_TOK_SMALL)) free(key.str);
  64. return -1;
  65. }
  66. if (key.flags & L2_TOK_SMALL) {
  67. l2_gen_namespace_set_copy(gen, key.strbuf);
  68. } else {
  69. l2_gen_namespace_set_copy(gen, key.str);
  70. }
  71. l2_gen_discard(gen);
  72. tok = l2_lexer_peek(lexer, 1);
  73. if (
  74. l2_token_get_kind(tok) != L2_TOK_EOL &&
  75. l2_token_get_kind(tok) != L2_TOK_CLOSE_BRACE) {
  76. l2_parse_err(err, tok, "In object literal: Expected EOL or '}', got %s",
  77. l2_token_get_name(tok));
  78. return -1;
  79. }
  80. if (l2_token_get_kind(tok) == L2_TOK_EOL) {
  81. l2_lexer_consume(lexer); // EOL
  82. }
  83. }
  84. return 0;
  85. }
  86. static int parse_function_literal_impl(
  87. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  88. l2_trace_scope("function literal");
  89. // '{' and EOL already skipped by parse_object_or_function_literal
  90. // The arguments array will be at the top of the stack
  91. char *ident = "$";
  92. l2_gen_stack_frame_set_copy(gen, ident);
  93. int first = 1;
  94. while (1) {
  95. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_CLOSE_BRACE) {
  96. l2_lexer_consume(lexer); // '}'
  97. break;
  98. }
  99. if (!first) {
  100. l2_gen_discard(gen);
  101. }
  102. l2_trace_scope("function literal expression");
  103. if (parse_expression(lexer, gen, err) < 0) {
  104. return -1;
  105. }
  106. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  107. first = 0;
  108. }
  109. // All functions must put _something_ on the stack
  110. if (first) {
  111. l2_gen_none(gen);
  112. }
  113. l2_gen_ret(gen);
  114. return 0;
  115. }
  116. static int parse_function_literal(
  117. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  118. l2_gen_flush(gen);
  119. struct l2_io_writer *prev_writer = gen->writer.w;
  120. // Generate the function to a buffer in memory
  121. struct l2_io_mem_writer w = {0};
  122. w.w.write = l2_io_mem_write;
  123. gen->writer.w = &w.w;
  124. // Generates five bytes; RJMP, then 4 byte counter
  125. l2_gen_rjmp_placeholder(gen);
  126. l2_word pos = gen->pos;
  127. // Generate the function body itself
  128. int ret = parse_function_literal_impl(lexer, gen, err);
  129. l2_gen_flush(gen);
  130. gen->writer.w = prev_writer;
  131. if (ret < 0) {
  132. free(w.mem);
  133. return -1;
  134. }
  135. unsigned char *bc = w.mem;
  136. l2_word jdist = w.len - 5;
  137. // Write the jump distance (little endian)
  138. bc[1] = (jdist >> 0) & 0xff;
  139. bc[2] = (jdist >> 8) & 0xff;
  140. bc[3] = (jdist >> 16) & 0xff;
  141. bc[4] = (jdist >> 24) & 0xff;
  142. l2_bufio_put_n(&gen->writer, bc, w.len);
  143. free(w.mem);
  144. l2_gen_function(gen, pos);
  145. return 0;
  146. }
  147. static int parse_object_or_function_literal(
  148. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  149. l2_trace_scope("object or function literal");
  150. l2_lexer_consume(lexer); // '{'
  151. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  152. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  153. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  154. if (l2_token_get_kind(tok) == L2_TOK_CLOSE_BRACE) {
  155. l2_trace_scope("empty object literal");
  156. l2_lexer_consume(lexer); // '}'
  157. l2_gen_namespace(gen);
  158. } else if (
  159. l2_token_get_kind(tok) == L2_TOK_IDENT &&
  160. l2_token_get_kind(tok2) == L2_TOK_COLON) {
  161. if (parse_object_literal(lexer, gen, err) < 0) {
  162. return -1;
  163. }
  164. } else {
  165. if (parse_function_literal(lexer, gen, err) < 0) {
  166. return -1;
  167. }
  168. }
  169. return 0;
  170. }
  171. static int parse_array_literal(
  172. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  173. l2_trace_scope("array literal");
  174. l2_lexer_consume(lexer); // '['
  175. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  176. int count = 0;
  177. while (1) {
  178. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_CLOSE_BRACKET) {
  179. l2_lexer_consume(lexer); // ']'
  180. break;
  181. }
  182. count += 1;
  183. if (parse_arg_level_expression(lexer, gen, err) < 0) {
  184. return -1;
  185. }
  186. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  187. }
  188. l2_gen_array(gen, count);
  189. return 0;
  190. }
  191. static int parse_arg_level_expression_base(
  192. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  193. l2_trace_scope("arg level expression base");
  194. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  195. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  196. if (l2_token_get_kind(tok) == L2_TOK_OPEN_PAREN) {
  197. l2_trace_scope("group expr");
  198. l2_lexer_consume(lexer); // '('
  199. if (parse_expression(lexer, gen, err) < 0) {
  200. return -1;
  201. }
  202. tok = l2_lexer_peek(lexer, 1);
  203. if (l2_token_get_kind(tok) != L2_TOK_CLOSE_PAREN) {
  204. l2_parse_err(err, tok, "Expected ')', got %s",
  205. l2_token_get_name(tok));
  206. return -1;
  207. }
  208. l2_lexer_consume(lexer); // ')'
  209. } else if (l2_token_get_kind(tok) == L2_TOK_IDENT) {
  210. l2_trace_scope("ident");
  211. if (l2_token_is_small(tok)) {
  212. l2_trace("ident '%s'", tok->v.strbuf);
  213. } else {
  214. l2_trace("ident '%s'", tok->v.str);
  215. }
  216. struct l2_token_value ident = l2_token_extract_val(tok);
  217. l2_lexer_consume(lexer); // ident
  218. if (ident.flags & L2_TOK_SMALL) {
  219. l2_gen_stack_frame_lookup_copy(gen, ident.strbuf);
  220. } else {
  221. l2_gen_stack_frame_lookup(gen, &ident.str);
  222. }
  223. } else if (l2_token_get_kind(tok) == L2_TOK_NUMBER) {
  224. l2_trace_scope("number literal");
  225. l2_trace("number %g", tok->v.num);
  226. double number = tok->v.num;
  227. l2_lexer_consume(lexer); // number
  228. l2_gen_number(gen, number);
  229. } else if (l2_token_get_kind(tok) == L2_TOK_STRING) {
  230. l2_trace_scope("string literal");
  231. l2_trace("string '%s'", tok->v.str);
  232. struct l2_token_value str = l2_token_extract_val(tok);
  233. l2_lexer_consume(lexer); // string
  234. if (str.flags & L2_TOK_SMALL) {
  235. l2_gen_string_copy(gen, str.strbuf);
  236. } else {
  237. l2_gen_string(gen, &str.str);
  238. }
  239. } else if (
  240. l2_token_get_kind(tok) == L2_TOK_QUOT &&
  241. l2_token_get_kind(tok2) == L2_TOK_IDENT) {
  242. l2_trace_scope("atom literal");
  243. l2_trace("atom '%s'", tok->v.str);
  244. struct l2_token_value ident = l2_token_extract_val(tok2);
  245. l2_lexer_consume(lexer); // "'"
  246. l2_lexer_consume(lexer); // ident
  247. if (ident.flags & L2_TOK_SMALL) {
  248. l2_gen_atom_copy(gen, ident.strbuf);
  249. } else {
  250. l2_gen_atom(gen, &ident.str);
  251. }
  252. } else if (l2_token_get_kind(tok) == L2_TOK_OPEN_BRACE) {
  253. if (parse_object_or_function_literal(lexer, gen, err) < 0) {
  254. return -1;
  255. }
  256. } else if (l2_token_get_kind(tok) == L2_TOK_OPEN_BRACKET) {
  257. if (parse_array_literal(lexer, gen, err) < 0) {
  258. return -1;
  259. }
  260. } else {
  261. l2_parse_err(err, tok, "Unexpected token %s",
  262. l2_token_get_name(tok));
  263. return -1;
  264. }
  265. return 0;
  266. }
  // Parse what follows an already-parsed base expression: either the
  // arguments of a function call, or a chain of infix operator applications.
  //
  // `infix_start` is the number of ordinary arguments that must already
  // have been parsed before an infix-operator identifier is treated as an
  // operator instead of as an argument. The callers in this file pass 0 for
  // a bare expression and 1 inside a parenthesized call.
  //
  // Parsing continues until tok_is_end() reports an end token, after which a
  // function call with the counted arguments is generated. If an infix chain
  // is parsed before any argument, no function call is generated at all.
  //
  // Returns 0 on success, -1 if any sub-expression fails to parse.
  static int parse_func_call_after_base(
          struct l2_lexer *lexer, struct l2_generator *gen,
          struct l2_parse_error *err, size_t infix_start) {
      l2_trace_scope("func call after base");

      size_t argc = 0;
      for (;;) {
          int at_infix =
              argc >= infix_start && tok_is_infix(l2_lexer_peek(lexer, 1));

          if (!at_infix) {
              l2_trace_scope("func call param");
              if (parse_arg_level_expression(lexer, gen, err) < 0) {
                  return -1;
              }

              argc += 1;
          } else {
              // The lhs is already on the stack; each round parses the
              // operator and then the rhs before generating the infix call
              do {
                  // Operator
                  if (parse_arg_level_expression(lexer, gen, err) < 0) {
                      return -1;
                  }

                  // RHS
                  if (parse_arg_level_expression(lexer, gen, err) < 0) {
                      return -1;
                  }

                  l2_gen_func_call_infix(gen);
              } while (tok_is_infix(l2_lexer_peek(lexer, 1)));

              // With no argument parsed so far, this was never a function
              // call; it was only a series of infix calls
              if (argc == 0) {
                  return 0;
              }

              // argc stays as it is: an infix call neither adds nor removes
              // an argument, it only transforms the latest one
          }

          if (tok_is_end(l2_lexer_peek(lexer, 1))) {
              break;
          }
      }

      // The 'argc' previous expressions were arguments, the one before that was the function
      l2_gen_func_call(gen, argc);
      return 0;
  }
  306. static int parse_arg_level_expression(
  307. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  308. l2_trace_scope("arg level expression");
  309. if (parse_arg_level_expression_base(lexer, gen, err) < 0) {
  310. return -1;
  311. }
  312. while (1) {
  313. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  314. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  315. struct l2_token *tok3 = l2_lexer_peek(lexer, 3);
  316. if (l2_token_get_kind(tok) == L2_TOK_OPEN_PAREN_NS) {
  317. l2_trace_scope("parenthesized func call");
  318. l2_lexer_consume(lexer); // '('
  319. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_CLOSE_PAREN) {
  320. l2_lexer_consume(lexer); // ')'
  321. l2_gen_func_call(gen, 0);
  322. } else {
  323. if (parse_func_call_after_base(lexer, gen, err, 1) < 0) {
  324. return -1;
  325. }
  326. tok = l2_lexer_peek(lexer, 1);
  327. if (l2_token_get_kind(tok) != L2_TOK_CLOSE_PAREN) {
  328. l2_parse_err(err, tok, "Expected ')', got %s",
  329. l2_token_get_name(tok));
  330. return -1;
  331. }
  332. l2_lexer_consume(lexer); // ')'
  333. }
  334. } else if (
  335. l2_token_get_kind(tok) == L2_TOK_PERIOD &&
  336. l2_token_get_kind(tok2) == L2_TOK_IDENT &&
  337. l2_token_get_kind(tok3) == L2_TOK_EQUALS) {
  338. l2_trace_scope("namespace assign");
  339. if (l2_token_is_small(tok2)) {
  340. l2_trace("ident '%s'", tok2->v.strbuf);
  341. } else {
  342. l2_trace("ident '%s'", tok2->v.str);
  343. }
  344. struct l2_token_value ident = l2_token_extract_val(tok2);
  345. l2_lexer_consume(lexer); // '.'
  346. l2_lexer_consume(lexer); // ident
  347. l2_lexer_consume(lexer); // '='
  348. if (parse_expression(lexer, gen, err) < 0) {
  349. if (!(ident.flags & L2_TOK_SMALL)) free(ident.str);
  350. return -1;
  351. }
  352. if (ident.flags & L2_TOK_SMALL) {
  353. l2_gen_namespace_set_copy(gen, ident.strbuf);
  354. } else {
  355. l2_gen_namespace_set(gen, &ident.str);
  356. }
  357. l2_gen_swap_discard(gen);
  358. } else if (
  359. l2_token_get_kind(tok) == L2_TOK_PERIOD &&
  360. l2_token_get_kind(tok2) == L2_TOK_IDENT) {
  361. l2_trace_scope("namespace lookup");
  362. if (l2_token_is_small(tok2)) {
  363. l2_trace("ident '%s'", tok2->v.strbuf);
  364. } else {
  365. l2_trace("ident '%s'", tok2->v.str);
  366. }
  367. struct l2_token_value ident = l2_token_extract_val(tok2);
  368. l2_lexer_consume(lexer); // '.'
  369. l2_lexer_consume(lexer); // ident
  370. if (ident.flags & L2_TOK_SMALL) {
  371. l2_gen_namespace_lookup_copy(gen, ident.strbuf);
  372. } else {
  373. l2_gen_namespace_lookup(gen, &ident.str);
  374. }
  375. } else if (
  376. l2_token_get_kind(tok) == L2_TOK_DOT_NUMBER &&
  377. l2_token_get_kind(tok2) == L2_TOK_EQUALS) {
  378. l2_trace_scope("direct array assign");
  379. int number = tok->v.integer;
  380. l2_lexer_consume(lexer); // dot-number
  381. l2_lexer_consume(lexer); // '='
  382. if (parse_expression(lexer, gen, err) < 0) {
  383. return -1;
  384. }
  385. l2_gen_array_set(gen, number);
  386. l2_gen_swap_discard(gen);
  387. } else if (l2_token_get_kind(tok) == L2_TOK_DOT_NUMBER) {
  388. l2_trace_scope("direct array lookup");
  389. int number = tok->v.integer;
  390. l2_lexer_consume(lexer); // dot-number
  391. l2_gen_array_lookup(gen, number);
  392. } else if (
  393. l2_token_get_kind(tok) == L2_TOK_PERIOD &&
  394. l2_token_get_kind(tok2) == L2_TOK_OPEN_PAREN_NS) {
  395. l2_trace_scope("dynamic lookup");
  396. l2_lexer_consume(lexer); // '.'
  397. l2_lexer_consume(lexer); // '('
  398. if (parse_expression(lexer, gen, err) < 0) {
  399. return -1;
  400. }
  401. tok = l2_lexer_peek(lexer, 1);
  402. if (l2_token_get_kind(tok) != L2_TOK_CLOSE_PAREN) {
  403. l2_parse_err(err, tok, "Expected ')', got %s",
  404. l2_token_get_name(tok));
  405. return -1;
  406. }
  407. l2_lexer_consume(lexer); // ')'
  408. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_EQUALS) {
  409. l2_lexer_consume(lexer); // '='
  410. if (parse_expression(lexer, gen, err) < 0) {
  411. return -1;
  412. }
  413. l2_gen_dynamic_set(gen);
  414. } else {
  415. l2_gen_dynamic_lookup(gen);
  416. }
  417. } else {
  418. break;
  419. }
  420. }
  421. return 0;
  422. }
  423. static int parse_expression(
  424. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  425. l2_trace_scope("expression");
  426. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  427. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  428. if (
  429. l2_token_get_kind(tok) == L2_TOK_IDENT &&
  430. l2_token_get_kind(tok2) == L2_TOK_COLON_EQ) {
  431. l2_trace_scope("assign expression");
  432. if (l2_token_is_small(tok)) {
  433. l2_trace("ident '%s'", tok->v.strbuf);
  434. } else {
  435. l2_trace("ident '%s'", tok->v.str);
  436. }
  437. struct l2_token_value ident = l2_token_extract_val(tok);
  438. l2_lexer_consume(lexer); // ident
  439. l2_lexer_consume(lexer); // :=
  440. if (parse_expression(lexer, gen, err) < 0) {
  441. if (!(ident.flags & L2_TOK_SMALL)) free(ident.str);
  442. return -1;
  443. }
  444. if (ident.flags & L2_TOK_SMALL) {
  445. l2_gen_stack_frame_set_copy(gen, ident.strbuf);
  446. } else {
  447. l2_gen_stack_frame_set(gen, &ident.str);
  448. }
  449. } else if (
  450. l2_token_get_kind(tok) == L2_TOK_IDENT &&
  451. l2_token_get_kind(tok2) == L2_TOK_EQUALS) {
  452. l2_trace_scope("replacement assign expression");
  453. if (l2_token_is_small(tok)) {
  454. l2_trace("ident '%s'", tok->v.strbuf);
  455. } else {
  456. l2_trace("ident '%s'", tok->v.str);
  457. }
  458. struct l2_token_value ident = l2_token_extract_val(tok);
  459. l2_lexer_consume(lexer); // ident
  460. l2_lexer_consume(lexer); // =
  461. if (parse_expression(lexer, gen, err) < 0) {
  462. if (!(ident.flags & L2_TOK_SMALL)) free(ident.str);
  463. return -1;
  464. }
  465. if (ident.flags & L2_TOK_SMALL) {
  466. l2_gen_stack_frame_replace_copy(gen, ident.strbuf);
  467. } else {
  468. l2_gen_stack_frame_replace(gen, &ident.str);
  469. }
  470. } else {
  471. if (parse_arg_level_expression(lexer, gen, err) < 0) {
  472. return -1;
  473. }
  474. if (!tok_is_end(l2_lexer_peek(lexer, 1))) {
  475. if (parse_func_call_after_base(lexer, gen, err, 0) < 0) {
  476. return -1;
  477. }
  478. }
  479. }
  480. return 0;
  481. }
  482. int l2_parse_program(
  483. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  484. l2_trace_scope("program");
  485. while (1) {
  486. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  487. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_EOF) {
  488. break;
  489. }
  490. if (parse_expression(lexer, gen, err) < 0) {
  491. l2_gen_halt(gen);
  492. l2_gen_flush(gen);
  493. return -1;
  494. }
  495. l2_gen_discard(gen);
  496. }
  497. l2_gen_halt(gen);
  498. l2_gen_flush(gen);
  499. return 0;
  500. }