You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

parse.c 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557
  1. #include "parse/parse.h"
  2. #include "trace.h"
  3. #include "gen/gen.h"
  4. static int tok_is_end(struct l2_token *tok) {
  5. enum l2_token_kind kind = l2_token_get_kind(tok);
  6. return
  7. kind == L2_TOK_CLOSE_BRACE || kind == L2_TOK_CLOSE_BRACKET ||
  8. kind == L2_TOK_CLOSE_PAREN || kind == L2_TOK_EOF ||
  9. kind == L2_TOK_EOL;
  10. }
  11. static int tok_is_infix(struct l2_token *tok) {
  12. if (l2_token_get_kind(tok) != L2_TOK_IDENT) return 0;
  13. char *str;
  14. if (l2_token_is_small(tok)) {
  15. str = tok->v.strbuf;
  16. } else {
  17. str = tok->v.str;
  18. }
  19. return
  20. (str[0] == '$' && str[1] != '\0') ||
  21. strcmp(str, "+") == 0 ||
  22. strcmp(str, "-") == 0 ||
  23. strcmp(str, "*") == 0 ||
  24. strcmp(str, "/") == 0 ||
  25. strcmp(str, "==") == 0 ||
  26. strcmp(str, "!=") == 0;
  27. }
  // Forward declarations: these two parsers are called by the literal
  // parsers below, before their own definitions appear.
  static int parse_expression(struct l2_lexer *lexer,
                              struct l2_generator *gen,
                              struct l2_parse_error *err);
  static int parse_arg_level_expression(struct l2_lexer *lexer,
                                        struct l2_generator *gen,
                                        struct l2_parse_error *err);
  32. static int parse_object_literal(
  33. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  34. l2_trace_scope("object literal");
  35. // '{' and EOL already skipped by parse_object_or_function_literal
  36. l2_gen_namespace(gen);
  37. while (1) {
  38. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  39. if (l2_token_get_kind(tok) == L2_TOK_CLOSE_BRACE) {
  40. l2_lexer_consume(lexer); // '}'
  41. break;
  42. } else if (l2_token_get_kind(tok) != L2_TOK_IDENT) {
  43. l2_parse_err(err, tok, "In object literal: Expected identifier, got %s",
  44. l2_token_get_name(tok));
  45. return -1;
  46. }
  47. l2_trace("key: '%s'", tok->v.str);
  48. struct l2_token_value key = l2_token_extract_val(tok);
  49. l2_lexer_consume(lexer); // ident
  50. tok = l2_lexer_peek(lexer, 1);
  51. if (l2_token_get_kind(tok) != L2_TOK_COLON) {
  52. if (!(key.flags & L2_TOK_SMALL)) free(key.str);
  53. l2_parse_err(err, tok, "In object literal: Expected ':', got %s",
  54. l2_token_get_name(tok));
  55. return -1;
  56. }
  57. l2_lexer_consume(lexer); // ':'
  58. if (parse_expression(lexer, gen, err) < 0) {
  59. if (!(key.flags & L2_TOK_SMALL)) free(key.str);
  60. return -1;
  61. }
  62. if (key.flags & L2_TOK_SMALL) {
  63. l2_gen_namespace_set_copy(gen, key.strbuf);
  64. } else {
  65. l2_gen_namespace_set_copy(gen, key.str);
  66. }
  67. l2_gen_discard(gen);
  68. tok = l2_lexer_peek(lexer, 1);
  69. if (
  70. l2_token_get_kind(tok) != L2_TOK_EOL &&
  71. l2_token_get_kind(tok) != L2_TOK_CLOSE_BRACE) {
  72. l2_parse_err(err, tok, "In object literal: Expected EOL or '}', got %s",
  73. l2_token_get_name(tok));
  74. return -1;
  75. }
  76. if (l2_token_get_kind(tok) == L2_TOK_EOL) {
  77. l2_lexer_consume(lexer); // EOL
  78. }
  79. }
  80. return 0;
  81. }
  82. static int parse_function_literal_impl(
  83. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  84. l2_trace_scope("function literal");
  85. // '{' and EOL already skipped by parse_object_or_function_literal
  86. // The arguments array will be at the top of the stack
  87. char *ident = "$";
  88. l2_gen_stack_frame_set_copy(gen, ident);
  89. int first = 1;
  90. while (1) {
  91. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_CLOSE_BRACE) {
  92. l2_lexer_consume(lexer); // '}'
  93. break;
  94. }
  95. if (!first) {
  96. l2_gen_discard(gen);
  97. }
  98. l2_trace_scope("function literal expression");
  99. if (parse_expression(lexer, gen, err) < 0) {
  100. return -1;
  101. }
  102. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  103. first = 0;
  104. }
  105. // All functions must put _something_ on the stack
  106. if (first) {
  107. l2_gen_none(gen);
  108. }
  109. l2_gen_ret(gen);
  110. return 0;
  111. }
  112. static int parse_function_literal(
  113. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  114. l2_gen_flush(gen);
  115. struct l2_io_writer *prev_writer = gen->writer.w;
  116. // Generate the function to a buffer in memory
  117. struct l2_io_mem_writer w = {0};
  118. w.w.write = l2_io_mem_write;
  119. gen->writer.w = &w.w;
  120. // Generates five bytes; RJMP, then 4 byte counter
  121. l2_gen_rjmp_placeholder(gen);
  122. l2_word pos = gen->pos;
  123. // Generate the function body itself
  124. int ret = parse_function_literal_impl(lexer, gen, err);
  125. l2_gen_flush(gen);
  126. gen->writer.w = prev_writer;
  127. if (ret < 0) {
  128. free(w.mem);
  129. return -1;
  130. }
  131. unsigned char *ops = w.mem;
  132. l2_word opcount = w.len - 5;
  133. // Write the jump distance (little endian)
  134. ops[1] = (opcount >> 0) & 0xff;
  135. ops[2] = (opcount >> 8) & 0xff;
  136. ops[3] = (opcount >> 16) & 0xff;
  137. ops[4] = (opcount >> 24) & 0xff;
  138. l2_bufio_put_n(&gen->writer, ops, w.len);
  139. free(w.mem);
  140. l2_gen_function(gen, pos);
  141. return 0;
  142. }
  143. static int parse_object_or_function_literal(
  144. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  145. l2_trace_scope("object or function literal");
  146. l2_lexer_consume(lexer); // '{'
  147. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  148. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  149. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  150. if (l2_token_get_kind(tok) == L2_TOK_CLOSE_BRACE) {
  151. l2_trace_scope("empty object literal");
  152. l2_lexer_consume(lexer); // '}'
  153. l2_gen_namespace(gen);
  154. } else if (
  155. l2_token_get_kind(tok) == L2_TOK_IDENT &&
  156. l2_token_get_kind(tok2) == L2_TOK_COLON) {
  157. if (parse_object_literal(lexer, gen, err) < 0) {
  158. return -1;
  159. }
  160. } else {
  161. if (parse_function_literal(lexer, gen, err) < 0) {
  162. return -1;
  163. }
  164. }
  165. return 0;
  166. }
  167. static int parse_array_literal(
  168. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  169. l2_trace_scope("array literal");
  170. l2_lexer_consume(lexer); // '['
  171. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  172. int count = 0;
  173. while (1) {
  174. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_CLOSE_BRACKET) {
  175. l2_lexer_consume(lexer); // ']'
  176. break;
  177. }
  178. count += 1;
  179. if (parse_arg_level_expression(lexer, gen, err) < 0) {
  180. return -1;
  181. }
  182. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  183. }
  184. l2_gen_array(gen, count);
  185. return 0;
  186. }
  187. static int parse_arg_level_expression_base(
  188. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  189. l2_trace_scope("arg level expression base");
  190. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  191. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  192. if (l2_token_get_kind(tok) == L2_TOK_OPEN_PAREN) {
  193. l2_trace_scope("group expr");
  194. l2_lexer_consume(lexer); // '('
  195. if (parse_expression(lexer, gen, err) < 0) {
  196. return -1;
  197. }
  198. tok = l2_lexer_peek(lexer, 1);
  199. if (l2_token_get_kind(tok) != L2_TOK_CLOSE_PAREN) {
  200. l2_parse_err(err, tok, "Expected ')', got %s",
  201. l2_token_get_name(tok));
  202. return -1;
  203. }
  204. l2_lexer_consume(lexer); // ')'
  205. } else if (l2_token_get_kind(tok) == L2_TOK_IDENT) {
  206. l2_trace_scope("ident");
  207. l2_trace("ident '%s'", tok->v.str);
  208. struct l2_token_value ident = l2_token_extract_val(tok);
  209. l2_lexer_consume(lexer); // ident
  210. if (ident.flags & L2_TOK_SMALL) {
  211. l2_gen_stack_frame_lookup_copy(gen, ident.strbuf);
  212. } else {
  213. l2_gen_stack_frame_lookup(gen, &ident.str);
  214. }
  215. } else if (l2_token_get_kind(tok) == L2_TOK_NUMBER) {
  216. l2_trace_scope("number literal");
  217. l2_trace("number %g", tok->v.num);
  218. double number = tok->v.num;
  219. l2_lexer_consume(lexer); // number
  220. l2_gen_number(gen, number);
  221. } else if (l2_token_get_kind(tok) == L2_TOK_STRING) {
  222. l2_trace_scope("string literal");
  223. l2_trace("string '%s'", tok->v.str);
  224. struct l2_token_value str = l2_token_extract_val(tok);
  225. l2_lexer_consume(lexer); // string
  226. if (str.flags & L2_TOK_SMALL) {
  227. l2_gen_string_copy(gen, str.strbuf);
  228. } else {
  229. l2_gen_string(gen, &str.str);
  230. }
  231. } else if (
  232. l2_token_get_kind(tok) == L2_TOK_QUOT &&
  233. l2_token_get_kind(tok2) == L2_TOK_IDENT) {
  234. l2_trace_scope("atom literal");
  235. l2_trace("atom '%s'", tok->v.str);
  236. struct l2_token_value ident = l2_token_extract_val(tok2);
  237. l2_lexer_consume(lexer); // "'"
  238. l2_lexer_consume(lexer); // ident
  239. if (ident.flags & L2_TOK_SMALL) {
  240. l2_gen_atom_copy(gen, ident.strbuf);
  241. } else {
  242. l2_gen_atom(gen, &ident.str);
  243. }
  244. } else if (l2_token_get_kind(tok) == L2_TOK_OPEN_BRACE) {
  245. if (parse_object_or_function_literal(lexer, gen, err) < 0) {
  246. return -1;
  247. }
  248. } else if (l2_token_get_kind(tok) == L2_TOK_OPEN_BRACKET) {
  249. if (parse_array_literal(lexer, gen, err) < 0) {
  250. return -1;
  251. }
  252. } else {
  253. l2_parse_err(err, tok, "Unexpected token %s",
  254. l2_token_get_name(tok));
  255. return -1;
  256. }
  257. return 0;
  258. }
  259. static int parse_arg_level_expression(
  260. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  261. l2_trace_scope("arg level expression");
  262. if (parse_arg_level_expression_base(lexer, gen, err) < 0) {
  263. return -1;
  264. }
  265. while (1) {
  266. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  267. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  268. struct l2_token *tok3 = l2_lexer_peek(lexer, 3);
  269. if (
  270. l2_token_get_kind(tok) == L2_TOK_OPEN_PAREN &&
  271. l2_token_get_kind(tok2) == L2_TOK_CLOSE_PAREN) {
  272. l2_trace_scope("niladic func call");
  273. l2_lexer_consume(lexer); // '('
  274. l2_lexer_consume(lexer); // ')'
  275. l2_gen_func_call(gen, 0);
  276. } else if (
  277. l2_token_get_kind(tok) == L2_TOK_PERIOD &&
  278. l2_token_get_kind(tok2) == L2_TOK_IDENT &&
  279. l2_token_get_kind(tok3) == L2_TOK_EQUALS) {
  280. l2_trace_scope("namespace assign");
  281. l2_trace("ident '%s'", tok2->v.str);
  282. struct l2_token_value ident = l2_token_extract_val(tok2);
  283. l2_lexer_consume(lexer); // '.'
  284. l2_lexer_consume(lexer); // ident
  285. l2_lexer_consume(lexer); // '='
  286. if (parse_expression(lexer, gen, err) < 0) {
  287. if (!(ident.flags & L2_TOK_SMALL)) free(ident.str);
  288. return -1;
  289. }
  290. if (ident.flags & L2_TOK_SMALL) {
  291. l2_gen_namespace_set_copy(gen, ident.strbuf);
  292. } else {
  293. l2_gen_namespace_set(gen, &ident.str);
  294. }
  295. l2_gen_swap_discard(gen);
  296. } else if (
  297. l2_token_get_kind(tok) == L2_TOK_PERIOD &&
  298. l2_token_get_kind(tok2) == L2_TOK_IDENT) {
  299. l2_trace_scope("namespace lookup");
  300. l2_trace("ident '%s'", tok2->v.str);
  301. struct l2_token_value ident = l2_token_extract_val(tok2);
  302. l2_lexer_consume(lexer); // '.'
  303. l2_lexer_consume(lexer); // ident
  304. if (ident.flags & L2_TOK_SMALL) {
  305. l2_gen_namespace_lookup_copy(gen, ident.strbuf);
  306. } else {
  307. l2_gen_namespace_lookup(gen, &ident.str);
  308. }
  309. } else if (
  310. l2_token_get_kind(tok) == L2_TOK_DOT_NUMBER &&
  311. l2_token_get_kind(tok2) == L2_TOK_EQUALS) {
  312. l2_trace_scope("direct array assign");
  313. int number = tok->v.integer;
  314. l2_lexer_consume(lexer); // dot-number
  315. l2_lexer_consume(lexer); // '='
  316. if (parse_expression(lexer, gen, err) < 0) {
  317. return -1;
  318. }
  319. l2_gen_array_set(gen, number);
  320. l2_gen_swap_discard(gen);
  321. } else if (l2_token_get_kind(tok) == L2_TOK_DOT_NUMBER) {
  322. l2_trace_scope("direct array lookup");
  323. int number = tok->v.integer;
  324. l2_lexer_consume(lexer); // dot-number
  325. l2_gen_array_lookup(gen, number);
  326. } else if (
  327. l2_token_get_kind(tok) == L2_TOK_PERIOD &&
  328. l2_token_get_kind(tok2) == L2_TOK_OPEN_PAREN) {
  329. l2_trace_scope("dynamic lookup");
  330. l2_lexer_consume(lexer); // '.'
  331. l2_lexer_consume(lexer); // '('
  332. if (parse_expression(lexer, gen, err) < 0) {
  333. return -1;
  334. }
  335. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) != L2_TOK_CLOSE_PAREN) {
  336. l2_parse_err(err, tok, "Expected '(', got %s",
  337. l2_token_get_name(tok));
  338. return -1;
  339. }
  340. l2_lexer_consume(lexer); // ')'
  341. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_EQUALS) {
  342. l2_lexer_consume(lexer); // '='
  343. if (parse_expression(lexer, gen, err) < 0) {
  344. return -1;
  345. }
  346. l2_gen_dynamic_set(gen);
  347. } else {
  348. l2_gen_dynamic_lookup(gen);
  349. }
  350. } else {
  351. break;
  352. }
  353. }
  354. return 0;
  355. }
  /*
   * Parse whatever follows the first arg level expression of an
   * expression: a chain of infix calls, or the arguments of a function
   * call (each of which may itself be followed by infix calls).
   *
   * If infix operators directly follow the base expression, only that
   * infix chain is parsed and no function call is emitted. Otherwise a
   * function call with the collected argument count is emitted once an
   * end token is reached.
   *
   * Returns 0 on success, -1 if a nested expression fails to parse.
   */
  static int parse_func_call_after_base(
          struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
      l2_trace_scope("func call after base");

      size_t argc = 0;

      for (;;) {
          int saw_infix = 0;

          // The left-hand side is already on the stack, so each round
          // parses the operator and then the right-hand side.
          while (tok_is_infix(l2_lexer_peek(lexer, 1))) {
              saw_infix = 1;

              // Operator
              if (parse_arg_level_expression(lexer, gen, err) < 0) {
                  return -1;
              }

              // RHS
              if (parse_arg_level_expression(lexer, gen, err) < 0) {
                  return -1;
              }

              l2_gen_func_call_infix(gen);
          }

          if (saw_infix) {
              // With no arguments collected yet, this was never a function
              // call, only a chain of infix calls.
              if (argc == 0) {
                  return 0;
              }

              // argc is left alone here: an infix call neither adds nor
              // removes an argument, it only transforms the latest one.
          } else {
              l2_trace_scope("func call param");
              if (parse_arg_level_expression(lexer, gen, err) < 0) {
                  return -1;
              }

              argc += 1;
          }

          if (tok_is_end(l2_lexer_peek(lexer, 1))) {
              break;
          }
      }

      // The last 'argc' expressions were the arguments; the expression
      // before them was the function.
      l2_gen_func_call(gen, argc);
      return 0;
  }
  394. static int parse_expression(
  395. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  396. l2_trace_scope("expression");
  397. struct l2_token *tok = l2_lexer_peek(lexer, 1);
  398. struct l2_token *tok2 = l2_lexer_peek(lexer, 2);
  399. if (
  400. l2_token_get_kind(tok) == L2_TOK_IDENT &&
  401. l2_token_get_kind(tok2) == L2_TOK_COLON_EQ) {
  402. l2_trace_scope("assign expression");
  403. l2_trace("ident '%s'", tok->v.str);
  404. struct l2_token_value ident = l2_token_extract_val(tok);
  405. l2_lexer_consume(lexer); // ident
  406. l2_lexer_consume(lexer); // :=
  407. if (parse_expression(lexer, gen, err) < 0) {
  408. if (!(ident.flags & L2_TOK_SMALL)) free(ident.str);
  409. return -1;
  410. }
  411. if (ident.flags & L2_TOK_SMALL) {
  412. l2_gen_stack_frame_set_copy(gen, ident.strbuf);
  413. } else {
  414. l2_gen_stack_frame_set(gen, &ident.str);
  415. }
  416. } else if (
  417. l2_token_get_kind(tok) == L2_TOK_IDENT &&
  418. l2_token_get_kind(tok2) == L2_TOK_EQUALS) {
  419. l2_trace_scope("replacement assign expression");
  420. l2_trace("ident '%s'", tok->v.str);
  421. struct l2_token_value ident = l2_token_extract_val(tok);
  422. l2_lexer_consume(lexer); // ident
  423. l2_lexer_consume(lexer); // =
  424. if (parse_expression(lexer, gen, err) < 0) {
  425. if (!(ident.flags & L2_TOK_SMALL)) free(ident.str);
  426. return -1;
  427. }
  428. if (ident.flags & L2_TOK_SMALL) {
  429. l2_gen_stack_frame_replace_copy(gen, ident.strbuf);
  430. } else {
  431. l2_gen_stack_frame_replace(gen, &ident.str);
  432. }
  433. } else {
  434. if (parse_arg_level_expression(lexer, gen, err) < 0) {
  435. return -1;
  436. }
  437. if (!tok_is_end(l2_lexer_peek(lexer, 1))) {
  438. if (parse_func_call_after_base(lexer, gen, err) < 0) {
  439. return -1;
  440. }
  441. }
  442. }
  443. return 0;
  444. }
  445. int l2_parse_program(
  446. struct l2_lexer *lexer, struct l2_generator *gen, struct l2_parse_error *err) {
  447. l2_trace_scope("program");
  448. while (1) {
  449. l2_lexer_skip_opt(lexer, L2_TOK_EOL);
  450. if (l2_token_get_kind(l2_lexer_peek(lexer, 1)) == L2_TOK_EOF) {
  451. break;
  452. }
  453. if (parse_expression(lexer, gen, err) < 0) {
  454. l2_gen_halt(gen);
  455. l2_gen_flush(gen);
  456. return -1;
  457. }
  458. l2_gen_discard(gen);
  459. }
  460. l2_gen_halt(gen);
  461. l2_gen_flush(gen);
  462. return 0;
  463. }