You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lex.c 3.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211
  #include "parse/lex.h"
  #include <stdlib.h>
  #include <string.h>
  3. const char *l2_token_kind_name(enum l2_token_kind kind) {
  4. switch (kind) {
  5. case L2_TOK_OPEN_PAREN:
  6. return "open-paren";
  7. case L2_TOK_CLOSE_PAREN:
  8. return "close-paren";
  9. case L2_TOK_OPEN_BRACE:
  10. return "open-brace";
  11. case L2_TOK_CLOSE_BRACE:
  12. return "close-brace";
  13. case L2_TOK_OPEN_BRACKET:
  14. return "open-bracket";
  15. case L2_TOK_CLOSE_BRACKET:
  16. return "close-bracket";
  17. case L2_TOK_COMMA:
  18. return "comma";
  19. case L2_TOK_PERIOD:
  20. return "period";
  21. case L2_TOK_EOF:
  22. return "end-of-file";
  23. case L2_TOK_NUMBER:
  24. return "number";
  25. case L2_TOK_STRING:
  26. return "string";
  27. case L2_TOK_ERROR:
  28. return "error";
  29. }
  30. }
  31. void l2_token_free(struct l2_token *tok) {
  32. if (tok->kind == L2_TOK_STRING) {
  33. free(tok->v.str);
  34. }
  35. }
  36. struct l2_token l2_token_move(struct l2_token *tok) {
  37. struct l2_token dup = *tok;
  38. if (tok->kind == L2_TOK_STRING) {
  39. tok->v.str = NULL;
  40. }
  41. return dup;
  42. }
  43. void l2_lexer_init(struct l2_lexer *lexer, struct l2_io_reader *r) {
  44. lexer->currtok.kind = L2_TOK_EOF,
  45. lexer->tokidx = 0;
  46. lexer->line = 1;
  47. lexer->ch = 1;
  48. l2_bufio_reader_init(&lexer->reader, r);
  49. }
  50. static int read_ch(struct l2_lexer *lexer) {
  51. int ch = l2_bufio_get(&lexer->reader);
  52. lexer->ch += 1;
  53. if (ch == '\n') {
  54. lexer->ch = 1;
  55. lexer->line += 1;
  56. }
  57. return ch;
  58. }
  /*
   * Report whether `ch` is one of the characters the lexer skips between
   * tokens: space, carriage return, newline or tab.
   *
   * Returns 1 for whitespace and 0 for anything else.
   */
  static int is_whitespace(int ch) {
      switch (ch) {
      case ' ':
      case '\r':
      case '\n':
      case '\t':
          return 1;
      default:
          return 0;
      }
  }
  62. static void skip_whitespace(struct l2_lexer *lexer) {
  63. while (is_whitespace(l2_bufio_peek(&lexer->reader, 1))) read_ch(lexer);
  64. }
  65. static void read_string(struct l2_lexer *lexer, struct l2_token *tok) {
  66. tok->kind = L2_TOK_STRING;
  67. tok->v.str = malloc(16);
  68. if (tok->v.str == NULL) {
  69. tok->kind = L2_TOK_ERROR;
  70. tok->v.str = "Allocaton failure";
  71. return;
  72. }
  73. size_t size = 16;
  74. size_t idx = 0;
  75. while (1) {
  76. int ch = read_ch(lexer);
  77. if (ch == '"') {
  78. return;
  79. } else if (ch == EOF) {
  80. tok->kind = L2_TOK_EOF;
  81. free(tok->v.str);
  82. tok->v.str = "Unexpected EOF";
  83. return;
  84. } else if (ch == '\\') {
  85. int ch2 = read_ch(lexer);
  86. switch (ch2) {
  87. case 'n':
  88. ch = '\n';
  89. break;
  90. case 'r':
  91. ch = '\r';
  92. break;
  93. case 't':
  94. ch = '\t';
  95. break;
  96. case EOF:
  97. tok->kind = L2_TOK_EOF;
  98. free(tok->v.str);
  99. tok->v.str = "Unexpected EOF";
  100. return;
  101. default:
  102. ch = ch2;
  103. break;
  104. }
  105. }
  106. tok->v.str[idx++] = (char)ch;
  107. if (idx >= size) {
  108. size *= 2;
  109. char *newbuf = realloc(tok->v.str, size);
  110. if (newbuf == NULL) {
  111. free(tok->v.str);
  112. tok->kind = L2_TOK_ERROR;
  113. tok->v.str = "Allocation failure";
  114. return;
  115. }
  116. tok->v.str = newbuf;
  117. }
  118. }
  119. }
  120. static void read_tok(struct l2_lexer *lexer, struct l2_token *tok) {
  121. skip_whitespace(lexer);
  122. tok->line = lexer->line;
  123. tok->ch = lexer->ch;
  124. int ch = read_ch(lexer);
  125. switch (ch) {
  126. case '(':
  127. tok->kind = L2_TOK_OPEN_PAREN;
  128. break;
  129. case ')':
  130. tok->kind = L2_TOK_CLOSE_PAREN;
  131. break;
  132. case '{':
  133. tok->kind = L2_TOK_OPEN_BRACE;
  134. break;
  135. case '}':
  136. tok->kind = L2_TOK_CLOSE_BRACE;
  137. break;
  138. case '[':
  139. tok->kind = L2_TOK_OPEN_BRACKET;
  140. break;
  141. case ']':
  142. tok->kind = L2_TOK_CLOSE_BRACKET;
  143. break;
  144. case ',':
  145. tok->kind = L2_TOK_COMMA;
  146. break;
  147. case '.':
  148. tok->kind = L2_TOK_PERIOD;
  149. break;
  150. case EOF:
  151. tok->kind = L2_TOK_EOF;
  152. break;
  153. case '"':
  154. read_string(lexer, tok);
  155. break;
  156. }
  157. }
  158. struct l2_token *l2_lexer_peek(struct l2_lexer *lexer, int count) {
  159. int offset = count - 1;
  160. while (offset >= lexer->tokidx) {
  161. read_tok(lexer, &lexer->toks[lexer->tokidx++]);
  162. }
  163. return &lexer->toks[offset];
  164. }
  165. struct l2_token *l2_lexer_get(struct l2_lexer *lexer) {
  166. l2_token_free(&lexer->currtok);
  167. if (lexer->tokidx == 0) {
  168. read_tok(lexer, &lexer->currtok);
  169. } else {
  170. memmove(lexer->toks, lexer->toks + 1, lexer->tokidx - 1);
  171. lexer->tokidx -= 1;
  172. }
  173. return &lexer->currtok;
  174. }