You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lex.c 3.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  #include "parse/lex.h"

  #include <stdlib.h>
  #include <string.h>
  3. void l2_token_free(struct l2_token *tok) {
  4. if (tok->kind == L2_TOK_STRING) {
  5. free(tok->v.str);
  6. }
  7. }
  8. struct l2_token l2_token_move(struct l2_token *tok) {
  9. struct l2_token dup = *tok;
  10. if (tok->kind == L2_TOK_STRING) {
  11. tok->v.str = NULL;
  12. }
  13. return dup;
  14. }
  15. void l2_lexer_init(struct l2_lexer *lexer, struct l2_io_reader *r) {
  16. lexer->currtok.kind = L2_TOK_EOF,
  17. lexer->tokidx = 0;
  18. lexer->line = 1;
  19. lexer->ch = 1;
  20. l2_bufio_reader_init(&lexer->reader, r);
  21. }
  22. static int read_ch(struct l2_lexer *lexer) {
  23. int ch = l2_bufio_get(&lexer->reader);
  24. lexer->ch += 1;
  25. if (ch == '\n') {
  26. lexer->ch = 1;
  27. lexer->line += 1;
  28. }
  29. return ch;
  30. }
  /*
   * Report whether `ch` is one of the characters the lexer skips between
   * tokens: space, carriage return, newline or horizontal tab.
   *
   * Returns 1 for those four characters and 0 for anything else.
   */
  static int is_whitespace(int ch) {
      switch (ch) {
      case ' ':
      case '\r':
      case '\n':
      case '\t':
          return 1;
      default:
          return 0;
      }
  }
  34. static void skip_whitespace(struct l2_lexer *lexer) {
  35. while (is_whitespace(l2_bufio_peek(&lexer->reader, 1))) read_ch(lexer);
  36. }
  37. static void read_string(struct l2_lexer *lexer, struct l2_token *tok) {
  38. tok->kind = L2_TOK_STRING;
  39. tok->v.str = malloc(16);
  40. if (tok->v.str == NULL) {
  41. tok->kind = L2_TOK_ERROR;
  42. tok->v.str = "Allocaton failure";
  43. return;
  44. }
  45. size_t size = 16;
  46. size_t idx = 0;
  47. while (1) {
  48. int ch = read_ch(lexer);
  49. if (ch == '"') {
  50. return;
  51. } else if (ch == EOF) {
  52. tok->kind = L2_TOK_EOF;
  53. free(tok->v.str);
  54. tok->v.str = "Unexpected EOF";
  55. return;
  56. } else if (ch == '\\') {
  57. int ch2 = read_ch(lexer);
  58. switch (ch2) {
  59. case 'n':
  60. ch = '\n';
  61. break;
  62. case 'r':
  63. ch = '\r';
  64. break;
  65. case 't':
  66. ch = '\t';
  67. break;
  68. case EOF:
  69. tok->kind = L2_TOK_EOF;
  70. free(tok->v.str);
  71. tok->v.str = "Unexpected EOF";
  72. return;
  73. default:
  74. ch = ch2;
  75. break;
  76. }
  77. }
  78. tok->v.str[idx++] = (char)ch;
  79. if (idx >= size) {
  80. size *= 2;
  81. char *newbuf = realloc(tok->v.str, size);
  82. if (newbuf == NULL) {
  83. free(tok->v.str);
  84. tok->kind = L2_TOK_ERROR;
  85. tok->v.str = "Allocation failure";
  86. return;
  87. }
  88. tok->v.str = newbuf;
  89. }
  90. }
  91. }
  92. static void read_tok(struct l2_lexer *lexer, struct l2_token *tok) {
  93. skip_whitespace(lexer);
  94. tok->line = lexer->line;
  95. tok->ch = lexer->ch;
  96. int ch = read_ch(lexer);
  97. switch (ch) {
  98. case '(':
  99. tok->kind = L2_TOK_OPEN_PAREN;
  100. break;
  101. case ')':
  102. tok->kind = L2_TOK_CLOSE_PAREN;
  103. break;
  104. case '{':
  105. tok->kind = L2_TOK_OPEN_BRACE;
  106. break;
  107. case '}':
  108. tok->kind = L2_TOK_CLOSE_BRACE;
  109. break;
  110. case '[':
  111. tok->kind = L2_TOK_OPEN_BRACKET;
  112. break;
  113. case ']':
  114. tok->kind = L2_TOK_CLOSE_BRACKET;
  115. break;
  116. case ',':
  117. tok->kind = L2_TOK_COMMA;
  118. break;
  119. case '.':
  120. tok->kind = L2_TOK_PERIOD;
  121. break;
  122. case EOF:
  123. tok->kind = L2_TOK_EOF;
  124. break;
  125. case '"':
  126. read_string(lexer, tok);
  127. break;
  128. }
  129. }
  130. struct l2_token *l2_lexer_peek(struct l2_lexer *lexer, int count) {
  131. int offset = count - 1;
  132. while (offset >= lexer->tokidx) {
  133. read_tok(lexer, &lexer->toks[lexer->tokidx++]);
  134. }
  135. return &lexer->toks[offset];
  136. }
  137. struct l2_token *l2_lexer_get(struct l2_lexer *lexer) {
  138. l2_token_free(&lexer->currtok);
  139. if (lexer->tokidx == 0) {
  140. read_tok(lexer, &lexer->currtok);
  141. } else {
  142. memmove(lexer->toks, lexer->toks + 1, lexer->tokidx - 1);
  143. lexer->tokidx -= 1;
  144. }
  145. return &lexer->currtok;
  146. }