You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lex.c 5.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323
  1. #include "parse/lex.h"
  2. #include <stdlib.h>
  /*
   * Parse a string made up solely of ASCII decimal digits into a double.
   *
   * str: NUL-terminated candidate string.
   * num: receives the parsed value on success. It is reset to 0 on entry and
   *      still holds 0 if parsing fails.
   *
   * Returns 0 on success, or -1 if the string is empty or contains any
   * character other than '0'..'9'.
   */
  static int parse_number(const char *str, double *num) {
    double value = 0;

    *num = 0;
    if (*str == '\0') {
      return -1;
    }

    /*
     * Accumulate left to right in floating point. Scaling each digit by an
     * int power of ten overflows (undefined behaviour) once the input is
     * longer than ten digits.
     */
    for (const char *p = str; *p != '\0'; ++p) {
      if (*p < '0' || *p > '9') {
        return -1;
      }
      value = value * 10 + (*p - '0');
    }

    *num = value;
    return 0;
  }
  18. const char *l2_token_kind_name(enum l2_token_kind kind) {
  19. switch (kind) {
  20. case L2_TOK_OPEN_PAREN:
  21. return "open-paren";
  22. case L2_TOK_CLOSE_PAREN:
  23. return "close-paren";
  24. case L2_TOK_OPEN_BRACE:
  25. return "open-brace";
  26. case L2_TOK_CLOSE_BRACE:
  27. return "close-brace";
  28. case L2_TOK_OPEN_BRACKET:
  29. return "open-bracket";
  30. case L2_TOK_CLOSE_BRACKET:
  31. return "close-bracket";
  32. case L2_TOK_COMMA:
  33. return "comma";
  34. case L2_TOK_PERIOD:
  35. return "period";
  36. case L2_TOK_COLON_EQ:
  37. return "period";
  38. case L2_TOK_EOF:
  39. return "end-of-file";
  40. case L2_TOK_NUMBER:
  41. return "number";
  42. case L2_TOK_STRING:
  43. return "string";
  44. case L2_TOK_IDENT:
  45. return "ident";
  46. case L2_TOK_ERROR:
  47. return "error";
  48. }
  49. }
  50. void l2_token_free(struct l2_token *tok) {
  51. if (tok->kind == L2_TOK_STRING) {
  52. free(tok->v.str);
  53. }
  54. }
  55. char *l2_token_extract_str(struct l2_token *tok) {
  56. char *str = tok->v.str;
  57. tok->v.str = NULL;
  58. return str;
  59. }
  60. void l2_lexer_init(struct l2_lexer *lexer, struct l2_io_reader *r) {
  61. lexer->currtok.kind = L2_TOK_EOF,
  62. lexer->tokidx = 0;
  63. lexer->line = 1;
  64. lexer->ch = 1;
  65. l2_bufio_reader_init(&lexer->reader, r);
  66. }
  67. static int peek_ch(struct l2_lexer *lexer) {
  68. int ch = l2_bufio_peek(&lexer->reader, 1);
  69. return ch;
  70. }
  71. static int read_ch(struct l2_lexer *lexer) {
  72. int ch = l2_bufio_get(&lexer->reader);
  73. lexer->ch += 1;
  74. if (ch == '\n') {
  75. lexer->ch = 1;
  76. lexer->line += 1;
  77. }
  78. return ch;
  79. }
  /* Return 1 for space, carriage return, newline or tab; 0 for anything else. */
  static int is_whitespace(int ch) {
    switch (ch) {
    case ' ':
    case '\r':
    case '\n':
    case '\t':
      return 1;
    default:
      return 0;
    }
  }
  83. static void skip_whitespace(struct l2_lexer *lexer) {
  84. while (is_whitespace(l2_bufio_peek(&lexer->reader, 1))) read_ch(lexer);
  85. }
  86. static void read_string(struct l2_lexer *lexer, struct l2_token *tok) {
  87. tok->kind = L2_TOK_STRING;
  88. tok->v.str = malloc(16);
  89. if (tok->v.str == NULL) {
  90. tok->kind = L2_TOK_ERROR;
  91. tok->v.str = "Allocaton failure";
  92. return;
  93. }
  94. size_t size = 16;
  95. size_t idx = 0;
  96. while (1) {
  97. int ch = read_ch(lexer);
  98. if (ch == '"') {
  99. return;
  100. } else if (ch == EOF) {
  101. tok->kind = L2_TOK_EOF;
  102. free(tok->v.str);
  103. tok->v.str = "Unexpected EOF";
  104. return;
  105. } else if (ch == '\\') {
  106. int ch2 = read_ch(lexer);
  107. switch (ch2) {
  108. case 'n':
  109. ch = '\n';
  110. break;
  111. case 'r':
  112. ch = '\r';
  113. break;
  114. case 't':
  115. ch = '\t';
  116. break;
  117. case EOF:
  118. tok->kind = L2_TOK_EOF;
  119. free(tok->v.str);
  120. tok->v.str = "Unexpected EOF";
  121. return;
  122. default:
  123. ch = ch2;
  124. break;
  125. }
  126. }
  127. tok->v.str[idx++] = (char)ch;
  128. if (idx >= size) {
  129. size *= 2;
  130. char *newbuf = realloc(tok->v.str, size);
  131. if (newbuf == NULL) {
  132. free(tok->v.str);
  133. tok->kind = L2_TOK_ERROR;
  134. tok->v.str = "Allocation failure";
  135. return;
  136. }
  137. tok->v.str = newbuf;
  138. }
  139. }
  140. }
  141. static void read_ident(struct l2_lexer *lexer, struct l2_token *tok) {
  142. tok->kind = L2_TOK_IDENT;
  143. tok->v.str = malloc(16);
  144. if (tok->v.str == NULL) {
  145. tok->kind = L2_TOK_ERROR;
  146. tok->v.str = "Allocaton failure";
  147. return;
  148. }
  149. size_t size = 16;
  150. size_t idx = 0;
  151. while (1) {
  152. int ch = peek_ch(lexer);
  153. if (is_whitespace(ch)) {
  154. return;
  155. }
  156. switch (ch) {
  157. case '(':
  158. case ')':
  159. case '{':
  160. case '}':
  161. case '[':
  162. case ']':
  163. case ',':
  164. case '.':
  165. case ':':
  166. case EOF:
  167. return;
  168. }
  169. tok->v.str[idx++] = (char)read_ch(lexer);
  170. if (idx >= size) {
  171. size *= 2;
  172. char *newbuf = realloc(tok->v.str, size);
  173. if (newbuf == NULL) {
  174. free(tok->v.str);
  175. tok->kind = L2_TOK_ERROR;
  176. tok->v.str = "Allocation failure";
  177. return;
  178. }
  179. tok->v.str = newbuf;
  180. }
  181. }
  182. }
  183. static void read_tok(struct l2_lexer *lexer, struct l2_token *tok) {
  184. skip_whitespace(lexer);
  185. tok->line = lexer->line;
  186. tok->ch = lexer->ch;
  187. int ch = peek_ch(lexer);
  188. switch (ch) {
  189. case '(':
  190. read_ch(lexer);
  191. tok->kind = L2_TOK_OPEN_PAREN;
  192. break;
  193. case ')':
  194. read_ch(lexer);
  195. tok->kind = L2_TOK_CLOSE_PAREN;
  196. break;
  197. case '{':
  198. read_ch(lexer);
  199. tok->kind = L2_TOK_OPEN_BRACE;
  200. break;
  201. case '}':
  202. read_ch(lexer);
  203. tok->kind = L2_TOK_CLOSE_BRACE;
  204. break;
  205. case '[':
  206. read_ch(lexer);
  207. tok->kind = L2_TOK_OPEN_BRACKET;
  208. break;
  209. case ']':
  210. read_ch(lexer);
  211. tok->kind = L2_TOK_CLOSE_BRACKET;
  212. break;
  213. case ',':
  214. read_ch(lexer);
  215. tok->kind = L2_TOK_COMMA;
  216. break;
  217. case '.':
  218. read_ch(lexer);
  219. tok->kind = L2_TOK_PERIOD;
  220. break;
  221. case ':':
  222. read_ch(lexer);
  223. {
  224. ch = read_ch(lexer);
  225. switch (ch) {
  226. case '=':
  227. tok->kind = L2_TOK_COLON_EQ;
  228. break;
  229. default:
  230. tok->kind = L2_TOK_ERROR;
  231. tok->v.str = "Unexpected character";
  232. break;
  233. }
  234. }
  235. break;
  236. case EOF:
  237. tok->kind = L2_TOK_EOF;
  238. break;
  239. case '"':
  240. read_ch(lexer);
  241. read_string(lexer, tok);
  242. break;
  243. default:
  244. read_ident(lexer, tok);
  245. if (tok->kind != L2_TOK_IDENT) {
  246. break;
  247. }
  248. double num;
  249. if (parse_number(tok->v.str, &num) >= 0) {
  250. free(tok->v.str);
  251. tok->kind = L2_TOK_NUMBER;
  252. tok->v.num = num;
  253. }
  254. break;
  255. }
  256. }
  257. struct l2_token *l2_lexer_peek(struct l2_lexer *lexer, int count) {
  258. int offset = count - 1;
  259. while (offset >= lexer->tokidx) {
  260. read_tok(lexer, &lexer->toks[lexer->tokidx++]);
  261. }
  262. return &lexer->toks[offset];
  263. }
  264. struct l2_token *l2_lexer_get(struct l2_lexer *lexer) {
  265. l2_token_free(&lexer->currtok);
  266. if (lexer->tokidx == 0) {
  267. read_tok(lexer, &lexer->currtok);
  268. } else {
  269. memmove(lexer->toks, lexer->toks + 1, lexer->tokidx - 1);
  270. lexer->tokidx -= 1;
  271. }
  272. return &lexer->currtok;
  273. }