
lexer.c 3.0KB
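lexer.h itself is not part of this listing, so the following is only a sketch of a minimal header consistent with how lexer.c uses it: the type and member names (lexer_tokens, lexer_token, lexer_token_class, pairs, length, allocd, tokenClass, str, len) and the LEXER_TOKEN_* constants are taken from the code below, but the field types, enum ordering, and include guard are assumptions.

/* lexer.h (assumed) -- minimal interface inferred from lexer.c */
#ifndef LEXER_H
#define LEXER_H

#include <stddef.h>

typedef enum
{
    LEXER_TOKEN_WHITESPACE,
    LEXER_TOKEN_IDENTIFIER,
    LEXER_TOKEN_KEYWORD,
    LEXER_TOKEN_OPERATOR,
    LEXER_TOKEN_SEPARATOR,
    LEXER_TOKEN_INTEGER,
    LEXER_TOKEN_STRING,
    LEXER_TOKEN_FUNCTION_START,
    LEXER_TOKEN_FUNCTION_END,
    LEXER_TOKEN_EXPRESSION_START,
    LEXER_TOKEN_EXPRESSION_END
} lexer_token_class;

typedef struct
{
    lexer_token_class tokenClass; /* which pattern matched */
    char* str;                    /* pointer into the analyzed string (not a copy) */
    size_t len;                   /* length of the matched text */
} lexer_token;

typedef struct
{
    lexer_token* pairs; /* dynamically grown array of tokens */
    size_t length;      /* number of tokens stored */
    size_t allocd;      /* current capacity of pairs, in elements */
} lexer_tokens;

/* Tokenize str; returns NULL on allocation failure. */
lexer_tokens* lexer_analyze(char* str);

#endif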

#include <stdlib.h>
#include <stdio.h>
#include <regex.h>
#include "lexer.h"

/* Compile a POSIX extended regular expression, aborting on an invalid pattern. */
static regex_t regex(const char* str)
{
    regex_t reg;
    int err = regcomp(&reg, str, REG_EXTENDED);
    if (err)
    {
        fprintf(stderr, "Invalid regex: '%s'\n", str);
        fprintf(stderr, "Error code: %i\n", err);
        exit(1);
    }
    return reg;
}

/* Return nonzero if reg matches str; the match offsets are written to *pmatch.
 * Every pattern used below is anchored with '^', so a reported match always
 * starts at the beginning of str. */
static int regmatch(const regex_t* reg, const char* str, regmatch_t* pmatch)
{
    return regexec(reg, str, 1, pmatch, 0) != REG_NOMATCH;
}

/* Grow the token array if necessary and append one (class, text, length)
 * entry. Returns 0 on success, 1 on allocation failure. */
static int tokens_append(
    lexer_tokens* tokens,
    lexer_token_class tokenClass,
    char* str,
    size_t len)
{
    if (tokens->length == tokens->allocd)
    {
        /* Double the capacity (starting at 8 entries) and size the
         * realloc in bytes, not in elements. */
        size_t allocd = tokens->allocd ? tokens->allocd * 2 : 8;
        lexer_token* pairs = realloc(tokens->pairs, allocd * sizeof(lexer_token));
        if (!pairs)
            return 1; /* the old array is still valid and owned by tokens */
        tokens->pairs = pairs;
        tokens->allocd = allocd;
    }
    /* Write the new entry in place through a pointer into the array. */
    lexer_token* pair = &tokens->pairs[tokens->length];
    pair->tokenClass = tokenClass;
    pair->str = str;
    pair->len = len;
    tokens->length += 1;
    return 0;
}

lexer_tokens* lexer_analyze(char* str)
{
    /* Token patterns. Each one is anchored with '^' so it can only match at
     * the current offset; POSIX ERE has no \s, so [[:space:]] stands in for it. */
    regex_t whitespace       = regex("^[[:space:]]+");
    regex_t keyword          = regex("^(if|ret|func|str|arr|err|null)");
    regex_t identifier       = regex("^[a-zA-Z][a-zA-Z0-9]*");
    regex_t operator         = regex("^(==|=)");
    regex_t separator        = regex("^,");
    regex_t integer          = regex("^[0-9]+");
    regex_t string           = regex("^\"[^\"]*\"");
    regex_t function_start   = regex("^\\{");
    regex_t function_end     = regex("^\\}");
    regex_t expression_start = regex("^\\(");
    regex_t expression_end   = regex("^\\)");

    lexer_tokens* tokens = malloc(sizeof(lexer_tokens));
    if (!tokens)
        return NULL;
    tokens->pairs = NULL;
    tokens->length = 0;
    tokens->allocd = 0;

    regmatch_t pmatch;
    size_t offset = 0;
    lexer_tokens* result = tokens;

#define APPEND(tclass) \
    tokens_append(tokens, tclass, str + offset + pmatch.rm_so, pmatch.rm_eo - pmatch.rm_so)

    while (1)
    {
        int failed = 0;
        /* Keywords are tested before identifiers so that "if" and friends are
         * not swallowed by the identifier pattern. A keyword that merely
         * prefixes a longer name (e.g. "iffy") is still split in two, which
         * this simple lexer accepts. */
        if (regmatch(&whitespace, str + offset, &pmatch))
            failed = APPEND(LEXER_TOKEN_WHITESPACE);
        else if (regmatch(&keyword, str + offset, &pmatch))
            failed = APPEND(LEXER_TOKEN_KEYWORD);
        else if (regmatch(&identifier, str + offset, &pmatch))
            failed = APPEND(LEXER_TOKEN_IDENTIFIER);
        else if (regmatch(&operator, str + offset, &pmatch))
            failed = APPEND(LEXER_TOKEN_OPERATOR);
        else if (regmatch(&separator, str + offset, &pmatch))
            failed = APPEND(LEXER_TOKEN_SEPARATOR);
        else if (regmatch(&integer, str + offset, &pmatch))
            failed = APPEND(LEXER_TOKEN_INTEGER);
        else if (regmatch(&string, str + offset, &pmatch))
            failed = APPEND(LEXER_TOKEN_STRING);
        else if (regmatch(&function_start, str + offset, &pmatch))
            failed = APPEND(LEXER_TOKEN_FUNCTION_START);
        else if (regmatch(&function_end, str + offset, &pmatch))
            failed = APPEND(LEXER_TOKEN_FUNCTION_END);
        else if (regmatch(&expression_start, str + offset, &pmatch))
            failed = APPEND(LEXER_TOKEN_EXPRESSION_START);
        else if (regmatch(&expression_end, str + offset, &pmatch))
            failed = APPEND(LEXER_TOKEN_EXPRESSION_END);
        else
            break; /* nothing matches: end of input or an unrecognized character */

        if (failed)
        {
            /* Allocation failed inside tokens_append: hand back NULL instead
             * of a half-built token list. */
            free(tokens->pairs);
            free(tokens);
            result = NULL;
            break;
        }
        /* Advance past the matched token. */
        offset += pmatch.rm_eo;
    }
#undef APPEND

    /* Release the compiled patterns before returning the token list. */
    regfree(&whitespace);
    regfree(&keyword);
    regfree(&identifier);
    regfree(&operator);
    regfree(&separator);
    regfree(&integer);
    regfree(&string);
    regfree(&function_start);
    regfree(&function_end);
    regfree(&expression_start);
    regfree(&expression_end);
    return result;
}
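As a usage sketch, and assuming the lexer.h outline shown above (or the project's real header), a small driver can tokenize a string and print every non-whitespace token. The main function and its input are illustrative only, not part of the repository.

/* main.c (illustrative) -- prints the tokens produced by lexer_analyze. */
#include <stdio.h>
#include <stdlib.h>
#include "lexer.h"

int main(void)
{
    char source[] = "func main { ret (x == 42) }";
    lexer_tokens* tokens = lexer_analyze(source);
    if (!tokens)
    {
        fprintf(stderr, "lexer_analyze failed\n");
        return 1;
    }
    for (size_t i = 0; i < tokens->length; i++)
    {
        lexer_token t = tokens->pairs[i];
        if (t.tokenClass == LEXER_TOKEN_WHITESPACE)
            continue; /* whitespace is tokenized too; skip it when printing */
        printf("class %d: '%.*s'\n", (int)t.tokenClass, (int)t.len, t.str);
    }
    free(tokens->pairs);
    free(tokens);
    return 0;
}

Because each token's str points into source rather than holding a copy, source has to outlive the token list.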