You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

lexer.c 2.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. #include <stdlib.h>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include <regex.h>
  5. #include "lexer.h"
  6. struct token_reg
  7. {
  8. regex_t reg;
  9. lexer_token_class tokenClass;
  10. };
  11. static struct token_reg regex(const char* str, lexer_token_class tokenClass)
  12. {
  13. regex_t reg;
  14. int err = regcomp(&reg, str, REG_EXTENDED);
  15. if (err)
  16. {
  17. fprintf(stderr, "Invalid regex: '%s'\n", str);
  18. fprintf(stderr, "Error code: %i\n", err);
  19. exit(1);
  20. }
  21. struct token_reg res =
  22. {
  23. reg,
  24. tokenClass
  25. };
  26. return res;
  27. }
  /*
   * Run the compiled expression `reg` against `str`.
   *
   * reg    - compiled regular expression.
   * str    - NUL-terminated text to search.
   * pmatch - receives the offsets of the overall match when one is found.
   *
   * Returns 1 when regexec() reports a match and 0 otherwise. Any regexec()
   * failure (not only REG_NOMATCH) counts as "no match", because `pmatch` is
   * only filled in on success.
   */
  static int regmatch(regex_t reg, char* str, regmatch_t* pmatch)
  {
      return regexec(&reg, str, 1, pmatch, 0) == 0;
  }
  32. static int tokens_append(
  33. lexer_tokens* tokens,
  34. lexer_token_class tokenClass,
  35. char* str,
  36. size_t len)
  37. {
  38. puts("hi");
  39. tokens->length += 1;
  40. if (tokens->length > tokens->allocd)
  41. {
  42. if (tokens->allocd)
  43. tokens->allocd *= 2;
  44. else
  45. tokens->allocd = 1;
  46. tokens->pairs = realloc(tokens->pairs, tokens->allocd);
  47. if (!tokens->pairs)
  48. return 1;
  49. puts("realloced");
  50. }
  51. puts("appending");
  52. lexer_token* pair = &(tokens->pairs[tokens->length - 1]);
  53. pair->tokenClass = tokenClass;
  54. pair->str = str;
  55. pair->len = len;
  56. printf("class: %i, pair length: %i, tokens length: %i, allocd: %i, str: %s\n", pair->tokenClass, (int)(pair->len), (int)(tokens->length), (int)(tokens->length), pair->str);
  57. puts("bye\n");
  58. return 0;
  59. }
  60. lexer_tokens* lexer_analyze(char* str)
  61. {
  62. printf("str: %s\n", str);
  63. size_t len = strlen(str);
  64. struct token_reg tregs[] =
  65. {
  66. regex("\\s+", LEXER_TOKEN_WHITESPACE),
  67. regex("[a-zA-Z][a-zA-Z0-9]*", LEXER_TOKEN_IDENTIFIER),
  68. regex("if|ret|func|str|arr|err|null", LEXER_TOKEN_KEYWORD),
  69. regex("=|==", LEXER_TOKEN_OPERATOR),
  70. regex("\\,", LEXER_TOKEN_SEPARATOR),
  71. regex("[0-9]+", LEXER_TOKEN_INTEGER),
  72. regex("\\\"[^\\\"]*\\\"", LEXER_TOKEN_STRING),
  73. regex("\\{", LEXER_TOKEN_FUNCTION_START),
  74. regex("\\}", LEXER_TOKEN_FUNCTION_END),
  75. regex("\\(", LEXER_TOKEN_EXPRESSION_START),
  76. regex("\\)", LEXER_TOKEN_EXPRESSION_END)
  77. };
  78. lexer_tokens* tokens = malloc(sizeof(lexer_tokens));
  79. if (!tokens)
  80. return NULL;
  81. tokens->pairs = NULL;
  82. tokens->length = 0;
  83. tokens->allocd = 0;
  84. regmatch_t pmatch;
  85. size_t offset = 0;
  86. #define APPEND(tclass) tokens_append(tokens, tclass, str + offset + pmatch.rm_so, pmatch.rm_eo - pmatch.rm_so)
  87. while (1)
  88. {
  89. size_t i;
  90. for (i = 0; i < LEXER_TOKEN_NONE; ++i)
  91. {
  92. struct token_reg treg = tregs[i];
  93. if (regmatch(treg.reg, str + offset, &pmatch))
  94. {
  95. APPEND(treg.tokenClass);
  96. break;
  97. }
  98. }
  99. offset += pmatch.rm_eo;
  100. if (offset >= len)
  101. break;
  102. }
  103. return tokens;
  104. }