123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127 |
- #include <stdlib.h>
- #include <stdio.h>
- #include <string.h>
- #include <regex.h>
-
- #include "lexer.h"
-
- struct token_reg
- {
- regex_t reg;
- lexer_token_class tokenClass;
- };
-
- static struct token_reg regex(const char* str, lexer_token_class tokenClass)
- {
- regex_t reg;
- int err = regcomp(®, str, REG_EXTENDED);
- if (err)
- {
- fprintf(stderr, "Invalid regex: '%s'\n", str);
- fprintf(stderr, "Error code: %i\n", err);
- exit(1);
- }
-
- struct token_reg res =
- {
- reg,
- tokenClass
- };
-
- return res;
- }
-
/*
 * Run the compiled expression `reg` against `str`.
 *
 * reg    - compiled regular expression (passed by value).
 * str    - NUL-terminated text to search.
 * pmatch - receives the offsets of the overall match on success.
 *
 * Returns 1 when regexec() reports a match and 0 otherwise. Any regexec()
 * failure other than "no match" (for example running out of memory) is
 * also reported as 0, because *pmatch is not meaningful in that case.
 */
static int regmatch(regex_t reg, const char *str, regmatch_t *pmatch)
{
    return regexec(&reg, str, 1, pmatch, 0) == 0;
}
-
- static int tokens_append(
- lexer_tokens* tokens,
- lexer_token_class tokenClass,
- char* str,
- size_t len)
- {
- puts("hi");
- tokens->length += 1;
- if (tokens->length > tokens->allocd)
- {
- if (tokens->allocd)
- tokens->allocd *= 2;
- else
- tokens->allocd = 1;
-
- tokens->pairs = realloc(tokens->pairs, tokens->allocd);
- if (!tokens->pairs)
- return 1;
- puts("realloced");
- }
-
- puts("appending");
- lexer_token* pair = &(tokens->pairs[tokens->length - 1]);
- pair->tokenClass = tokenClass;
- pair->str = str;
- pair->len = len;
-
- printf("class: %i, pair length: %i, tokens length: %i, allocd: %i, str: %s\n", pair->tokenClass, (int)(pair->len), (int)(tokens->length), (int)(tokens->length), pair->str);
-
- puts("bye\n");
- return 0;
- }
-
- lexer_tokens* lexer_analyze(char* str)
- {
- printf("str: %s\n", str);
-
- size_t len = strlen(str);
-
- struct token_reg tregs[] =
- {
- regex("\\s+", LEXER_TOKEN_WHITESPACE),
- regex("[a-zA-Z][a-zA-Z0-9]*", LEXER_TOKEN_IDENTIFIER),
- regex("if|ret|func|str|arr|err|null", LEXER_TOKEN_KEYWORD),
- regex("=|==", LEXER_TOKEN_OPERATOR),
- regex("\\,", LEXER_TOKEN_SEPARATOR),
-
- regex("[0-9]+", LEXER_TOKEN_INTEGER),
- regex("\\\"[^\\\"]*\\\"", LEXER_TOKEN_STRING),
-
- regex("\\{", LEXER_TOKEN_FUNCTION_START),
- regex("\\}", LEXER_TOKEN_FUNCTION_END),
- regex("\\(", LEXER_TOKEN_EXPRESSION_START),
- regex("\\)", LEXER_TOKEN_EXPRESSION_END)
- };
-
- lexer_tokens* tokens = malloc(sizeof(lexer_tokens));
- if (!tokens)
- return NULL;
-
- tokens->pairs = NULL;
- tokens->length = 0;
- tokens->allocd = 0;
-
- regmatch_t pmatch;
- size_t offset = 0;
-
- #define APPEND(tclass) tokens_append(tokens, tclass, str + offset + pmatch.rm_so, pmatch.rm_eo - pmatch.rm_so)
-
- while (1)
- {
- size_t i;
- for (i = 0; i < LEXER_TOKEN_NONE; ++i)
- {
- struct token_reg treg = tregs[i];
- if (regmatch(treg.reg, str + offset, &pmatch))
- {
- APPEND(treg.tokenClass);
- break;
- }
- }
-
- offset += pmatch.rm_eo;
- if (offset >= len)
- break;
- }
-
- return tokens;
- }
|