#include #include #include #include "lexer.h" static regex_t regex(const char* str) { regex_t reg; int err = regcomp(®, str, REG_EXTENDED); if (err) { fprintf(stderr, "Invalid regex: '%s'\n", str); fprintf(stderr, "Error code: %i\n", err); exit(1); } return reg; } static int regmatch(regex_t reg, char* str, regmatch_t* pmatch) { return (regexec(®, str, 1, pmatch, 0) != REG_NOMATCH); } static int tokens_append( lexer_tokens* tokens, lexer_token_class tokenClass, char* str, size_t len) { tokens->length += 1; if (tokens->length > tokens->allocd) { tokens->allocd *= 2; tokens->pairs = realloc(tokens->pairs, tokens->allocd); if (!tokens->pairs) return 1; } lexer_token pair = tokens->pairs[tokens->length - 1]; pair.tokenClass = tokenClass; pair.str = str; pair.len = len; return 0; } lexer_tokens* lexer_analyze(char* str) { regex_t whitespace = regex("\\s+"); regex_t identifier = regex("[a-zA-Z][a-zA-Z0-9]*"); regex_t keyword = regex("if|ret|func|str|arr|err|null"); regex_t operator = regex("=|=="); regex_t separator = regex("\\,"); regex_t integer = regex("[0-9]+"); regex_t string = regex("\\\"[^\\\"]*\\\""); regex_t function_start = regex("\\{"); regex_t function_end = regex("\\}"); regex_t expression_start = regex("\\("); regex_t expression_end = regex("\\)"); lexer_tokens* tokens = malloc(sizeof(lexer_tokens)); if (!tokens) return NULL; tokens->pairs = NULL; tokens->length = 0; tokens->allocd = 0; regmatch_t pmatch; size_t offset = 0; #define APPEND(tclass) tokens_append(tokens, tclass, str + offset + pmatch.rm_so, pmatch.rm_eo - pmatch.rm_so) while (1) { if (regmatch(whitespace, str + offset, &pmatch)) { if (APPEND(LEXER_TOKEN_WHITESPACE)) return NULL; } else if (regmatch(identifier, str + offset, &pmatch)) { if (APPEND(LEXER_TOKEN_IDENTIFIER)) return NULL; } else if (regmatch(keyword, str + offset, &pmatch)) { if (APPEND(LEXER_TOKEN_KEYWORD)) return NULL; } else if (regmatch(operator, str + offset, &pmatch)) { if (APPEND(LEXER_TOKEN_OPERATOR)) return NULL; } else if (regmatch(separator, str + offset, &pmatch)) { if (APPEND(LEXER_TOKEN_SEPARATOR)) return NULL; } else if (regmatch(integer, str + offset, &pmatch)) { if (APPEND(LEXER_TOKEN_INTEGER)) return NULL; } else if (regmatch(string, str + offset, &pmatch)) { if (APPEND(LEXER_TOKEN_STRING)) return NULL; } else if (regmatch(function_start, str + offset, &pmatch)) { if (APPEND(LEXER_TOKEN_FUNCTION_START)) return NULL; } else if (regmatch(function_end, str + offset, &pmatch)) { if (APPEND(LEXER_TOKEN_FUNCTION_END)) return NULL; } else if (regmatch(expression_start, str + offset, &pmatch)) { if (APPEND(LEXER_TOKEN_EXPRESSION_START)) return NULL; } else if (regmatch(expression_end, str + offset, &pmatch)) { if (APPEND(LEXER_TOKEN_EXPRESSION_END)) return NULL; } else { return tokens; } offset += pmatch.rm_eo; } }