#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>

#include "lexer.h"

/* A compiled pattern together with the token class it produces and the
 * result of matching it at the current input position. */
struct token_reg {
    regex_t reg;
    lexer_token_class tokenClass;
    int matched;
    regmatch_t pmatch;
};

/* Compile an extended regex; exits on an invalid pattern, since every
 * pattern is hard-coded in lexer_analyze() below. */
static struct token_reg regex(const char* str, lexer_token_class tokenClass) {
    regex_t reg;
    int err = regcomp(&reg, str, REG_EXTENDED);
    if (err) {
        fprintf(stderr, "Invalid regex: '%s'\n", str);
        fprintf(stderr, "Error code: %i\n", err);
        exit(1);
    }
    struct token_reg res = { reg, tokenClass, 0 };
    return res;
}

static int regmatch(const regex_t* reg, char* str, regmatch_t* pmatch) {
    return (regexec(reg, str, 1, pmatch, 0) != REG_NOMATCH);
}

/* Append one token to the dynamically grown token array.
 * Returns 0 on success, 1 on allocation failure. */
static int tokens_append(
        lexer_tokens* tokens,
        lexer_token_class tokenClass,
        char* str,
        size_t len) {
    if (tokens->length + 1 > tokens->allocd) {
        size_t allocd = tokens->allocd ? tokens->allocd * 2 : 1;
        /* Grow by element count *and* element size, and assign through a
         * temporary so the old buffer is not leaked if realloc fails. */
        lexer_token* pairs = realloc(tokens->pairs, allocd * sizeof(lexer_token));
        if (!pairs)
            return 1;
        tokens->pairs = pairs;
        tokens->allocd = allocd;
    }
    lexer_token* pair = &tokens->pairs[tokens->length];
    pair->tokenClass = tokenClass;
    pair->str = str;
    pair->len = len;
    tokens->length += 1;
    return 0;
}

lexer_tokens* lexer_analyze(char* str) {
    size_t len = strlen(str);

    struct token_reg tregs[] = {
        regex("\\s+", LEXER_TOKEN_WHITESPACE),
        regex("[a-zA-Z][a-zA-Z0-9]*", LEXER_TOKEN_IDENTIFIER),
        regex("if|ret|func|str|arr|err|null", LEXER_TOKEN_KEYWORD),
        regex("=|==", LEXER_TOKEN_OPERATOR),
        regex("\\,", LEXER_TOKEN_SEPARATOR),
        regex("[0-9]+", LEXER_TOKEN_INTEGER),
        regex("\\\"[^\\\"]*\\\"", LEXER_TOKEN_STRING),
        regex("\\{", LEXER_TOKEN_FUNCTION_START),
        regex("\\}", LEXER_TOKEN_FUNCTION_END),
        regex("\\(", LEXER_TOKEN_EXPRESSION_START),
        regex("\\)", LEXER_TOKEN_EXPRESSION_END)
    };
    const size_t nregs = sizeof(tregs) / sizeof(tregs[0]);

    lexer_tokens* tokens = malloc(sizeof(lexer_tokens));
    if (!tokens)
        return NULL;
    tokens->pairs = NULL;
    tokens->length = 0;
    tokens->allocd = 0;

    regmatch_t pmatch;
    size_t offset = 0;

#define APPEND(treg) tokens_append( \
    tokens, \
    (treg).tokenClass, \
    str + offset + (treg).pmatch.rm_so, \
    (size_t)((treg).pmatch.rm_eo - (treg).pmatch.rm_so) \
)
#define LENGTH(treg) ((size_t)((treg).pmatch.rm_eo - (treg).pmatch.rm_so))

    while (1) {
        size_t i;

        //Reset .matched property for all tregs
        for (i = 0; i < nregs; ++i) {
            tregs[i].matched = 0;
        }

        //See which tregs match; write the results back into the array,
        //not into a local copy
        for (i = 0; i < nregs; ++i) {
            struct token_reg* treg = &tregs[i];
            if (regmatch(&treg->reg, str + offset, &pmatch)) {
                treg->matched = 1;
                treg->pmatch = pmatch;
            }
        }

        //Find longest match; on ties the later entry in tregs wins
        struct { size_t length; size_t index; } longest;
        longest.length = 0;
        longest.index = (size_t)-1;
        for (i = 0; i < nregs; ++i) {
            if (!tregs[i].matched)
                continue;
            if (LENGTH(tregs[i]) >= longest.length) {
                longest.length = LENGTH(tregs[i]);
                longest.index = i;
            }
        }

        //Nothing matched the remaining input: stop lexing
        if (longest.index == (size_t)-1)
            break;

        //Append longest match and advance past it
        if (APPEND(tregs[longest.index]))
            break;
        offset += tregs[longest.index].pmatch.rm_eo;
        if (offset >= len)
            break;
    }

#undef APPEND
#undef LENGTH

    //The compiled patterns are no longer needed once the input is tokenized
    for (size_t i = 0; i < nregs; ++i) {
        regfree(&tregs[i].reg);
    }

    return tokens;
}
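
/* A minimal usage sketch, assuming lexer.h declares lexer_analyze() along
 * with the lexer_tokens / lexer_token structs used above, and that the
 * caller owns the returned buffers. The header is not shown here, so the
 * LEXER_DEMO flag, the sample input, and the cleanup below are illustrative
 * rather than the project's own test driver. Build with -DLEXER_DEMO to
 * include it. */
#ifdef LEXER_DEMO
int main(void) {
    char input[] = "func main ( ) { ret 42 }";
    lexer_tokens* tokens = lexer_analyze(input);
    if (!tokens)
        return 1;
    for (size_t i = 0; i < tokens->length; ++i) {
        lexer_token* tok = &tokens->pairs[i];
        /* tok->str points into `input` and is not NUL-terminated, so
         * print exactly tok->len characters. */
        printf("class %i: '%.*s'\n", (int)tok->tokenClass, (int)tok->len, tok->str);
    }
    free(tokens->pairs);
    free(tokens);
    return 0;
}
#endif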