#include #include #include #include #include "lexer.h" struct token_reg { regex_t reg; lexer_token_class tokenClass; int matched; regmatch_t pmatch; }; static struct token_reg regex(const char* str, lexer_token_class tokenClass) { regex_t reg; int err = regcomp(®, str, REG_EXTENDED); if (err) { fprintf(stderr, "Invalid regex: '%s'\n", str); fprintf(stderr, "Error code: %i\n", err); exit(1); } struct token_reg res = { reg, tokenClass, 0 }; return res; } static int regmatch(regex_t reg, char* str, regmatch_t* pmatch) { return (regexec(®, str, 1, pmatch, 0) != REG_NOMATCH); } static void tokens_append( lexer_tokens* tokens, lexer_token_class tokenClass, char* str, size_t len) { puts("hi"); tokens->length += 1; if (tokens->length > tokens->allocd) { printf("old allocd: %i\n", (int)(tokens->allocd)); if (tokens->allocd) tokens->allocd *= 2; else tokens->allocd = 1; printf("new allocd: %i\n", (int)(tokens->allocd)); tokens->pairs = realloc(tokens->pairs, tokens->allocd); if (!tokens->pairs) { fprintf(stderr, "Allocation error."); exit(1); } puts("realloced"); } struct lexer_token pair = { tokenClass, str, len }; printf("appending to %i\n", (int)(tokens->length - 1)); tokens->pairs[tokens->length - 1] = pair; puts("appended"); puts("bye\n"); return; } lexer_tokens* lexer_analyze(char* str) { printf("str: %s\n", str); size_t len = strlen(str); struct token_reg tregs[] = { regex(LEXER_REGEX_WHITESPACE, LEXER_TOKEN_WHITESPACE), regex(LEXER_REGEX_IDENTIFIER, LEXER_TOKEN_IDENTIFIER), regex(LEXER_REGEX_KEYWORD, LEXER_TOKEN_KEYWORD), regex(LEXER_REGEX_OPERATOR, LEXER_TOKEN_OPERATOR), regex(LEXER_REGEX_INTEGER, LEXER_TOKEN_INTEGER), regex(LEXER_REGEX_STRING, LEXER_TOKEN_STRING), regex(LEXER_REGEX_COMMA, LEXER_TOKEN_COMMA), regex(LEXER_REGEX_OPENBRACE, LEXER_TOKEN_OPENBRACE), regex(LEXER_REGEX_CLOSEBRACE, LEXER_TOKEN_CLOSEBRACE), regex(LEXER_REGEX_OPENPAREN, LEXER_TOKEN_OPENPAREN), regex(LEXER_REGEX_CLOSEPAREN, LEXER_TOKEN_CLOSEPAREN) }; lexer_tokens* tokens = malloc(sizeof(lexer_tokens)); if (!tokens) return NULL; tokens->pairs = NULL; tokens->length = 0; tokens->allocd = 0; regmatch_t pmatch; size_t offset = 0; #define APPEND(treg) tokens_append( \ tokens, \ treg.tokenClass, \ str + offset + treg.pmatch.rm_so, \ treg.pmatch.rm_eo - treg.pmatch.rm_so \ ) #define LENGTH(treg) (treg.pmatch.rm_eo - treg.pmatch.rm_so) while (1) { size_t i; //Reset .matched property for all tregs for (i = 0; i < LEXER_TOKEN_NONE; ++i) { tregs[i].matched = 0; } //See which tregs match for (i = 0; i < LEXER_TOKEN_NONE; ++i) { struct token_reg treg = tregs[i]; if (regmatch(treg.reg, str + offset, &pmatch)) { treg.matched = 1; treg.pmatch = pmatch; } } //Find longest match struct { size_t length; size_t index; } longest; longest.length = -1; longest.index = -1; for (i = 0; i < LEXER_TOKEN_NONE; ++i) { struct token_reg treg = tregs[i]; if (!treg.matched) continue; if (LENGTH(treg) >= longest.length) { longest.length = LENGTH(treg); longest.index = i; } } //Append longest match APPEND(tregs[longest.index]); offset += pmatch.rm_eo; if (offset >= len) break; } #undef APPEND #undef LENGTH return tokens; }