- #include <stdlib.h>
- #include <stdio.h>
- #include <string.h>
- #include <regex.h>
-
- #include "lexer.h"
-
- struct token_reg
- {
- regex_t reg;
- lexer_token_class tokenClass;
- int matched;
- regmatch_t pmatch;
- };
-
- static struct token_reg regex(const char* str, lexer_token_class tokenClass)
- {
- regex_t reg;
- int err = regcomp(®, str, REG_EXTENDED);
- if (err)
- {
- fprintf(stderr, "Invalid regex: '%s'\n", str);
- fprintf(stderr, "Error code: %i\n", err);
- exit(1);
- }
-
- struct token_reg res =
- {
- reg,
- tokenClass,
- 0
- };
-
- return res;
- }
-
- static int regmatch(regex_t reg, char* str, regmatch_t* pmatch)
- {
- return (regexec(®, str, 1, pmatch, 0) != REG_NOMATCH);
- }
-
- static void tokens_append(
- lexer_tokens* tokens,
- lexer_token_class tokenClass,
- char* str,
- size_t len)
- {
- puts("hi");
- tokens->length += 1;
- if (tokens->length > tokens->allocd)
- {
- printf("old allocd: %i\n", (int)(tokens->allocd));
- if (tokens->allocd)
- tokens->allocd *= 2;
- else
- tokens->allocd = 1;
-
- printf("new allocd: %i\n", (int)(tokens->allocd));
-
- tokens->pairs = realloc(tokens->pairs, tokens->allocd);
- if (!tokens->pairs)
- {
- fprintf(stderr, "Allocation error.");
- exit(1);
- }
- puts("realloced");
- }
-
- struct lexer_token pair =
- {
- tokenClass,
- str,
- len
- };
-
- printf("appending to %i\n", (int)(tokens->length - 1));
- tokens->pairs[tokens->length - 1] = pair;
- puts("appended");
-
- puts("bye\n");
- return;
- }
-
- lexer_tokens* lexer_analyze(char* str)
- {
- printf("str: %s\n", str);
-
- size_t len = strlen(str);
-
- struct token_reg tregs[] =
- {
- regex(LEXER_REGEX_WHITESPACE, LEXER_TOKEN_WHITESPACE),
- regex(LEXER_REGEX_IDENTIFIER, LEXER_TOKEN_IDENTIFIER),
- regex(LEXER_REGEX_KEYWORD, LEXER_TOKEN_KEYWORD),
- regex(LEXER_REGEX_OPERATOR, LEXER_TOKEN_OPERATOR),
-
- regex(LEXER_REGEX_INTEGER, LEXER_TOKEN_INTEGER),
- regex(LEXER_REGEX_STRING, LEXER_TOKEN_STRING),
-
- regex(LEXER_REGEX_COMMA, LEXER_TOKEN_COMMA),
- regex(LEXER_REGEX_OPENBRACE, LEXER_TOKEN_OPENBRACE),
- regex(LEXER_REGEX_CLOSEBRACE, LEXER_TOKEN_CLOSEBRACE),
- regex(LEXER_REGEX_OPENPAREN, LEXER_TOKEN_OPENPAREN),
- regex(LEXER_REGEX_CLOSEPAREN, LEXER_TOKEN_CLOSEPAREN)
- };
-
- lexer_tokens* tokens = malloc(sizeof(lexer_tokens));
- if (!tokens)
- return NULL;
-
- tokens->pairs = NULL;
- tokens->length = 0;
- tokens->allocd = 0;
-
- regmatch_t pmatch;
- size_t offset = 0;
-
- #define APPEND(treg) tokens_append( \
- tokens, \
- treg.tokenClass, \
- str + offset + treg.pmatch.rm_so, \
- treg.pmatch.rm_eo - treg.pmatch.rm_so \
- )
-
- #define LENGTH(treg) (treg.pmatch.rm_eo - treg.pmatch.rm_so)
-
- while (1)
- {
- size_t i;
-
- //Reset .matched property for all tregs
- for (i = 0; i < LEXER_TOKEN_NONE; ++i)
- {
- tregs[i].matched = 0;
- }
-
- //See which tregs match
- for (i = 0; i < LEXER_TOKEN_NONE; ++i)
- {
- struct token_reg treg = tregs[i];
- if (regmatch(treg.reg, str + offset, &pmatch))
- {
- treg.matched = 1;
- treg.pmatch = pmatch;
- }
- }
-
- //Find longest match
- struct
- {
- size_t length;
- size_t index;
- } longest;
- longest.length = -1;
- longest.index = -1;
- for (i = 0; i < LEXER_TOKEN_NONE; ++i)
- {
- struct token_reg treg = tregs[i];
- if (!treg.matched)
- continue;
-
- if (LENGTH(treg) >= longest.length)
- {
- longest.length = LENGTH(treg);
- longest.index = i;
- }
- }
-
- //Append longest match
- APPEND(tregs[longest.index]);
-
- offset += pmatch.rm_eo;
- if (offset >= len)
- break;
- }
-
- #undef APPEND
- #undef LENGTH
-
- return tokens;
- }
|