|
|
@@ -9,6 +9,8 @@ struct token_reg |
|
|
|
{ |
|
|
|
regex_t reg; |
|
|
|
lexer_token_class tokenClass; |
|
|
|
int matched; |
|
|
|
regmatch_t pmatch; |
|
|
|
}; |
|
|
|
|
|
|
|
static struct token_reg regex(const char* str, lexer_token_class tokenClass) |
|
|
@@ -25,7 +27,8 @@ static struct token_reg regex(const char* str, lexer_token_class tokenClass) |
|
|
|
struct token_reg res = |
|
|
|
{ |
|
|
|
reg, |
|
|
|
tokenClass |
|
|
|
tokenClass, |
|
|
|
0 |
|
|
|
}; |
|
|
|
|
|
|
|
return res; |
|
|
@@ -103,25 +106,67 @@ lexer_tokens* lexer_analyze(char* str) |
|
|
|
regmatch_t pmatch; |
|
|
|
size_t offset = 0; |
|
|
|
|
|
|
|
#define APPEND(tclass) tokens_append(tokens, tclass, str + offset + pmatch.rm_so, pmatch.rm_eo - pmatch.rm_so) |
|
|
|
#define APPEND(treg) tokens_append( \ |
|
|
|
tokens, \ |
|
|
|
treg.tokenClass, \ |
|
|
|
str + offset + treg.pmatch.rm_so, \ |
|
|
|
treg.pmatch.rm_eo - treg.pmatch.rm_so \ |
|
|
|
) |
|
|
|
|
|
|
|
#define LENGTH(treg) (treg.pmatch.rm_eo - treg.pmatch.rm_so) |
|
|
|
|
|
|
|
while (1) |
|
|
|
{ |
|
|
|
size_t i; |
|
|
|
|
|
|
|
//Reset .matched property for all tregs |
|
|
|
for (i = 0; i < LEXER_TOKEN_NONE; ++i) |
|
|
|
{ |
|
|
|
tregs[i].matched = 0; |
|
|
|
} |
|
|
|
|
|
|
|
//See which tregs match |
|
|
|
for (i = 0; i < LEXER_TOKEN_NONE; ++i) |
|
|
|
{ |
|
|
|
struct token_reg treg = tregs[i]; |
|
|
|
if (regmatch(treg.reg, str + offset, &pmatch)) |
|
|
|
{ |
|
|
|
APPEND(treg.tokenClass); |
|
|
|
break; |
|
|
|
treg.matched = 1; |
|
|
|
treg.pmatch = pmatch; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
//Find longest match |
|
|
|
struct |
|
|
|
{ |
|
|
|
size_t length; |
|
|
|
size_t index; |
|
|
|
} longest; |
|
|
|
longest.length = -1; |
|
|
|
longest.index = -1; |
|
|
|
for (i = 0; i < LEXER_TOKEN_NONE; ++i) |
|
|
|
{ |
|
|
|
struct token_reg treg = tregs[i]; |
|
|
|
if (!treg.matched) |
|
|
|
continue; |
|
|
|
|
|
|
|
if (LENGTH(treg) >= longest.length) |
|
|
|
{ |
|
|
|
longest.length = LENGTH(treg); |
|
|
|
longest.index = i; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
//Append longest match |
|
|
|
APPEND(tregs[longest.index]); |
|
|
|
|
|
|
|
offset += pmatch.rm_eo; |
|
|
|
if (offset >= len) |
|
|
|
break; |
|
|
|
} |
|
|
|
|
|
|
|
#undef APPEND |
|
|
|
#undef LENGTH |
|
|
|
|
|
|
|
return tokens; |
|
|
|
} |