|
|
@@ -1,10 +1,17 @@ |
|
|
|
#include <stdlib.h> |
|
|
|
#include <stdio.h> |
|
|
|
#include <string.h> |
|
|
|
#include <regex.h> |
|
|
|
|
|
|
|
#include "lexer.h" |
|
|
|
|
|
|
|
static regex_t regex(const char* str) |
|
|
|
struct token_reg |
|
|
|
{ |
|
|
|
regex_t reg; |
|
|
|
lexer_token_class tokenClass; |
|
|
|
}; |
|
|
|
|
|
|
|
static struct token_reg regex(const char* str, lexer_token_class tokenClass) |
|
|
|
{ |
|
|
|
regex_t reg; |
|
|
|
int err = regcomp(®, str, REG_EXTENDED); |
|
|
@@ -15,7 +22,13 @@ static regex_t regex(const char* str) |
|
|
|
exit(1); |
|
|
|
} |
|
|
|
|
|
|
|
return reg; |
|
|
|
struct token_reg res = |
|
|
|
{ |
|
|
|
reg, |
|
|
|
tokenClass |
|
|
|
}; |
|
|
|
|
|
|
|
return res; |
|
|
|
} |
|
|
|
|
|
|
|
/* Match reg against str, writing the match bounds into pmatch; returns
   nonzero on a match. NOTE(review): the function body was lost in
   extraction — semantics inferred from callers (lexer_analyze treats a
   nonzero return as "token found here" and advances by pmatch.rm_eo,
   which implies the match is anchored at the start of str); confirm
   against the original source. */
static int regmatch(regex_t reg, char* str, regmatch_t* pmatch)
|
|
@@ -29,38 +42,55 @@ static int tokens_append( |
|
|
|
char* str, |
|
|
|
size_t len) |
|
|
|
{ |
|
|
|
puts("hi"); |
|
|
|
tokens->length += 1; |
|
|
|
if (tokens->length > tokens->allocd) |
|
|
|
{ |
|
|
|
tokens->allocd *= 2; |
|
|
|
if (tokens->allocd) |
|
|
|
tokens->allocd *= 2; |
|
|
|
else |
|
|
|
tokens->allocd = 1; |
|
|
|
|
|
|
|
tokens->pairs = realloc(tokens->pairs, tokens->allocd); |
|
|
|
if (!tokens->pairs) |
|
|
|
return 1; |
|
|
|
puts("realloced"); |
|
|
|
} |
|
|
|
|
|
|
|
lexer_token pair = tokens->pairs[tokens->length - 1]; |
|
|
|
pair.tokenClass = tokenClass; |
|
|
|
pair.str = str; |
|
|
|
pair.len = len; |
|
|
|
puts("appending"); |
|
|
|
lexer_token* pair = &(tokens->pairs[tokens->length - 1]); |
|
|
|
pair->tokenClass = tokenClass; |
|
|
|
pair->str = str; |
|
|
|
pair->len = len; |
|
|
|
|
|
|
|
printf("class: %i, pair length: %i, tokens length: %i, allocd: %i, str: %s\n", pair->tokenClass, (int)(pair->len), (int)(tokens->length), (int)(tokens->length), pair->str); |
|
|
|
|
|
|
|
puts("bye\n"); |
|
|
|
return 0; |
|
|
|
} |
|
|
|
|
|
|
|
lexer_tokens* lexer_analyze(char* str) |
|
|
|
{ |
|
|
|
regex_t whitespace = regex("\\s+"); |
|
|
|
regex_t identifier = regex("[a-zA-Z][a-zA-Z0-9]*"); |
|
|
|
regex_t keyword = regex("if|ret|func|str|arr|err|null"); |
|
|
|
regex_t operator = regex("=|=="); |
|
|
|
regex_t separator = regex("\\,"); |
|
|
|
printf("str: %s\n", str); |
|
|
|
|
|
|
|
regex_t integer = regex("[0-9]+"); |
|
|
|
regex_t string = regex("\\\"[^\\\"]*\\\""); |
|
|
|
size_t len = strlen(str); |
|
|
|
|
|
|
|
regex_t function_start = regex("\\{"); |
|
|
|
regex_t function_end = regex("\\}"); |
|
|
|
regex_t expression_start = regex("\\("); |
|
|
|
regex_t expression_end = regex("\\)"); |
|
|
|
struct token_reg tregs[] = |
|
|
|
{ |
|
|
|
regex("\\s+", LEXER_TOKEN_WHITESPACE), |
|
|
|
regex("[a-zA-Z][a-zA-Z0-9]*", LEXER_TOKEN_IDENTIFIER), |
|
|
|
regex("if|ret|func|str|arr|err|null", LEXER_TOKEN_KEYWORD), |
|
|
|
regex("=|==", LEXER_TOKEN_OPERATOR), |
|
|
|
regex("\\,", LEXER_TOKEN_SEPARATOR), |
|
|
|
|
|
|
|
regex("[0-9]+", LEXER_TOKEN_INTEGER), |
|
|
|
regex("\\\"[^\\\"]*\\\"", LEXER_TOKEN_STRING), |
|
|
|
|
|
|
|
regex("\\{", LEXER_TOKEN_FUNCTION_START), |
|
|
|
regex("\\}", LEXER_TOKEN_FUNCTION_END), |
|
|
|
regex("\\(", LEXER_TOKEN_EXPRESSION_START), |
|
|
|
regex("\\)", LEXER_TOKEN_EXPRESSION_END) |
|
|
|
}; |
|
|
|
|
|
|
|
lexer_tokens* tokens = malloc(sizeof(lexer_tokens)); |
|
|
|
if (!tokens) |
|
|
@@ -77,65 +107,21 @@ lexer_tokens* lexer_analyze(char* str) |
|
|
|
|
|
|
|
while (1) |
|
|
|
{ |
|
|
|
if (regmatch(whitespace, str + offset, &pmatch)) |
|
|
|
{ |
|
|
|
if (APPEND(LEXER_TOKEN_WHITESPACE)) |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
else if (regmatch(identifier, str + offset, &pmatch)) |
|
|
|
{ |
|
|
|
if (APPEND(LEXER_TOKEN_IDENTIFIER)) |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
else if (regmatch(keyword, str + offset, &pmatch)) |
|
|
|
{ |
|
|
|
if (APPEND(LEXER_TOKEN_KEYWORD)) |
|
|
|
return NULL; |
|
|
|
} else if (regmatch(operator, str + offset, &pmatch)) |
|
|
|
{ |
|
|
|
if (APPEND(LEXER_TOKEN_OPERATOR)) |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
else if (regmatch(separator, str + offset, &pmatch)) |
|
|
|
{ |
|
|
|
if (APPEND(LEXER_TOKEN_SEPARATOR)) |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
else if (regmatch(integer, str + offset, &pmatch)) |
|
|
|
{ |
|
|
|
if (APPEND(LEXER_TOKEN_INTEGER)) |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
else if (regmatch(string, str + offset, &pmatch)) |
|
|
|
{ |
|
|
|
if (APPEND(LEXER_TOKEN_STRING)) |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
else if (regmatch(function_start, str + offset, &pmatch)) |
|
|
|
{ |
|
|
|
if (APPEND(LEXER_TOKEN_FUNCTION_START)) |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
else if (regmatch(function_end, str + offset, &pmatch)) |
|
|
|
size_t i; |
|
|
|
for (i = 0; i < LEXER_TOKEN_NONE; ++i) |
|
|
|
{ |
|
|
|
if (APPEND(LEXER_TOKEN_FUNCTION_END)) |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
else if (regmatch(expression_start, str + offset, &pmatch)) |
|
|
|
{ |
|
|
|
if (APPEND(LEXER_TOKEN_EXPRESSION_START)) |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
else if (regmatch(expression_end, str + offset, &pmatch)) |
|
|
|
{ |
|
|
|
if (APPEND(LEXER_TOKEN_EXPRESSION_END)) |
|
|
|
return NULL; |
|
|
|
} |
|
|
|
else |
|
|
|
{ |
|
|
|
return tokens; |
|
|
|
struct token_reg treg = tregs[i]; |
|
|
|
if (regmatch(treg.reg, str + offset, &pmatch)) |
|
|
|
{ |
|
|
|
APPEND(treg.tokenClass); |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
offset += pmatch.rm_eo; |
|
|
|
if (offset >= len) |
|
|
|
break; |
|
|
|
} |
|
|
|
|
|
|
|
return tokens; |
|
|
|
} |