Browse Source

completely rewritten parser

feature/new-parser
Martin Dørum 3 years ago
parent
commit
1d20362ef4
5 changed files with 342 additions and 332 deletions
  1. 1
    1
      Makefile
  2. 2
    2
      cmd/main.cc
  3. 297
    303
      lib/BXParser.cc
  4. 40
    24
      lib/BXParser.h
  5. 2
    2
      lib/CompileStep.cc

+ 1
- 1
Makefile View File

lib/toolchain.h lib/bufio.h lib/toolchain.h lib/bufio.h
BUILD = build BUILD = build
OBJS = $(patsubst %,$(BUILD)/%.o,$(SRCS)) OBJS = $(patsubst %,$(BUILD)/%.o,$(SRCS))
CFLAGS = -std=c++14 -Wall -Wextra -Wno-unused-parameter -O3 -g
CFLAGS = -std=c++14 -Wall -Wextra -Wno-unused-parameter -Ilib -O3 -g
LDLIBS = -lpthread LDLIBS = -lpthread


$(BUILD)/%.cc.o: %.cc $(HDRS) $(BUILD)/%.cc.o: %.cc $(HDRS)

+ 2
- 2
cmd/main.cc View File

BXVariables variables; BXVariables variables;
if (sys::fileExists(path + "/.config.bx")) { if (sys::fileExists(path + "/.config.bx")) {
bufio::IFStream f(path + "/.config.bx"); bufio::IFStream f(path + "/.config.bx");
BXParser parser(f, BXParser::FLAG_NONE);
BXParser parser(f);
parser.parse(variables); parser.parse(variables);
} }


for (auto &pair: kwargs) { for (auto &pair: kwargs) {
bufio::ISStream ss(pair.second); bufio::ISStream ss(pair.second);
BXParser parser(ss, BXParser::FLAG_NONE);
BXParser parser(ss);
auto &list = variables[pair.first]; auto &list = variables[pair.first];
list.clear(); list.clear();
parser.parseList(variables, list); parser.parseList(variables, list);

+ 297
- 303
lib/BXParser.cc View File

return c; return c;
} }


// Tries to read one of the two-character assignment operators
// `:=`, `+=`, `=+` or `|=` at the current read position.
//
// On a match both characters are consumed and the matching Operator is
// returned. Otherwise nothing is consumed and Operator::NONE is returned.
BXParser::Operator BXParser::readOperator() {
	const int second = peek(2);
	const int first = peek();

	Operator found = Operator::NONE;
	if (second == '=') {
		// ':=', '+=' and '|=' all share the trailing '='
		switch (first) {
		case ':':
			found = Operator::COLON_EQUALS;
			break;
		case '+':
			found = Operator::PLUS_EQUALS;
			break;
		case '|':
			found = Operator::BAR_EQUALS;
			break;
		default:
			break;
		}
	} else if (first == '=' && second == '+') {
		found = Operator::EQUALS_PLUS;
	}

	if (found == Operator::NONE)
		return found;

	// Every recognised operator is exactly two characters long
	skip();
	skip();
	return found;
}

void BXParser::skip(char expected) { void BXParser::skip(char expected) {
int ch = get(); int ch = get();
if (ch == EOF) { if (ch == EOF) {
throw BXParseError(std::to_string(line_) + ":" + std::to_string(ch_) + ": " + msg); throw BXParseError(std::to_string(line_) + ":" + std::to_string(ch_) + ": " + msg);
} }


// Reports whether ch is a space, tab, carriage return or line feed.
// Any other value (including EOF) yields false.
static bool isWhitespace(int ch) {
	switch (ch) {
	case ' ':
	case '\t':
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}

// Consumes whitespace at the current read position.
//
// When FLAG_ONE_LINE is set, only horizontal whitespace is consumed: the
// scan stops in front of the first '\r' or '\n'. Without the flag, line
// breaks are consumed as well.
void BXParser::skipWhitespace() {
	const bool stayOnLine = (flags_ & FLAG_ONE_LINE) != 0;

	for (;;) {
		const int next = peek();
		if (!isWhitespace(next))
			return;
		if (stayOnLine && (next == '\r' || next == '\n'))
			return;
		get();
	}
}
[[noreturn]] void BXParser::error(std::string msg, TokenKind kind) {
switch (kind) {
case TokenKind::E_O_F:
msg += " EOF";
break;


char BXParser::parseEscape() {
skip(); // '\'
int ch;
switch (ch = get()) {
case EOF:
error("Unexpected EOF");
case TokenKind::INDENTATION:
msg += " indentation";
break;


case 'n':
return '\n';
case TokenKind::NEWLINE:
msg += " newline";
break;


case 'r':
return '\r';
case TokenKind::COMMA:
msg += " comma ','";
break;


case 't':
return '\t';
case TokenKind::COLON_EQUALS:
msg += " colon equals ':='";
break;


default:
return (char)ch;
}
}
case TokenKind::PLUS_EQUALS:
msg += " plus equals '+='";
break;


static void appendVariableToString(
const BXVariables &vars, std::string &name,
std::string &value) {
if (name.size() == 0)
return;

auto it = vars.find(name);
if (it == vars.end())
return;

auto &vec = it->second;
bool first = true;
for (auto &part: vec) {
if (!first) {
value += ' ';
}
case TokenKind::EQUALS_PLUS:
msg += " equals plus '=+'";
break;


first = false;
value += part;
}
}
case TokenKind::BAR_EQUALS:
msg += " bar equals '|='";
break;


static void appendVariableToArray(
const BXVariables &vars, const std::string &name,
std::vector<std::string> &values) {
if (name.size() == 0)
return;
case TokenKind::EXPANSION:
msg += " expansion";
break;


auto it = vars.find(name);
if (it == vars.end())
return;
case TokenKind::STRING:
msg += " string";
break;


auto &vec = it->second;
for (auto &part: vec) {
values.push_back(part);
case TokenKind::NONE:
msg += " none";
break;
} }
}

void BXParser::parseExpansion(const BXVariables &vars, std::vector<std::string> &values) {
skip(); // '$'

std::string str;
switch (peek()) {
case '{':
skip();
parseString(vars, str, '}');
skip('}');
appendVariableToArray(vars, str, values);
break;

default:
if (!parseIdentifier(str)) {
error("No identifier after $.");
}


appendVariableToArray(vars, str, values);
break;
}
error(msg);
} }


void BXParser::parseQuotedExpansion(const BXVariables &vars, std::string &content) {
skip(); // '$'

std::string BXParser::readIdent(const BXVariables &vars) {
std::string str; std::string str;
switch (peek()) {
case '{':
skip();
parseString(vars, str, '}');
skip('}');
appendVariableToString(vars, str, content);
break;

default:
if (!parseIdentifier(str)) {
error("No identifier after $.");
}

appendVariableToString(vars, str, content);
break;
}
}

void BXParser::parseQuotedString(const BXVariables &vars, std::string &content) {
skip(); // '"'


int ch; int ch;
while ((ch = peek()) != EOF) { while ((ch = peek()) != EOF) {
switch (ch) {
case EOF:
error("Unexpected EOF");

case '\\':
content.push_back(parseEscape());
break;

case '$':
parseQuotedExpansion(vars, content);
break;

case '"':
skip();
return;

default:
content.push_back(get());
break;
if (
(ch >= 'a' && ch <= 'z') ||
(ch >= 'A' && ch <= 'Z') ||
(ch == '_')) {
str.push_back(ch);
get();
} else {
break;
} }
} }

return str;
} }


bool BXParser::parseString(const BXVariables &vars, std::string &content, int sep) {
bool success = false;
void BXParser::skipWhitespace() {
int ch; int ch;
while (1) {
ch = peek();
if ((sep > 0 && ch == sep) || isWhitespace(ch)) {
return success;
}

switch (ch) {
case EOF:
return success;

case '\\':
content.push_back(parseEscape());
success = true;
break;

case '$':
parseQuotedExpansion(vars, content);
success = true;
break;

case '"':
parseQuotedString(vars, content);
success = true;
break;

default:
int ch2 = peek(2);
if (
(ch == ':' && ch2 == '=') ||
(ch == '+' && ch2 == '=') ||
(ch == '=' && ch2 == '+') ||
(ch == '|' && ch2 == '=')) {
return success;
}

content.push_back(get());
success = true;
break;
}
while ((ch = peek()) == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
get();
} }
} }


bool BXParser::parseIdentifier(std::string &content) {
int ch = peek();
if (!(
(ch >= 'a' && ch <= 'z') ||
(ch >= 'A' && ch <= 'Z') ||
(ch == '_'))) {
return false;
}

content += get();

while (1) {
ch = peek();
if (!(
(ch >= '0' && ch <= '9') ||
(ch >= 'a' && ch <= 'z') ||
(ch >= 'A' && ch <= 'Z') ||
(ch == '_'))) {
return true;
}

content += get();
char BXParser::readEscape() {
int ch = get();
if (ch == EOF) {
error("Unexpected EOF");
} else if (ch == 'n') {
return '\n';
} else if (ch == 'r') {
return '\r';
} else if (ch == 't') {
return '\t';
} else {
return (char)ch;
} }
} }


void BXParser::parse(BXVariables &vars) {
std::string key, value;
std::vector<std::string> values;

skipWhitespace();
if (!parseString(vars, key)) {
return;
std::string BXParser::readStringExpansion(const BXVariables &vars) {
bool braced = peek() == '{';
std::string key;
if (braced) {
get();
skipWhitespace();
key = readString(vars);
} else {
key = readIdent(vars);
} }


skipWhitespace();
Operator prevOper = readOperator();
if (prevOper == Operator::NONE) {
error("Expected operator.");
auto it = vars.find(key);
if (it == vars.end()) {
error("Key '" + key + "' doesn't exist");
} }


auto doAssignment = [&] {
switch (prevOper) {
case Operator::COLON_EQUALS:
vars[key] = std::move(values);
values.clear();
break;
if (braced) {
skipWhitespace();


case Operator::PLUS_EQUALS:
{
auto &vec = vars[key];
vec.reserve(vec.size() + values.size());
for (size_t i = 0; i < values.size(); ++i) {
vec.push_back(std::move(values[i]));
}
}
values.clear();
break;
if (peek() != '}') {
error("Expected a '}' after a '${' expansion");
}


case Operator::EQUALS_PLUS:
{
auto &vec = vars[key];
vec.reserve(vec.size() + values.size());
for (size_t i = 0; i < vec.size(); ++i) {
values.push_back(std::move(vec[i]));
}
vec = std::move(values);
}
values.clear();
break;
get();
}


case Operator::BAR_EQUALS:
{
auto &vec = vars[key];
for (size_t i = 0; i < vec.size(); ++i) {
bool exists = false;
for (auto &val: values) {
if (val == vec[i]) {
exists = true;
break;
}
}
// TODO: Use BXValue.asString()
return it->second[0];
}


if (!exists) {
values.push_back(std::move(vec[i]));
}
}
vec = std::move(values);
}
values.clear();
break;
std::string BXParser::readQuotedString(const BXVariables &vars) {
std::string str;


case Operator::NONE:
int ch;
while ((ch = peek()) != EOF) {
if (ch == '\\') {
get();
str.push_back(readEscape());
} else if (ch == '$') {
get();
str += readStringExpansion(vars);
} else if (ch == '"') {
get();
break; break;
} else {
str.push_back(ch);
get();
} }
};
}


while (true) {
skipWhitespace();
return str;
}

std::string BXParser::readString(const BXVariables &vars) {
std::string str;


// Parse next value
if (peek() == '$') {
parseExpansion(vars, values);
value.clear();
continue; // We can't have an assignment after an expansion
} else if (!parseString(vars, value)) {
int ch;
while ((ch = peek()) != EOF) {
if (ch == '\\') {
get();
str.push_back(readEscape());
} else if (ch == '$') {
get();
str += readStringExpansion(vars);
} else if (ch == '"') {
get();
str += readQuotedString(vars);
} else if (
ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n' ||
ch == '}' || ch == ',') {
break; break;
} else {
str.push_back(ch);
get();
} }
}


skipWhitespace();
return str;
}


// If there's an operator next, the value we just read was actually a key.
// Otherwise, it was just another value.
Operator op = readOperator();
if (op == Operator::NONE) {
values.push_back(std::move(value));
value.clear();
} else {
if (value.size() == 0) {
error("Expected string before assignment operator");
}
BXParser::Token BXParser::getToken(const BXVariables &vars) {
Token tok;
tok.line = line();
tok.ch = ch();


doAssignment();
prevOper = op;
key = std::move(value);
value.clear();
}
int ch = peek();

if (ch == EOF) {
tok.kind = TokenKind::E_O_F;
return tok;
} }


doAssignment();
}
if (ch == '\t' || ch == ' ') {
tok.kind = TokenKind::INDENTATION;
do {
get();
ch = peek();
} while (ch == '\t' || ch == ' ');
return tok;
} else if (ch == '\n' || ch == '\r') {
tok.kind = TokenKind::NEWLINE;
do {
get();
ch = peek();
} while (ch == '\n' || ch == '\r');
return tok;
}


void BXParser::parseList(const BXVariables &vars, std::vector<std::string> &values) {
while (true) {
int ch2 = peek(2);
if (ch == ',') {
get();
tok.kind = TokenKind::COMMA;
} else if (ch == ':' && ch2 == '=') {
get(); get();
tok.kind = TokenKind::COLON_EQUALS;
} else if (ch == '+' && ch2 == '=') {
get(); get();
tok.kind = TokenKind::PLUS_EQUALS;
} else if (ch == '=' && ch2 == '+') {
get(); get();
tok.kind = TokenKind::EQUALS_PLUS;
} else if (ch == '|' && ch2 == '=') {
get(); get();
tok.kind = TokenKind::BAR_EQUALS;
} else if (ch == '$' && ch2 == '{') {
get(); get();
skipWhitespace(); skipWhitespace();
std::string value;
if (!parseString(vars, value)) {
break;
tok.kind = TokenKind::EXPANSION;
tok.str = readString(vars);
skipWhitespace();
if (peek() != '}') {
error("Expected a '}' after a '${' expansion.");
} }
get();
} else if (ch == '$') {
get();
tok.kind = TokenKind::EXPANSION;
tok.str = readString(vars);
} else {
tok.kind = TokenKind::STRING;
tok.str = readString(vars);
}


values.push_back(std::move(value));
while ((ch = peek()) == '\t' || ch == ' ') {
get();
} }

return tok;
}

BXParser::Token BXParser::readToken(const BXVariables &vars) {
Token t = tok_;
tok_ = getToken(vars);
return t;
} }


void BXWriter::escape(const std::string &str) { void BXWriter::escape(const std::string &str) {
buf_.put('\n'); buf_.put('\n');
} }
} }

// Parses a sequence of `key <operator> values...` assignments and applies
// them to vars.
//
// Each assignment starts with a STRING token (the key), followed by one of:
//   :=  replace the variable's values
//   +=  append values
//   =+  prepend values (each value is inserted at the front)
//   |=  append values that are not already present
// The values themselves are read by parseList().
//
// vars:    variable table to update; the key's entry is created if missing.
// oneLine: when true, parsing stops at the first NEWLINE token instead of
//          treating it as an error, so only a single line is consumed.
//
// Reports an error via error() when a key or an operator is expected but
// another token is found.
void BXParser::parse(BXVariables &vars, bool oneLine) {
	// Prime the one-token lookahead; the token returned here is discarded.
	readToken(vars);

	while (true) {
		const TokenKind kind = peekToken().kind;
		if (kind == TokenKind::E_O_F) {
			break;
		} else if (oneLine && kind == TokenKind::NEWLINE) {
			// parseList() returns with the NEWLINE still pending in one-line
			// mode; that newline ends the line being parsed.
			break;
		} else if (kind != TokenKind::STRING) {
			error("Expected string, got", kind);
		}

		const Token keyTok = readToken(vars);
		std::vector<std::string> &var = vars[keyTok.str];

		// Strategy used by parseList() to add one value to `var`.
		// Always assigned below: the default case never returns.
		void (*addVal)(std::vector<std::string> &var, std::string val);
		switch (peekToken().kind) {
		case TokenKind::COLON_EQUALS:
			var.clear();

			// Fallthrough
		case TokenKind::PLUS_EQUALS:
			addVal = [](auto &var, auto val) {
				var.push_back(std::move(val));
			};
			break;

		case TokenKind::EQUALS_PLUS:
			addVal = [](auto &var, auto val) {
				var.insert(var.begin(), std::move(val));
			};
			break;

		case TokenKind::BAR_EQUALS:
			addVal = [](auto &var, auto val) {
				for (auto &v: var) {
					if (v == val) {
						return;
					}
				}

				// val is an owned copy; move it like the other strategies do
				var.push_back(std::move(val));
			};
			break;

		default:
			error("Expected operator, got", peekToken().kind);
		}
		readToken(vars); // Consume the operator

		parseList(vars, var, addVal, oneLine);
	}
}

// Reads a list of values from the token stream and feeds each one to addVal.
//
// vars:    variable table used to resolve EXPANSION tokens; looking up an
//          unknown name creates an empty entry for it.
// var:     the list that addVal is applied to.
// addVal:  strategy that adds a single value to `var`.
// oneLine: when true, a NEWLINE token ends the list and is left unread.
//
// The list ends at a COMMA (which is consumed), at end of input, or at a
// NEWLINE. When oneLine is false, a NEWLINE followed by an INDENTATION token
// continues the list on the next line. Any other token is reported through
// error().
void BXParser::parseList(
		BXVariables &vars, std::vector<std::string> &var,
		void (*addVal)(std::vector<std::string> &var, std::string val),
		bool oneLine) {
	while (true) {
		// Work on a copy: readToken() replaces the lookahead token
		Token tok = peekToken();
		switch (tok.kind) {
		case TokenKind::NEWLINE:
			if (oneLine) {
				return;
			}

			readToken(vars);
			if (peekToken().kind != TokenKind::INDENTATION) {
				return;
			}

			readToken(vars); // Read indentation
			break;

		case TokenKind::STRING:
			addVal(var, std::move(tok.str));
			readToken(vars);
			break;

		case TokenKind::COMMA:
			readToken(vars);
			return;

		case TokenKind::E_O_F:
			return;

		case TokenKind::EXPANSION:
			{
				// Snapshot the expanded values first. The expanded variable
				// may be `var` itself (e.g. `X += $X`), and adding to a
				// vector while iterating over it invalidates the iterators.
				const std::vector<std::string> expanded = vars[tok.str];
				for (const auto &v: expanded) {
					addVal(var, v);
				}
			}
			readToken(vars);
			break;

		default:
			error("Unexpected token", tok.kind);
		}
	}
}

// Convenience overload: reads a multi-line list and appends every value to
// the end of `var`.
void BXParser::parseList(BXVariables &vars, std::vector<std::string> &var) {
	parseList(
		vars, var,
		[](std::vector<std::string> &list, std::string item) {
			list.push_back(std::move(item));
		},
		false);
}

+ 40
- 24
lib/BXParser.h View File



class BXParser { class BXParser {
public: public:
static const int FLAG_NONE = 0;
static const int FLAG_ONE_LINE = 1 << 0;
enum class TokenKind {
E_O_F,
INDENTATION,
NEWLINE,
COMMA,
COLON_EQUALS,
PLUS_EQUALS,
EQUALS_PLUS,
BAR_EQUALS,
EXPANSION,
STRING,
NONE,
};


BXParser(bufio::IStream &stream, int flags = FLAG_NONE, int line = 1, int ch = 1):
flags_(flags), line_(line), ch_(ch), buf_(stream) {}
struct Token {
TokenKind kind;
std::string str;
int line;
int ch;
};


void parse(BXVariables &vars);
void parseList(const BXVariables &vars, std::vector<std::string> &values);
BXParser(bufio::IStream &stream, int line = 1, int ch = 1):
line_(line), ch_(ch), buf_(stream) {}


int get(); int get();
int peek(size_t count = 1) { return buf_.peek(count); } int peek(size_t count = 1) { return buf_.peek(count); }
int line() const { return line_; } int line() const { return line_; }
int ch() const { return ch_; } int ch() const { return ch_; }


private:
enum class Operator {
COLON_EQUALS,
PLUS_EQUALS,
EQUALS_PLUS,
BAR_EQUALS,
NONE,
};
Token readToken(const BXVariables &vars);
Token &peekToken() { return tok_; }

void parse(BXVariables &vars, bool oneLine = false);
void parseLine(BXVariables &vars) { parse(vars, true); }
void parseList(BXVariables &vars, std::vector<std::string> &list);


private:
[[noreturn]] void error(std::string); [[noreturn]] void error(std::string);
[[noreturn]] void error(std::string, TokenKind);


Operator readOperator();
void skipWhitespaceLine();
std::string readIdent(const BXVariables &vars);
void skipWhitespace(); void skipWhitespace();
Token getToken(const BXVariables &vars);
std::string readString(const BXVariables &vars);
std::string readQuotedString(const BXVariables &vars);
std::string readStringExpansion(const BXVariables &vars);
char readEscape();
void parseList(
BXVariables &vars, std::vector<std::string> &var,
void (*addVal)(std::vector<std::string> &var, std::string val),
bool oneLine);


char parseEscape();
void parseExpansion(const BXVariables &vars, std::vector<std::string> &values);
void parseQuotedExpansion(const BXVariables &vars, std::string &content);
void parseQuotedString(const BXVariables &vars, std::string &content);
bool parseString(const BXVariables &vars, std::string &content, int sep = -1);
bool parseIdentifier(std::string &content);

int flags_;
int line_; int line_;
int ch_; int ch_;
Token tok_;


bufio::IBuf<> buf_; bufio::IBuf<> buf_;
}; };

+ 2
- 2
lib/CompileStep.cc View File

} }


bufio::IFStream f(path_); bufio::IFStream f(path_);
BXParser parser(f, BXParser::FLAG_ONE_LINE);
BXParser parser(f);


while (parser.peek() != EOF) { while (parser.peek() != EOF) {
if (startsWith(parser, "//#bx")) { if (startsWith(parser, "//#bx")) {
parser.parse(variables_);
parser.parseLine(variables_);
} else { } else {
while (parser.peek() != EOF && parser.get() != '\n'); while (parser.peek() != EOF && parser.get() != '\n');
} }

Loading…
Cancel
Save