Browse Source

completely rewritten parser

feature/new-parser
Martin Dørum 3 years ago
parent
commit
1d20362ef4
5 changed files with 342 additions and 332 deletions
  1. 1
    1
      Makefile
  2. 2
    2
      cmd/main.cc
  3. 297
    303
      lib/BXParser.cc
  4. 40
    24
      lib/BXParser.h
  5. 2
    2
      lib/CompileStep.cc

+ 1
- 1
Makefile View File

@@ -8,7 +8,7 @@ HDRS = \
lib/toolchain.h lib/bufio.h
BUILD = build
OBJS = $(patsubst %,$(BUILD)/%.o,$(SRCS))
CFLAGS = -std=c++14 -Wall -Wextra -Wno-unused-parameter -O3 -g
CFLAGS = -std=c++14 -Wall -Wextra -Wno-unused-parameter -Ilib -O3 -g
LDLIBS = -lpthread

$(BUILD)/%.cc.o: %.cc $(HDRS)

+ 2
- 2
cmd/main.cc View File

@@ -43,13 +43,13 @@ static void run(std::vector<std::string> args, std::vector<std::pair<std::string
BXVariables variables;
if (sys::fileExists(path + "/.config.bx")) {
bufio::IFStream f(path + "/.config.bx");
BXParser parser(f, BXParser::FLAG_NONE);
BXParser parser(f);
parser.parse(variables);
}

for (auto &pair: kwargs) {
bufio::ISStream ss(pair.second);
BXParser parser(ss, BXParser::FLAG_NONE);
BXParser parser(ss);
auto &list = variables[pair.first];
list.clear();
parser.parseList(variables, list);

+ 297
- 303
lib/BXParser.cc View File

@@ -16,29 +16,6 @@ int BXParser::get() {
return c;
}

// Try to read one of the two-character assignment operators
// (':=', '+=', '=+', '|=') at the current position.
// Consumes both characters and returns the matching Operator on success;
// consumes nothing and returns Operator::NONE otherwise.
BXParser::Operator BXParser::readOperator() {
	const int second = peek(2);
	const int first = peek();

	Operator found;
	if (first == ':' && second == '=') {
		found = Operator::COLON_EQUALS;
	} else if (first == '+' && second == '=') {
		found = Operator::PLUS_EQUALS;
	} else if (first == '=' && second == '+') {
		found = Operator::EQUALS_PLUS;
	} else if (first == '|' && second == '=') {
		found = Operator::BAR_EQUALS;
	} else {
		return Operator::NONE;
	}

	// Every recognised operator is exactly two characters long.
	skip();
	skip();
	return found;
}

void BXParser::skip(char expected) {
int ch = get();
if (ch == EOF) {
@@ -52,340 +29,251 @@ void BXParser::skip(char expected) {
throw BXParseError(std::to_string(line_) + ":" + std::to_string(ch_) + ": " + msg);
}

// Returns true when ch is a space, tab, carriage return or line feed.
static bool isWhitespace(int ch) {
	switch (ch) {
	case ' ':
	case '\t':
	case '\r':
	case '\n':
		return true;

	default:
		return false;
	}
}

// Consume whitespace at the current position.
// When FLAG_ONE_LINE is set, stops in front of '\r' and '\n' so that the
// end of the line is left unread.
void BXParser::skipWhitespace() {
	const bool stopAtLineEnd = (flags_ & FLAG_ONE_LINE) != 0;

	for (;;) {
		const int next = peek();
		if (!isWhitespace(next)) {
			return;
		}

		if (stopAtLineEnd && (next == '\r' || next == '\n')) {
			return;
		}

		get();
	}
}
[[noreturn]] void BXParser::error(std::string msg, TokenKind kind) {
switch (kind) {
case TokenKind::E_O_F:
msg += " EOF";
break;

char BXParser::parseEscape() {
skip(); // '\'
int ch;
switch (ch = get()) {
case EOF:
error("Unexpected EOF");
case TokenKind::INDENTATION:
msg += " indentation";
break;

case 'n':
return '\n';
case TokenKind::NEWLINE:
msg += " newline";
break;

case 'r':
return '\r';
case TokenKind::COMMA:
msg += " comma ','";
break;

case 't':
return '\t';
case TokenKind::COLON_EQUALS:
msg += " colon equals ':='";
break;

default:
return (char)ch;
}
}
case TokenKind::PLUS_EQUALS:
msg += " plus equals '+='";
break;

static void appendVariableToString(
const BXVariables &vars, std::string &name,
std::string &value) {
if (name.size() == 0)
return;

auto it = vars.find(name);
if (it == vars.end())
return;

auto &vec = it->second;
bool first = true;
for (auto &part: vec) {
if (!first) {
value += ' ';
}
case TokenKind::EQUALS_PLUS:
msg += " equals plus '=+'";
break;

first = false;
value += part;
}
}
case TokenKind::BAR_EQUALS:
msg += " bar equals '|='";
break;

static void appendVariableToArray(
const BXVariables &vars, const std::string &name,
std::vector<std::string> &values) {
if (name.size() == 0)
return;
case TokenKind::EXPANSION:
msg += " expansion";
break;

auto it = vars.find(name);
if (it == vars.end())
return;
case TokenKind::STRING:
msg += " string";
break;

auto &vec = it->second;
for (auto &part: vec) {
values.push_back(part);
case TokenKind::NONE:
msg += " none";
break;
}
}

void BXParser::parseExpansion(const BXVariables &vars, std::vector<std::string> &values) {
skip(); // '$'

std::string str;
switch (peek()) {
case '{':
skip();
parseString(vars, str, '}');
skip('}');
appendVariableToArray(vars, str, values);
break;

default:
if (!parseIdentifier(str)) {
error("No identifier after $.");
}

appendVariableToArray(vars, str, values);
break;
}
error(msg);
}

void BXParser::parseQuotedExpansion(const BXVariables &vars, std::string &content) {
skip(); // '$'

std::string BXParser::readIdent(const BXVariables &vars) {
std::string str;
switch (peek()) {
case '{':
skip();
parseString(vars, str, '}');
skip('}');
appendVariableToString(vars, str, content);
break;

default:
if (!parseIdentifier(str)) {
error("No identifier after $.");
}

appendVariableToString(vars, str, content);
break;
}
}

void BXParser::parseQuotedString(const BXVariables &vars, std::string &content) {
skip(); // '"'

int ch;
while ((ch = peek()) != EOF) {
switch (ch) {
case EOF:
error("Unexpected EOF");

case '\\':
content.push_back(parseEscape());
break;

case '$':
parseQuotedExpansion(vars, content);
break;

case '"':
skip();
return;

default:
content.push_back(get());
break;
if (
(ch >= 'a' && ch <= 'z') ||
(ch >= 'A' && ch <= 'Z') ||
(ch == '_')) {
str.push_back(ch);
get();
} else {
break;
}
}

return str;
}

bool BXParser::parseString(const BXVariables &vars, std::string &content, int sep) {
bool success = false;
void BXParser::skipWhitespace() {
int ch;
while (1) {
ch = peek();
if ((sep > 0 && ch == sep) || isWhitespace(ch)) {
return success;
}

switch (ch) {
case EOF:
return success;

case '\\':
content.push_back(parseEscape());
success = true;
break;

case '$':
parseQuotedExpansion(vars, content);
success = true;
break;

case '"':
parseQuotedString(vars, content);
success = true;
break;

default:
int ch2 = peek(2);
if (
(ch == ':' && ch2 == '=') ||
(ch == '+' && ch2 == '=') ||
(ch == '=' && ch2 == '+') ||
(ch == '|' && ch2 == '=')) {
return success;
}

content.push_back(get());
success = true;
break;
}
while ((ch = peek()) == ' ' || ch == '\t' || ch == '\r' || ch == '\n') {
get();
}
}

bool BXParser::parseIdentifier(std::string &content) {
int ch = peek();
if (!(
(ch >= 'a' && ch <= 'z') ||
(ch >= 'A' && ch <= 'Z') ||
(ch == '_'))) {
return false;
}

content += get();

while (1) {
ch = peek();
if (!(
(ch >= '0' && ch <= '9') ||
(ch >= 'a' && ch <= 'z') ||
(ch >= 'A' && ch <= 'Z') ||
(ch == '_'))) {
return true;
}

content += get();
char BXParser::readEscape() {
int ch = get();
if (ch == EOF) {
error("Unexpected EOF");
} else if (ch == 'n') {
return '\n';
} else if (ch == 'r') {
return '\r';
} else if (ch == 't') {
return '\t';
} else {
return (char)ch;
}
}

void BXParser::parse(BXVariables &vars) {
std::string key, value;
std::vector<std::string> values;

skipWhitespace();
if (!parseString(vars, key)) {
return;
std::string BXParser::readStringExpansion(const BXVariables &vars) {
bool braced = peek() == '{';
std::string key;
if (braced) {
get();
skipWhitespace();
key = readString(vars);
} else {
key = readIdent(vars);
}

skipWhitespace();
Operator prevOper = readOperator();
if (prevOper == Operator::NONE) {
error("Expected operator.");
auto it = vars.find(key);
if (it == vars.end()) {
error("Key '" + key + "' doesn't exist");
}

auto doAssignment = [&] {
switch (prevOper) {
case Operator::COLON_EQUALS:
vars[key] = std::move(values);
values.clear();
break;
if (braced) {
skipWhitespace();

case Operator::PLUS_EQUALS:
{
auto &vec = vars[key];
vec.reserve(vec.size() + values.size());
for (size_t i = 0; i < values.size(); ++i) {
vec.push_back(std::move(values[i]));
}
}
values.clear();
break;
if (peek() != '}') {
error("Expected a '}' after a '${' expansion");
}

case Operator::EQUALS_PLUS:
{
auto &vec = vars[key];
vec.reserve(vec.size() + values.size());
for (size_t i = 0; i < vec.size(); ++i) {
values.push_back(std::move(vec[i]));
}
vec = std::move(values);
}
values.clear();
break;
get();
}

case Operator::BAR_EQUALS:
{
auto &vec = vars[key];
for (size_t i = 0; i < vec.size(); ++i) {
bool exists = false;
for (auto &val: values) {
if (val == vec[i]) {
exists = true;
break;
}
}
// TODO: Use BXValue.asString()
return it->second[0];
}

if (!exists) {
values.push_back(std::move(vec[i]));
}
}
vec = std::move(values);
}
values.clear();
break;
std::string BXParser::readQuotedString(const BXVariables &vars) {
std::string str;

case Operator::NONE:
int ch;
while ((ch = peek()) != EOF) {
if (ch == '\\') {
get();
str.push_back(readEscape());
} else if (ch == '$') {
get();
str += readStringExpansion(vars);
} else if (ch == '"') {
get();
break;
} else {
str.push_back(ch);
get();
}
};
}

while (true) {
skipWhitespace();
return str;
}

std::string BXParser::readString(const BXVariables &vars) {
std::string str;

// Parse next value
if (peek() == '$') {
parseExpansion(vars, values);
value.clear();
continue; // We can't have an assignment after an expansion
} else if (!parseString(vars, value)) {
int ch;
while ((ch = peek()) != EOF) {
if (ch == '\\') {
get();
str.push_back(readEscape());
} else if (ch == '$') {
get();
str += readStringExpansion(vars);
} else if (ch == '"') {
get();
str += readQuotedString(vars);
} else if (
ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n' ||
ch == '}' || ch == ',') {
break;
} else {
str.push_back(ch);
get();
}
}

skipWhitespace();
return str;
}

// If there's an operator next, the value we just read was a actually a key.
// Otherwise, it was just another value.
Operator op = readOperator();
if (op == Operator::NONE) {
values.push_back(std::move(value));
value.clear();
} else {
if (value.size() == 0) {
error("Expected string before assignment operator");
}
BXParser::Token BXParser::getToken(const BXVariables &vars) {
Token tok;
tok.line = line();
tok.ch = ch();

doAssignment();
prevOper = op;
key = std::move(value);
value.clear();
}
int ch = peek();

if (ch == EOF) {
tok.kind = TokenKind::E_O_F;
return tok;
}

doAssignment();
}
if (ch == '\t' || ch == ' ') {
tok.kind = TokenKind::INDENTATION;
do {
get();
ch = peek();
} while (ch == '\t' || ch == ' ');
return tok;
} else if (ch == '\n' || ch == '\r') {
tok.kind = TokenKind::NEWLINE;
do {
get();
ch = peek();
} while (ch == '\n' || ch == '\r');
return tok;
}

void BXParser::parseList(const BXVariables &vars, std::vector<std::string> &values) {
while (true) {
int ch2 = peek(2);
if (ch == ',') {
get();
tok.kind = TokenKind::COMMA;
} else if (ch == ':' && ch2 == '=') {
get(); get();
tok.kind = TokenKind::COLON_EQUALS;
} else if (ch == '+' && ch2 == '=') {
get(); get();
tok.kind = TokenKind::PLUS_EQUALS;
} else if (ch == '=' && ch2 == '+') {
get(); get();
tok.kind = TokenKind::EQUALS_PLUS;
} else if (ch == '|' && ch2 == '=') {
get(); get();
tok.kind = TokenKind::BAR_EQUALS;
} else if (ch == '$' && ch2 == '{') {
get(); get();
skipWhitespace();
std::string value;
if (!parseString(vars, value)) {
break;
tok.kind = TokenKind::EXPANSION;
tok.str = readString(vars);
skipWhitespace();
if (peek() != '}') {
error("Expected a '}' after a '${' expansion.");
}
get();
} else if (ch == '$') {
get();
tok.kind = TokenKind::EXPANSION;
tok.str = readString(vars);
} else {
tok.kind = TokenKind::STRING;
tok.str = readString(vars);
}

values.push_back(std::move(value));
while ((ch = peek()) == '\t' || ch == ' ') {
get();
}

return tok;
}

// Return the buffered lookahead token and replace it with the next token
// produced by getToken().
BXParser::Token BXParser::readToken(const BXVariables &vars) {
	Token upcoming = getToken(vars);

	// After the swap, tok_ holds the new lookahead and `upcoming` holds the
	// token that was buffered before this call.
	std::swap(tok_, upcoming);
	return upcoming;
}

void BXWriter::escape(const std::string &str) {
@@ -419,3 +307,109 @@ void BXWriter::write(const BXVariables &vars) {
buf_.put('\n');
}
}

// Parse a sequence of `key <operator> values...` assignments into vars.
//
// Supported operators:
//   ':='  replace the variable's values
//   '+='  append values
//   '=+'  insert each value at the front
//   '|='  append each value unless an equal value is already present
//
// With oneLine set, parsing stops at the first NEWLINE token; otherwise it
// continues until EOF. Reports an error (via error()) when a key or an
// operator is expected and something else is found.
void BXParser::parse(BXVariables &vars, bool oneLine) {
	// Prime the one-token lookahead; the token returned here is whatever was
	// buffered before and is deliberately discarded.
	readToken(vars);

	while (true) {
		TokenKind kind = peekToken().kind;
		if (kind == TokenKind::E_O_F) {
			break;
		} else if (oneLine && kind == TokenKind::NEWLINE) {
			// In one-line mode parseList() returns on NEWLINE without reading
			// it. Treat it as the end of the line instead of reporting
			// "Expected string, got newline".
			break;
		} else if (kind != TokenKind::STRING) {
			error("Expected string, got", kind);
		}

		Token t = readToken(vars);
		std::vector<std::string> &var = vars[t.str];

		void (*addVal)(std::vector<std::string> &var, std::string val);
		switch (peekToken().kind) {
		case TokenKind::COLON_EQUALS:
			var.clear();

			// Fallthrough
		case TokenKind::PLUS_EQUALS:
			addVal = [](auto &var, auto val) {
				var.push_back(std::move(val));
			};
			break;

		case TokenKind::EQUALS_PLUS:
			addVal = [](auto &var, auto val) {
				var.insert(var.begin(), std::move(val));
			};
			break;

		case TokenKind::BAR_EQUALS:
			addVal = [](auto &var, auto val) {
				for (auto &v: var) {
					if (v == val) {
						return;
					}
				}

				var.push_back(std::move(val));
			};
			break;

		default:
			// error() does not return, so addVal is always set below.
			error("Expected operator, got", peekToken().kind);
		}
		readToken(vars); // Read the operator

		parseList(vars, var, addVal, oneLine);
	}
}

// Read a list of values and feed each one to addVal(var, value).
//
// STRING tokens contribute their text; EXPANSION tokens contribute every
// value currently stored in vars under the expansion's name (looked up with
// operator[], so an unknown name yields no values).
//
// The list ends at EOF, after a COMMA (which is consumed), or at a NEWLINE:
// in oneLine mode the NEWLINE is left unread; otherwise it is consumed and
// the list only continues if the next token is INDENTATION.
// Any other token is reported through error().
void BXParser::parseList(
		BXVariables &vars, std::vector<std::string> &var,
		void (*addVal)(std::vector<std::string> &var, std::string val),
		bool oneLine) {
	while (true) {
		Token tok = peekToken();
		switch (tok.kind) {
		case TokenKind::NEWLINE:
			if (oneLine) {
				return;
			}

			readToken(vars);
			if (peekToken().kind != TokenKind::INDENTATION) {
				return;
			}

			readToken(vars); // Read indentation
			break;

		case TokenKind::STRING:
			addVal(var, std::move(tok.str));
			readToken(vars);
			break;

		case TokenKind::COMMA:
			readToken(vars);
			return;

		case TokenKind::E_O_F:
			return;

		case TokenKind::EXPANSION:
			{
				// Take a copy before appending: vars[tok.str] may be the very
				// vector `var` refers to (e.g. `FOO += $FOO`), and addVal
				// modifies `var`, which would invalidate the iteration.
				const std::vector<std::string> expanded = vars[tok.str];
				for (auto &v: expanded) {
					addVal(var, v);
				}
			}
			readToken(vars);
			break;

		default:
			error("Unexpected token", tok.kind);
		}
	}
}

// Convenience overload: read a multi-line list and append every value to
// var in the order it is read.
void BXParser::parseList(BXVariables &vars, std::vector<std::string> &var) {
	parseList(
		vars, var,
		[](std::vector<std::string> &dest, std::string item) {
			dest.push_back(std::move(item));
		},
		false);
}

+ 40
- 24
lib/BXParser.h View File

@@ -21,14 +21,29 @@ using BXVariables = std::unordered_map<std::string, std::vector<std::string>>;

class BXParser {
public:
static const int FLAG_NONE = 0;
static const int FLAG_ONE_LINE = 1 << 0;
// The kinds of token the tokenizer (getToken) hands to the parser.
enum class TokenKind {
// End of input.
E_O_F,
// A run of tabs and/or spaces found where a token starts.
INDENTATION,
// A run of '\n' and/or '\r' characters.
NEWLINE,
// ','
COMMA,
// ':='
COLON_EQUALS,
// '+='
PLUS_EQUALS,
// '=+'
EQUALS_PLUS,
// '|='
BAR_EQUALS,
// '$name' or '${name}'; the name is carried in Token::str.
EXPANSION,
// Any other text; the text is carried in Token::str.
STRING,
// NOTE(review): not produced by the visible tokenizer code; presumably a
// placeholder for "no token" -- confirm.
NONE,
};

BXParser(bufio::IStream &stream, int flags = FLAG_NONE, int line = 1, int ch = 1):
flags_(flags), line_(line), ch_(ch), buf_(stream) {}
// One token produced by getToken, together with where it was read.
struct Token {
// Which kind of token this is.
TokenKind kind;
// Text payload; getToken fills it in for STRING and EXPANSION tokens.
std::string str;
// Values of line() and ch() captured when getToken started on this token.
int line;
int ch;
};

void parse(BXVariables &vars);
void parseList(const BXVariables &vars, std::vector<std::string> &values);
BXParser(bufio::IStream &stream, int line = 1, int ch = 1):
line_(line), ch_(ch), buf_(stream) {}

int get();
int peek(size_t count = 1) { return buf_.peek(count); }
@@ -38,31 +53,32 @@ public:
int line() const { return line_; }
int ch() const { return ch_; }

private:
enum class Operator {
COLON_EQUALS,
PLUS_EQUALS,
EQUALS_PLUS,
BAR_EQUALS,
NONE,
};
Token readToken(const BXVariables &vars);
Token &peekToken() { return tok_; }

void parse(BXVariables &vars, bool oneLine = false);
void parseLine(BXVariables &vars) { parse(vars, true); }
void parseList(BXVariables &vars, std::vector<std::string> &list);

private:
[[noreturn]] void error(std::string);
[[noreturn]] void error(std::string, TokenKind);

Operator readOperator();
void skipWhitespaceLine();
std::string readIdent(const BXVariables &vars);
void skipWhitespace();
Token getToken(const BXVariables &vars);
std::string readString(const BXVariables &vars);
std::string readQuotedString(const BXVariables &vars);
std::string readStringExpansion(const BXVariables &vars);
char readEscape();
void parseList(
BXVariables &vars, std::vector<std::string> &var,
void (*addVal)(std::vector<std::string> &var, std::string val),
bool oneLine);

char parseEscape();
void parseExpansion(const BXVariables &vars, std::vector<std::string> &values);
void parseQuotedExpansion(const BXVariables &vars, std::string &content);
void parseQuotedString(const BXVariables &vars, std::string &content);
bool parseString(const BXVariables &vars, std::string &content, int sep = -1);
bool parseIdentifier(std::string &content);

int flags_;
int line_;
int ch_;
Token tok_;

bufio::IBuf<> buf_;
};

+ 2
- 2
lib/CompileStep.cc View File

@@ -130,11 +130,11 @@ BXVariables &CompileStep::variables() {
}

bufio::IFStream f(path_);
BXParser parser(f, BXParser::FLAG_ONE_LINE);
BXParser parser(f);

while (parser.peek() != EOF) {
if (startsWith(parser, "//#bx")) {
parser.parse(variables_);
parser.parseLine(variables_);
} else {
while (parser.peek() != EOF && parser.get() != '\n');
}

Loading…
Cancel
Save