Build tool
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

BXParser.cc 7.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444
  1. #include "BXParser.h"
  2. #include <stdlib.h>
  3. #include <stdio.h>
  4. #include <string.h>
  5. #include <errno.h>
  6. int BXParser::get() {
  7. if (bufidx_ < buflen_) {
  8. return buf_[bufidx_++];
  9. }
  10. bufidx_ = 0;
  11. stream_.read(buf_, sizeof(buf_));
  12. buflen_ = stream_.gcount();
  13. if (buflen_ == 0) {
  14. return EOF;
  15. }
  16. return buf_[bufidx_++];
  17. }
  18. int BXParser::peek() {
  19. if (bufidx_ < buflen_) {
  20. return buf_[bufidx_];
  21. } else {
  22. return stream_.peek();
  23. }
  24. }
  25. int BXParser::peek2() {
  26. if (bufidx_ + 1 < buflen_) {
  27. return buf_[bufidx_ + 1];
  28. } else {
  29. stream_.get();
  30. int ch = stream_.peek();
  31. stream_.unget();
  32. return ch;
  33. }
  34. }
  35. BXParser::Operator BXParser::readOperator() {
  36. int ch2 = peek2();
  37. if (peek() == ':' && ch2 == '=') {
  38. skip(); // ':'
  39. skip(); // '='
  40. return Operator::COLON_EQUALS;
  41. } else if (peek() == '+' && ch2 == '=') {
  42. skip(); // '+'
  43. skip(); // '='
  44. return Operator::PLUS_EQUALS;
  45. } else if (peek() == '=' && ch2 == '+') {
  46. skip(); // '='
  47. skip(); // '+'
  48. return Operator::EQUALS_PLUS;
  49. }
  50. return Operator::NONE;
  51. }
  52. void BXParser::skip(char expected) {
  53. int ch = get();
  54. if (ch == EOF) {
  55. error(std::string("Expected '") + expected + "', got EOF");
  56. } else if (ch != expected) {
  57. error(std::string("Expected '") + expected + "', got '" + (char)ch + "'");
  58. }
  59. }
  60. [[noreturn]] void BXParser::error(std::string msg) {
  61. throw BXParseError(std::to_string(line_) + ":" + std::to_string(ch_) + ": " + msg);
  62. }
  // Returns true if `ch` is a space, tab, carriage return or line feed.
  static bool isWhitespace(int ch) {
    switch (ch) {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
      return true;
    default:
      return false;
    }
  }
  68. void BXParser::skipWhitespace() {
  69. if (flags_ & FLAG_ONE_LINE) {
  70. int ch;
  71. while (isWhitespace(ch = peek()) && ch != '\r' && ch != '\n')
  72. get();
  73. } else {
  74. while (isWhitespace(peek()))
  75. get();
  76. }
  77. }
  78. char BXParser::parseEscape() {
  79. skip(); // '\'
  80. int ch;
  81. switch (ch = get()) {
  82. case EOF:
  83. error("Unexpected EOF");
  84. case 'n':
  85. return '\n';
  86. case 'r':
  87. return '\r';
  88. case 't':
  89. return '\t';
  90. default:
  91. return (char)ch;
  92. }
  93. }
  94. static void appendVariableToString(
  95. const BXVariables &vars, std::string &name,
  96. std::string &value) {
  97. if (name.size() == 0)
  98. return;
  99. auto it = vars.find(name);
  100. if (it == vars.end())
  101. return;
  102. auto &vec = it->second;
  103. bool first = true;
  104. for (auto &part: vec) {
  105. if (!first) {
  106. value += ' ';
  107. }
  108. first = false;
  109. value += part;
  110. }
  111. }
  112. static void appendVariableToArray(
  113. const BXVariables &vars, const std::string &name,
  114. std::vector<std::string> &values) {
  115. if (name.size() == 0)
  116. return;
  117. auto it = vars.find(name);
  118. if (it == vars.end())
  119. return;
  120. auto &vec = it->second;
  121. for (auto &part: vec) {
  122. values.push_back(part);
  123. }
  124. }
  125. void BXParser::parseExpansion(const BXVariables &vars, std::vector<std::string> &values) {
  126. skip(); // '$'
  127. std::string str;
  128. switch (peek()) {
  129. case '{':
  130. skip();
  131. parseString(vars, str, '}');
  132. skip('}');
  133. appendVariableToArray(vars, str, values);
  134. break;
  135. default:
  136. if (!parseIdentifier(str)) {
  137. error("No identifier after $.");
  138. }
  139. appendVariableToArray(vars, str, values);
  140. break;
  141. }
  142. }
  143. void BXParser::parseQuotedExpansion(const BXVariables &vars, std::string &content) {
  144. skip(); // '$'
  145. std::string str;
  146. switch (peek()) {
  147. case '{':
  148. skip();
  149. parseString(vars, str, '}');
  150. skip('}');
  151. appendVariableToString(vars, str, content);
  152. break;
  153. default:
  154. if (!parseIdentifier(str)) {
  155. error("No identifier after $.");
  156. }
  157. appendVariableToString(vars, str, content);
  158. break;
  159. }
  160. }
  161. void BXParser::parseQuotedString(const BXVariables &vars, std::string &content) {
  162. skip(); // '"'
  163. int ch;
  164. while ((ch = peek()) != EOF) {
  165. switch (ch) {
  166. case EOF:
  167. error("Unexpected EOF");
  168. case '\\':
  169. content.push_back(parseEscape());
  170. break;
  171. case '$':
  172. parseQuotedExpansion(vars, content);
  173. break;
  174. case '"':
  175. skip();
  176. return;
  177. default:
  178. content.push_back(get());
  179. break;
  180. }
  181. }
  182. }
  183. bool BXParser::parseString(const BXVariables &vars, std::string &content, int sep) {
  184. bool success = false;
  185. int ch;
  186. while (1) {
  187. ch = peek();
  188. if ((sep > 0 && ch == sep) || isWhitespace(ch)) {
  189. return success;
  190. }
  191. switch (ch) {
  192. case EOF:
  193. return success;
  194. case '\\':
  195. content.push_back(parseEscape());
  196. success = true;
  197. break;
  198. case '$':
  199. parseQuotedExpansion(vars, content);
  200. success = true;
  201. break;
  202. case '"':
  203. parseQuotedString(vars, content);
  204. success = true;
  205. break;
  206. default:
  207. if (ch == ':' && peek2() == '=')
  208. return success;
  209. content.push_back(get());
  210. success = true;
  211. break;
  212. }
  213. }
  214. }
  215. bool BXParser::parseIdentifier(std::string &content) {
  216. int ch = peek();
  217. if (!(
  218. (ch >= 'a' && ch <= 'z') ||
  219. (ch >= 'A' && ch <= 'Z') ||
  220. (ch == '_'))) {
  221. return false;
  222. }
  223. content += get();
  224. while (1) {
  225. ch = peek();
  226. if (!(
  227. (ch >= '0' && ch <= '9') ||
  228. (ch >= 'a' && ch <= 'z') ||
  229. (ch >= 'A' && ch <= 'Z') ||
  230. (ch == '_'))) {
  231. return true;
  232. }
  233. content += get();
  234. }
  235. }
  236. void BXParser::parse(BXVariables &vars) {
  237. std::string key, value;
  238. std::vector<std::string> values;
  239. skipWhitespace();
  240. if (!parseString(vars, key)) {
  241. return;
  242. }
  243. skipWhitespace();
  244. Operator prevOper = readOperator();
  245. if (prevOper == Operator::NONE) {
  246. error("Expected operator.");
  247. }
  248. auto doAssignment = [&] {
  249. switch (prevOper) {
  250. case Operator::COLON_EQUALS:
  251. vars[key] = std::move(values);
  252. values.clear();
  253. break;
  254. case Operator::PLUS_EQUALS:
  255. {
  256. auto &vec = vars[key];
  257. vec.reserve(vec.size() + values.size());
  258. for (size_t i = 0; i < values.size(); ++i) {
  259. vec.push_back(std::move(values[i]));
  260. }
  261. }
  262. values.clear();
  263. break;
  264. case Operator::EQUALS_PLUS:
  265. {
  266. auto &vec = vars[key];
  267. vec.reserve(vec.size() + values.size());
  268. for (size_t i = 0; i < vec.size(); ++i) {
  269. values.push_back(std::move(vec[i]));
  270. }
  271. vec = std::move(values);
  272. }
  273. values.clear();
  274. break;
  275. case Operator::NONE:
  276. break;
  277. }
  278. };
  279. while (true) {
  280. skipWhitespace();
  281. // Parse next value
  282. if (peek() == '$') {
  283. parseExpansion(vars, values);
  284. value.clear();
  285. continue; // We can't have an assignment after an expansion
  286. } else if (!parseString(vars, value)) {
  287. break;
  288. }
  289. skipWhitespace();
  290. // If there's an operator next, the value we just read was a actually a key.
  291. // Otherwise, it was just another value.
  292. Operator op = readOperator();
  293. if (op == Operator::NONE) {
  294. values.push_back(std::move(value));
  295. value.clear();
  296. } else {
  297. if (value.size() == 0) {
  298. error("Expected string before assignment operator");
  299. }
  300. doAssignment();
  301. prevOper = op;
  302. key = std::move(value);
  303. value.clear();
  304. }
  305. }
  306. doAssignment();
  307. }
  308. void BXParser::parseList(const BXVariables &vars, std::vector<std::string> &values) {
  309. while (true) {
  310. skipWhitespace();
  311. std::string value;
  312. if (!parseString(vars, value)) {
  313. break;
  314. }
  315. values.push_back(std::move(value));
  316. }
  317. }
  318. BXWriter::~BXWriter() {
  319. if (bufidx_ > 0) {
  320. stream_.write(buf_, bufidx_);
  321. }
  322. }
  323. void BXWriter::put(char ch) {
  324. buf_[bufidx_++] = ch;
  325. if (bufidx_ == sizeof(buf_)) {
  326. stream_.write(buf_, sizeof(buf_));
  327. bufidx_ = 0;
  328. }
  329. }
  330. void BXWriter::put(const std::string &str) {
  331. size_t w = sizeof(buf_) - bufidx_;
  332. if (w > str.size()) {
  333. w = str.size();
  334. }
  335. memcpy(buf_ + bufidx_, str.c_str(), w);
  336. if (str.size() - w > 0) {
  337. stream_.write(buf_, bufidx_ + w);
  338. stream_.write(str.c_str() + w, str.size() - w);
  339. bufidx_ = 0;
  340. } else {
  341. bufidx_ += w;
  342. }
  343. }
  344. void BXWriter::escape(const std::string &str) {
  345. put('"');
  346. for (char ch: str) {
  347. if (ch == '$' || ch == '"' || ch == '\\') {
  348. put('\\');
  349. }
  350. put(ch);
  351. }
  352. put('"');
  353. }
  354. void BXWriter::write(const BXVariables &vars) {
  355. for (const auto &pair: vars) {
  356. size_t chars = 0;
  357. put(pair.first);
  358. put(" :=");
  359. for (auto &val: pair.second) {
  360. if (chars >= 80) {
  361. put('\n');
  362. put('\t');
  363. chars = 0;
  364. } else {
  365. put(' ');
  366. }
  367. escape(val);
  368. chars += val.size();
  369. }
  370. put('\n');
  371. }
  372. }