/* This (almost) completes that parsing phase for the very small language. This input { fact = 1; i = 1; n = 7; if n < 0 then print 999 else while i <= n do { fact = fact * i; i = i + 1 } fi; print fact } produces this output block sequence assignment, fact number, 1, 1 assignment, i number, 1, 1 assignment, n number, 7, 7 conditional expression, < name, n number, 0, 0 print number, 999, 999 loop expression, <= name, i name, n block sequence assignment, fact expression, * name, fact name, i assignment, i expression, + name, i number, 1, 1 print name, fact */ #include #include #include #include #include #include #include // this is not allowed in assignments or tests in ECE 318 #include using namespace std; jmp_buf escape; unordered_map variables; struct symbol { string form; string type; int value; bool hasvalue; symbol(string t, string f = "") { type = t; form = f; hasvalue = false; } symbol(string t, int v) { type = t; form = ""; value = v; hasvalue = true; } symbol(string t, string f, int v) { type = t; form = f; value = v; hasvalue = true; } string to_string() const { ostringstream os; os << "symbol[type = \"" << type << "\""; if (form != "") os << ", form = \"" << form << "\""; if (hasvalue) os << ", value = " << value; os << "]"; return os.str(); } }; ostream & operator<<(ostream & out, const symbol & sym) { out << sym.to_string(); return out; } struct inputsystem { string line; int pos, len, linenumber, numerrors; istream * file; bool dontread; static const char control_d = 'D' - 64; void clear() { line = ""; pos = 0; len = 0; numerrors = 0; linenumber = 0; dontread = false; file = & cin; } void open(istream & f) { file = & f; } void close() { if (file != & cin) ((ifstream *)file)->close(); } inputsystem(istream & f) { clear(); open(f); } inputsystem() { clear(); } char get() { while (true) { if (pos < 0) { pos += 1; return ' '; } else if (pos == len) { pos += 1; return ' '; } else if (pos > len) { getline(* file, line); if (file->fail()) return control_d; pos = 0; len = line.length(); linenumber += 1; continue; } char c = line[pos]; pos += 1; return c; } } void unget() { pos -= 1; } void unget(char c) { pos -= 1; if (pos >= 0) line[pos] = c; } void error(string message, string detail = "") { cout << "Line " << linenumber << " Error " << message << " " << detail << "\n"; cout << line << "\n"; numerrors += 1; // if (numerrors >= 10) // { cout << "Too many errors, giving up.\n"; // exit(1); } } longjmp(escape, 1); } symbol lexan() { static symbol sym("error"); if (dontread) { dontread = false; return sym; } char c = get(); while (c <= ' ' && c != control_d) c = get(); if (c >= '0' && c <= '9') { string form = ""; int value = 0; while (c >= '0' && c <= '9') { form += c; value = value * 10 + c - '0'; c = get(); } unget(); return sym = symbol("number", form, value); } else if (c == '{') return sym = symbol("punctuation", "{"); else if (c == '}') return sym = symbol("punctuation", "}"); else if (c == '(') return sym = symbol("punctuation", "("); else if (c == ')') return sym = symbol("punctuation", ")"); else if (c == ';') return sym = symbol("punctuation", ";"); else if (c == '=') { c = get(); if (c == '=') return sym = symbol("operator", "=="); unget(); return sym = symbol("operator", "="); } else if (c == '+') return sym = symbol("operator", "+"); else if (c == '-') return sym = symbol("operator", "-"); else if (c == '*') return sym = symbol("operator", "*"); else if (c == '/') return sym = symbol("operator", "/"); else if (c == '%') return sym = symbol("operator", "%"); else if (c == '^') return sym = symbol("operator", "^"); else if (c == '>') { c = get(); if (c == '=') return sym = symbol("operator", ">="); unget(); return sym = symbol("operator", ">"); } else if (c == '<') { c = get(); if (c == '=') return sym = symbol("operator", "<="); unget(); return sym = symbol("operator", "<"); } else if (c == '!') { c = get(); if (c == '=') return sym = symbol("operator", "!="); error("lone !"); return sym = symbol("error"); } else if (c == '&') { c = get(); if (c == '&') return sym = symbol("operator", "&&"); error("lone &\n"); return sym = symbol("error"); } else if (c == '|') { c = get(); if (c == '|') return sym = symbol("operator", "||"); error("lone |\n"); return sym = symbol("error"); } else if (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z') { string form = ""; while (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9') { form += c; c = get(); } unget(); if (form == "while" || form == "do" || form == "if" || form == "then" || form == "else" || form == "fi" || form == "print") return sym = symbol("reserved_word", form); else if (form == "and") return sym = symbol("operator", "&&"); else if (form == "or") return sym = symbol("operator", "||"); else if (form == "not") return sym = symbol("operator", form); else return sym = symbol("name", form); } else if (c == control_d) return sym = symbol("end_of_file", "eof"); else { string msg = "unrecognised character: "; if (c >= ' ' && c <= '~') msg = msg + "'" + c + "'"; else msg = msg + "ASCII[" + to_string((int)c) + "]"; error(msg); } return sym = symbol("error"); } void unlexan() { dontread = true; } void test() { while (true) { symbol sy = lexan(); cout << sy << "\n"; if (sy.type == "end_of_file") break; } } }; struct node { string type, strval; int intval; vector subtrees; node(string t, string v = "", int i = -1) { type = t; strval = v; intval = i; } node(string t, int i) { type = t; strval = ""; intval = i; } void add(node * c) { subtrees.push_back(c); } }; void print(node * n, int depth = 0) { cout << setw(depth * 3) << "" << n->type; if (n->strval != "") cout << ", " << n->strval; if (n->intval != -1) cout << ", " << n->intval; cout << "\n"; for (int i = 0; i < n->subtrees.size(); i += 1) print(n->subtrees[i], depth + 1); } node * p_expression(inputsystem & in); node * p_statement(inputsystem & in); node * p_value(inputsystem & in) { symbol s = in.lexan(); if (s.type == "number") return new node("number", s.form, s.value); else if (s.type == "name") return new node("name", s.form); else if (s.form == "(") { node * r = p_expression(in); s = in.lexan(); if (s.form != ")") { in.error("parentheses not balanced"); return NULL; } return r; } else { in.error("unexpected symbol in value", s.form); return NULL; } } node * p_expression(inputsystem & in) { node * L = p_value(in); while (true) { symbol s = in.lexan(); if (s.type != "operator") { in.unlexan(); break; } node * R = p_value(in); node * n = new node("expression", s.form); n->add(L); n->add(R); L = n; } return L; } node * p_output(inputsystem & in) { symbol s = in.lexan(); if (s.form != "print") { in.error("print was expected, got", s.form); return NULL; } node * t = p_expression(in); node * n = new node("print"); n->add(t); return n; } node * p_sequence(inputsystem & in) { node * r = new node("sequence"); while (true) { node * n = p_statement(in); r->add(n); symbol s = in.lexan(); if (s.form != ";") { in.unlexan(); break; } } return r; } node * p_block(inputsystem & in) { symbol s = in.lexan(); if (s.form != "{") { in.error("{ was expected, got", s.form); return NULL; } node * n = new node("block"); node * t = p_sequence(in); n->add(t); s = in.lexan(); if (s.form != "}") { in.error("error in statement, } was expected got", s.form); return NULL; } return n; } node * p_assignment(inputsystem & in) { symbol s = in.lexan(); if (s.type != "name") { in.error("variable for assignment expected, got", s.form); return NULL; } node * n = new node("assignment", s.form); s = in.lexan(); if (s.form != "=") { in.error("= after variable for assignment expected, got", s.form); return NULL; } n->add(p_expression(in)); return n; } node * p_loop(inputsystem & in) { symbol s = in.lexan(); if (s.form != "while") { in.error("while was expected, got", s.form); return NULL; } node * n = new node("loop"); n->add(p_expression(in)); s = in.lexan(); if (s.form != "do") { in.error("do was expected after while expression, got", s.form); return NULL; } n->add(p_statement(in)); return n; } node * p_conditional(inputsystem & in) { symbol s = in.lexan(); if (s.form != "if") { in.error("if was expected, got", s.form); return NULL; } node * n = new node("conditional"); n->add(p_expression(in)); s = in.lexan(); if (s.form != "then") { in.error("then was expected after if expression, got", s.form); return NULL; } n->add(p_statement(in)); s = in.lexan(); if (s.form == "else") { n->add(p_statement(in)); s = in.lexan(); } if (s.form != "fi") { in.error("fi was exected at end of if, got", s.form); return NULL; } return n; } node * p_statement(inputsystem & in) { symbol s = in.lexan(); if (s.type == "end_of_file") return new node("eof"); in.unlexan(); if (s.form == "print") return p_output(in); else if (s.form == "while") return p_loop(in); else if (s.form == "if") return p_conditional(in); else if (s.form == "{") return p_block(in); else if (s.type == "name" || s.type == "number" || s.form == "(") return p_assignment(in); else { in.error("start of statement expected, got", s.form); return NULL; } } int main(int argc, char * argv[]) { ifstream fi; inputsystem in; if (argc > 1) { fi.open(argv[1]); if (fi.fail()) { cout << "Can't open \"" << argv[1] << "\"\n"; exit(1); } in.open(fi); } while (true) { node * t = NULL; if (setjmp(escape) == 0) { t = p_statement(in); if (t->type == "eof") break; if (t == NULL) continue; symbol s = in.lexan(); if (s.type != "end_of_file") in.error("expected end of file but got ", s.form); print(t); } else cout << "error detected while parsing\n"; } in.close(); }