#include #include #include #include #include #include #include "hashtable.h" using namespace std; bool debug = false; int combine(int a, int b) { return (a << 16) + b; } int first(int a) { return a >> 16; } int second(int a) { return a & 0xFFFF; } void split(int a, int & b, int & c) { b = a >> 16; c = a & 0xFFFF; } string str(int n) { string r; if (n > 9) r = str(n / 10); return r + (char)(n % 10 + '0'); } const int ty_reswd = 1, ty_int = 2, ty_symbol = 3, ty_name = 4, ty_string = 5, ty_eof = 6, ty_bad = 7; string type_names[] = { "", "reswd", "int", "symbol", "name", "string", "eof", "bad" }; const int num_types = sizeof(type_names) / sizeof(type_names[0]); const int rw_if = 1, rw_then = 2, rw_else = 3, rw_while = 4, rw_do = 5, rw_read = 6, rw_print = 7, rw_and = 8, rw_or = 9, rw_not = 10; string reswd_names[] = { "", "if", "then", "else", "while", "do", "read", "print", "and", "or", "not" }; const int num_reswds = sizeof(reswd_names) / sizeof(reswd_names[0]); const int sy_open_curly = 1, sy_close_curly = 2, sy_open_paren = 3, sy_close_paren = 4, sy_semicolon = 5, sy_plus = 6, sy_minus = 7, sy_times = 8, sy_divide = 9, sy_modulo = 10, sy_less_equal = 11, sy_not_equal = 12, sy_less = 13, sy_greater_equal = 14, sy_greater = 15, sy_equal = 16, sy_assign = 17, sy_comma = 18; string symbols[] = { "", "open_curly", "close_curly", "open_paren", "close_paren", "semicolon", "plus", "minus", "times", "divide", "modulo", "less_equal", "not_equal", "less", "greater_equal", "greater", "equal", "assign", "comma" }; const int num_symbols = sizeof(symbols) / sizeof(symbols[0]); struct lexeme { int type; string form; int value; lexeme * myself; static int number_created; lexeme(int t, string f, int v) { type = t; form = f; value = v; myself = NULL; } lexeme(int t, string f = "") { type = t; form = f; number_created += 1; value = number_created; myself = NULL; } string key() { return form; } void print(ostream & os) { os << "lexeme(type = "; if (type > 0 && type < num_types) os << type_names[type]; else os << "bad(" << type << ")"; os << ", form = \"" << form << "\""; if (type == ty_reswd) if (value > 0 && value < num_reswds) os << ", value = " << reswd_names[value]; else os << ", value = bad(" << value << ")"; else if (type == ty_int) os << ", value = " << value; else if (type == ty_symbol) if (value > 0 && value < num_symbols) os << ", value = " << symbols[value]; else os << ", value = bad(" << value << ")"; if (type == ty_name) os << ", id = " << value; os << ")"; } }; int lexeme::number_created = 0; ostream & operator<<(ostream & os, lexeme & lex) { lex.print(os); return os; } const int nt_error = 0, nt_number = 1, nt_name = 2, nt_string = 3, nt_read = 4, nt_simple = 5, nt_expression = 6, nt_expr_list = 7, nt_print_stmt = 8, nt_assign_stmt = 9, nt_while_stmt = 10, nt_if_stmt = 11, nt_stmt_list = 12, nt_compound = 13, nt_lexeme = 14; string node_names[] = { "error", "number", "name", "string", "read", "simple", "expression", "expr-list", "print-stmt", "assign-stmt", "while-stmt", "if-stmt", "stmt-list", "compound", "lexeme" }; const int num_node_names = sizeof(node_names) / sizeof(node_names[0]); struct basic_node { int type; int int_val; string string_val; basic_node(int t, int v = 0, string s = "") { type = t; int_val = v; string_val = s; } string safe_type() { if (type >= 0 && type < num_node_names) return node_names[type]; else return "bad(" + str(type) + ")"; } string safe_value() { if (type == nt_simple || type == nt_expression) { int t = first(int_val), d = second(int_val); if (t == ty_reswd) return reswd_names[d]; else if (t == ty_symbol) return symbols[d]; else return "(" + str(t) + ", " + str(d) + ")"; } else if (type == nt_number) return str(int_val); return ""; } virtual void print(ostream & os, int indent = 0) = 0; virtual ~basic_node() { } }; struct node: public basic_node { vector parts; node(int t): basic_node(t) { } node(int t, int v): basic_node(t, v) { } node(int t, string s): basic_node(t, 0, s) { } node(int t, int v, string s): basic_node(t, v, s) { } node(int t, string s, int v): basic_node(t, v, s) { } virtual ~node() { for (int i = 0; i < parts.size(); i += 1) delete parts[i]; } virtual void add(basic_node * n) { parts.push_back(n); } virtual void print(ostream & os, int indent = 0) { os << setw(4 * indent) << "" << safe_type() << " " << safe_value(); if (string_val != "") os << " \"" << string_val << "\""; cout << "\n"; for (int i = 0; i < parts.size(); i += 1) parts[i]->print(os, indent + 1); } }; struct lexeme_node: public basic_node { lexeme * lex; lexeme_node(lexeme * l): basic_node(nt_lexeme) { lex = l; } virtual void print(ostream & os, int indent = 0) { os << setw(4 * indent) << "" << * lex << "\n"; } }; string str(basic_node * bn) { if (bn == NULL) return "NULL"; node * n = dynamic_cast(bn); if (n != NULL) { string r = n->safe_type(); string t = n->safe_value(); if (t != "") r = r + "/" + t; if (n->string_val != "") r = r + "/\"" + n->string_val + "\""; r = r + "("; int np = n->parts.size(); if (np > 0) r = r + n->parts[0]->safe_type(); for (int i = 1; i < np; i += 1) r = r + ", " + n->parts[i]->safe_type(); r = r + ")"; return r; } lexeme_node * ln = dynamic_cast(bn); if (ln != NULL) return ln->safe_type() + "/" + ln->lex->myself->form + "/" + str(ln->lex->myself->number_created); return "bad"; } class interpreter { protected: hashtable ht; istream & in; bool normal; void unread() { normal = false; } lexeme next_symbol(string ind) { static lexeme L(ty_bad, "???", -1); if (! normal) { normal = true; if (debug) cout << ind << "lex. an. reusing " << L << "\n"; return L; } char c; while (true) { c = in.get(); if (in.fail() || c > ' ') break; } if (in.fail()) L = lexeme(ty_eof); else if (c == '{') L = lexeme(ty_symbol, "{", sy_open_curly); else if (c == '}') L = lexeme(ty_symbol, "}", sy_close_curly); else if (c == '(') L = lexeme(ty_symbol, "(", sy_open_paren); else if (c == ')') L = lexeme(ty_symbol, ")", sy_close_paren); else if (c == ',') L = lexeme(ty_symbol, ",", sy_comma); else if (c == ';') L = lexeme(ty_symbol, ";", sy_semicolon); else if (c == '+') L = lexeme(ty_symbol, "+", sy_plus); else if (c == '-') L = lexeme(ty_symbol, "-", sy_minus); else if (c == '*') L = lexeme(ty_symbol, "*", sy_times); else if (c == '/') L = lexeme(ty_symbol, "/", sy_divide); else if (c == '%') L = lexeme(ty_symbol, "%", sy_modulo); else if (c == '<') { c = in.peek(); if (c == '=') { in.get(); L = lexeme(ty_symbol, "<=", sy_less_equal); } else if (c == '>') { in.get(); L = lexeme(ty_symbol, "<>", sy_not_equal); } else L = lexeme(ty_symbol, "<", sy_less); } else if (c == '>') { c = in.peek(); if (c == '=') { in.get(); L = lexeme(ty_symbol, ">=", sy_greater_equal); } else L = lexeme(ty_symbol, ">", sy_greater); } else if (c == '=') { c = in.peek(); if (c == '=') { in.get(); L = lexeme(ty_symbol, "==", sy_equal); } else L = lexeme(ty_bad, string("?") + c); } else if (c == ':') { c = in.peek(); if (c == '=') { in.get(); L = lexeme(ty_symbol, ":=", sy_assign); } else L = lexeme(ty_bad, string(":") + c); } else if (c == '"') { string s = ""; bool bad = false; while (true) { c = in.get(); if (in.fail()) { s += c; bad = true; break; } if (c == '"') break; s += c; } if (bad) L = lexeme(ty_bad, s); else L = lexeme(ty_string, s); } else if (c >= '0' && c <= '9') { string s = ""; int val = 0; while (true) { s += c; val = val * 10 + c - '0'; c = in.peek(); if (c < '0' || c > '9') break; in.get(); } L = lexeme(ty_int, s, val); } else if (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z') { string s = ""; while (true) { if (c >= 'A' && c <= 'Z') s += (char)(c -'A' + 'a'); else s += c; c = in.peek(); if (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || c == '_') in.get(); else break; } lexeme * r = ht.lookup(s); if (r == NULL) { r = new lexeme(ty_name, s); r->myself = r; ht.add(r); } L = * r; } else L = lexeme(ty_bad, string("") + c); if (debug) cout << ind << "lex. an. returning " << L << "\n"; return L; } public: interpreter(istream & i): in(i) { ht.add(new lexeme(ty_reswd, "if", rw_if)); ht.add(new lexeme(ty_reswd, "then", rw_then)); ht.add(new lexeme(ty_reswd, "else", rw_else)); ht.add(new lexeme(ty_reswd, "while", rw_while)); ht.add(new lexeme(ty_reswd, "do", rw_do)); ht.add(new lexeme(ty_reswd, "read", rw_read)); ht.add(new lexeme(ty_reswd, "print", rw_print)); ht.add(new lexeme(ty_reswd, "and", rw_and)); ht.add(new lexeme(ty_reswd, "or", rw_or)); ht.add(new lexeme(ty_reswd, "not", rw_not)); normal = true; } // ::= number | name | string | "read" | "(" ")" basic_node * parse_value(string ind) { if (debug) cout << ind << "parse_value called\n"; lexeme L = next_symbol(ind + " "); basic_node * n = NULL; if (L.type == ty_int) n = new node(nt_number, L.value); else if (L.type == ty_name) n = new lexeme_node(L.myself); else if (L.type == ty_string) n = new node(nt_string, L.form); else if (L.type == ty_reswd && L.value == rw_read) n = new node(nt_read); else if (L.type == ty_symbol && L.value == sy_open_paren) { n = parse_expression(ind + " "); L = next_symbol(ind + " "); if (L.type != ty_symbol || L.value != sy_close_paren) { cerr << "error in expression, " << L << " seen when ) was expected\n"; exit(1); } } else { cerr << "value expected but " << L << " appeared\n"; exit(1); } if (debug) cout << ind << "+ parse_value returning " << str(n) << "\n"; return n; } // ::= | { "-" | "not" } basic_node * parse_simple(string ind) { if (debug) cout << ind << "parse_simple called\n"; lexeme L = next_symbol(ind + " "); if (L.type == ty_symbol && L.value == sy_minus || L.type == ty_reswd && L.value == rw_not) { int k = combine(L.type, L.value); node * n = new node(nt_simple, k, L.form); basic_node * simp = parse_simple(ind + " "); n->add(simp); if (debug) cout << ind << "+ parse_simple returning " << str(n) << "\n"; return n; } else { unread(); basic_node * n = parse_value(ind + " "); if (debug) cout << ind << "+ parse_simple returning " << str(n) << "\n"; return n; } } // ::= ( operator ) * basic_node * parse_expression(string ind) { if (debug) cout << ind << "parse_expression called\n"; basic_node * left = parse_simple(ind + "| "); lexeme L = next_symbol(ind + "| "); while (L.type == ty_symbol && (L.value == sy_plus || L.value == sy_minus || L.value == sy_times || L.value == sy_divide || L.value == sy_modulo || L.value == sy_less_equal || L.value == sy_not_equal || L.value == sy_less || L.value == sy_greater_equal || L.value == sy_greater || L.value == sy_equal) || L.type == ty_reswd && (L.value == rw_and || L.value == rw_or)) { basic_node * right = parse_simple(ind + "| "); int k = combine(L.type, L.value); node * n = new node(nt_expression, k, L.form); n->add(left); n->add(right); left = n; L = next_symbol(ind + "| "); } unread(); if (debug) cout << ind << "+ parse_expression returning " << str(left) << "\n"; return left; } // ::= { "," } basic_node * parse_expr_list(string ind) { if (debug) cout << ind << "parse_expr_list called\n"; basic_node * left = parse_expression(ind + " "); lexeme L = next_symbol(ind + " "); if (L.type == ty_symbol && L.value == sy_comma) { basic_node * right = parse_expr_list(ind + " "); node * n = new node(nt_expr_list); n->add(left); n->add(right); if (debug) cout << ind << "+ parse_expr_list returning " << str(n) << "\n"; return n; } else { unread(); if (debug) cout << ind << "parse_expr_list returning " << str(left) << "\n"; return left; } } // ::= "print" basic_node * parse_print_stmt(string ind) { if (debug) cout << ind << "parse_print_stmt called\n"; lexeme L = next_symbol(ind + "| "); if (L.type == ty_reswd && L.value == rw_print) { basic_node * el = parse_expr_list(ind + "| "); node * n = new node(nt_print_stmt); n->add(el); if (debug) cout << ind << "parse_print_stmt returning " << str(n) << "\n"; return n; } else { cerr << "print statement expected but " << L << " seen\n"; exit(1); } } // ::= "while" "do" basic_node * parse_while_stmt(string ind) { if (debug) cout << ind << "parse_while_stmt called\n"; lexeme L = next_symbol(ind + "| "); if (L.type == ty_reswd && L.value == rw_while) { basic_node * e = parse_expression(ind + "| "); L = next_symbol(ind + "| "); if (L.type != ty_reswd || L.value != rw_do) { cerr << "do expected\n"; exit(1); } basic_node * s = parse_statement(ind + "| "); node * n = new node(nt_while_stmt); n->add(e); n->add(s); if (debug) cout << ind << "+ parse_while_stmt returning " << str(n) << "\n"; return n; } else { cerr << "while expected\n"; exit(1); } } // ::= | | | | basic_node * parse_statement(string ind) { if (debug) cout << ind << "parse_statement called\n"; lexeme L = next_symbol(ind + " "); unread(); basic_node * n = NULL; if (L.type == ty_reswd && L.value == rw_print) n = parse_print_stmt(ind + " "); else if (L.type == ty_reswd && L.value == rw_while) n = parse_while_stmt(ind + " "); else { cerr << "bad statement\n"; exit(1); } if (debug) cout << ind << "+ parse_statement returning " << str(n) << "\n"; return n; } // ::= ( ";" ) * basic_node * parse_stmt_list(string ind) { if (debug) cout << ind << "parse_stmt_list called\n"; node * n = new node(nt_stmt_list); while (true) { basic_node * s = parse_statement(ind + " "); n->add(s); lexeme L = next_symbol(ind + " "); if (L.type != ty_symbol || L.value != sy_semicolon) { unread(); break; } } if (debug) cout << ind << "+ parse_stmt_list returning " << str(n) << "\n"; return n; } // ::= "{" "}" basic_node * parse_compound(string ind) { if (debug) cout << ind << "parse_compound called\n"; lexeme L = next_symbol(ind + "| "); if (L.type != ty_symbol || L.value != sy_open_curly) { cerr << "{ expected\n"; exit(1); } basic_node * n = parse_stmt_list(ind + "| "); L = next_symbol(ind + "| "); if (L.type != ty_symbol || L.value != sy_close_curly) { cerr << "} expected\n"; exit(1); } if (debug) cout << ind << "+ parse_compound returning " << str(n) << "\n"; return n; } bool ended() { if (debug) cout << "ended called\n"; lexeme L = next_symbol(" "); unread(); if (L.type == ty_eof) { if (debug) cout << "ended returning true\n"; return true; } else { if (debug) cout << "ended returning false\n"; return false; } } }; int main(int argc, char * argv[]) { if (argc == 2 && string(argv[1]) == "debug") debug = true; else if (argc != 1) { cerr << "usage: " << argv[0] << " [debug]\n"; exit(0); } interpreter I(cin); while (true) { cout << "> "; if (I.ended()) { cout << "\n"; break; } basic_node * t = I.parse_compound(" "); t->print(cout, 0); delete t; } }