/* First version of the syntax definition: lexical items: number = any sequence of one or more decimal digits name = any sequence of letters, digits, or underline, first must be a letter, not case sensitive string = any sequence of characters except " or newline inside a pair of "s spaces, newlines, etc allowed between any two lexemes reserved words not case sensitive operator = + - * / % < <= <> > >= == and or grammar: ::= number | name | string | "read" | "(" ")" ::= | { "-" | "not" } ::= ( operator ) * ::= { "," } ::= "print" ::= name ":=" ::= "while" "do" ::= "if" "then" { "else" } ::= ( ";" ) * ::= "{" "}" ::= | | | | ::= */ #include #include #include #include #include #include #include "hashtable.h" using namespace std; int combine(int a, int b) { return (a << 16) + b; } int first(int a) { return a >> 16; } int second(int a) { return a & 0xFFFF; } void split(int a, int & b, int & c) { b = a >> 16; c = a & 0xFFFF; } const int ty_reswd = 1, ty_int = 2, ty_symbol = 3, ty_name = 4, ty_string = 5, ty_eof = 6, ty_bad = 7; string type_names[] = { "", "reswd", "int", "symbol", "name", "string", "eof", "bad" }; const int num_types = sizeof(type_names) / sizeof(type_names[0]); const int rw_if = 1, rw_then = 2, rw_else = 3, rw_while = 4, rw_do = 5, rw_read = 6, rw_print = 7, rw_and = 8, rw_or = 9, rw_not = 10; string reswd_names[] = { "if", "then", "else", "while", "do", "read", "print", "and", "or", "not" }; const int num_reswds = sizeof(reswd_names) / sizeof(reswd_names[0]); const int sy_open_curly = 1, sy_close_curly = 2, sy_open_paren = 3, sy_close_paren = 4, sy_semicolon = 5, sy_plus = 6, sy_minus = 7, sy_times = 8, sy_divide = 9, sy_modulo = 10, sy_less_equal = 11, sy_not_equal = 12, sy_less = 13, sy_greater_equal = 14, sy_greater = 15, sy_equal = 16, sy_assign = 17, sy_comma = 18; string symbols[] = { "", "open_curly", "close_curly", "open_paren", "close_paren", "semicolon", "plus", "minus", "times", "divide", "modulo", "less_equal", "not_equal", "less", "greater_equal", "greater", "equal", "assign", "comma" }; const int num_symbols = sizeof(symbols) / sizeof(symbols[0]); struct lexeme { int type; string form; int value; lexeme(int t, const string & f = "", int v = 0) { type = t; form = f; value = v; } string key() { return form; } void print(ostream & os) const { os << "lexeme(type = "; if (type > 0 && type < num_types) os << type_names[type]; else os << "bad(" << type << ")"; os << ", form = \"" << form << "\""; if (type == ty_reswd) if (value > 0 && value < num_reswds) os << ", value = " << reswd_names[value]; else os << ", value = bad(" << value << ")"; else if (type == ty_int) os << ", value = " << value; else if (type == ty_symbol) if (value > 0 && value < num_symbols) os << ", value = " << symbols[value]; else os << ", value = bad(" << value << ")"; os << ")"; } }; ostream & operator<<(ostream & os, const lexeme & lex) { lex.print(os); return os; } const int nt_error = 0, nt_number = 1, nt_name = 2, nt_string = 3, nt_read = 4, nt_simple = 5, nt_expression = 6, nt_expr_list = 7, nt_print_stmt = 8, nt_assign_stmt = 9, nt_while_stmt = 10, nt_if_stmt = 11, nt_stmt_list = 12, nt_compound = 13; string node_names[] = { "error", "number", "name", "string", "read", "simple", "expression", "expr-list", "print-stmt", "assign-stmt", "while-stmt", "if-stmt", "stmt-list", "compound" }; const int num_node_names = sizeof(node_names) / sizeof(node_names[0]); struct node { int type; int int_val; string string_val; vector parts; node(int t) { type = t; int_val = 0; string_val = ""; } node(int t, int v) { type = t; int_val = v; string_val = ""; } node(int t, string s) { type = t; int_val = 0; string_val = s; } node(int t, int v, string s) { type = t; int_val = v; string_val = s; } node(int t, string s, int v) { type = t; int_val = v; string_val = s; } ~node() { for (int i = 0; i < parts.size(); i += 1) delete parts[i]; } void add(node * n) { parts.push_back(n); } void print(ostream & os, int indent = 0) const { os << setw(4 * indent) << ""; if (type >= 0 && type < num_node_names) os << node_names[type]; else os << "bad(" << type << ")"; if (type == nt_simple || type == nt_expression) { os << " "; int t = first(int_val), d = second(int_val); if (t == ty_reswd) os << reswd_names[d]; else if (t == ty_symbol) os << symbols[d]; else os << "(" << t << ", " << d << ")"; } else if (type == nt_number) os << " " << int_val; os << " \"" << string_val << "\"\n"; for (int i = 0; i < parts.size(); i += 1) parts[i]->print(os, indent + 1); } }; ostream & operator<<(ostream & os, const node & n) { n.print(os); return os; } class interpreter { protected: hashtable ht; istream & in; bool normal; void unread() { normal = false; } lexeme next_symbol() { static lexeme L(ty_bad, "???", -1); if (! normal) { normal = true; return L; } char c; while (true) { c = in.get(); if (in.fail() || c > ' ') break; } if (in.fail()) L = lexeme(ty_eof); else if (c == '{') L = lexeme(ty_symbol, "{", sy_open_curly); else if (c == '}') L = lexeme(ty_symbol, "}", sy_close_curly); else if (c == '(') L = lexeme(ty_symbol, "(", sy_open_paren); else if (c == ')') L = lexeme(ty_symbol, ")", sy_close_paren); else if (c == ',') L = lexeme(ty_symbol, ",", sy_comma); else if (c == ';') L = lexeme(ty_symbol, ";", sy_semicolon); else if (c == '+') L = lexeme(ty_symbol, "+", sy_plus); else if (c == '-') L = lexeme(ty_symbol, "-", sy_minus); else if (c == '*') L = lexeme(ty_symbol, "*", sy_times); else if (c == '/') L = lexeme(ty_symbol, "/", sy_divide); else if (c == '%') L = lexeme(ty_symbol, "%", sy_modulo); else if (c == '<') { c = in.peek(); if (c == '=') { in.get(); L = lexeme(ty_symbol, "<=", sy_less_equal); } else if (c == '>') { in.get(); L = lexeme(ty_symbol, "<>", sy_not_equal); } else L = lexeme(ty_symbol, "<", sy_less); } else if (c == '>') { c = in.peek(); if (c == '=') { in.get(); L = lexeme(ty_symbol, ">=", sy_greater_equal); } else L = lexeme(ty_symbol, ">", sy_greater); } else if (c == '=') { c = in.peek(); if (c == '=') { in.get(); L = lexeme(ty_symbol, ":=", sy_assign); } else L = lexeme(ty_bad, string("?") + c); } else if (c == ':') { c = in.peek(); if (c == '=') { in.get(); L = lexeme(ty_symbol, ":=", sy_assign); } else L = lexeme(ty_bad, string(":") + c); } else if (c == '"') { string s = "\""; bool bad = false; while (true) { c = in.get(); if (in.fail()) { s += c; bad = true; break; } if (c == '"') break; s += c; } if (bad) L = lexeme(ty_bad, s); else L = lexeme(ty_string, s); } else if (c >= '0' && c <= '9') { string s = ""; int val = 0; while (true) { s += c; val = val * 10 + c - '0'; c = in.peek(); if (c < '0' || c > '9') break; in.get(); } L = lexeme(ty_int, s, val); } else if (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z') { string s = ""; while (true) { if (c >= 'A' && c <= 'Z') s += (char)(c -'A' + 'a'); else s += c; c = in.peek(); if (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || c == '_') in.get(); else break; } lexeme * r = ht.lookup(s); if (r == NULL) { r = new lexeme(ty_name, s); ht.add(r); } L = * r; } else L = lexeme(ty_bad, string("") + c); return L; } public: interpreter(istream & i): in(i) { ht.add(new lexeme(ty_reswd, "if", rw_if)); ht.add(new lexeme(ty_reswd, "then", rw_then)); ht.add(new lexeme(ty_reswd, "else", rw_else)); ht.add(new lexeme(ty_reswd, "while", rw_while)); ht.add(new lexeme(ty_reswd, "do", rw_do)); ht.add(new lexeme(ty_reswd, "read", rw_read)); ht.add(new lexeme(ty_reswd, "print", rw_print)); ht.add(new lexeme(ty_reswd, "and", rw_and)); ht.add(new lexeme(ty_reswd, "or", rw_or)); ht.add(new lexeme(ty_reswd, "not", rw_not)); normal = true; } // ::= number | name | string | "read" | "(" ")" node * parse_value() { lexeme L = next_symbol(); if (L.type == ty_int) return new node(nt_number, L.value); else if (L.type == ty_name) return new node(nt_name, L.form); else if (L.type == ty_string) return new node(nt_string, L.form); else if (L.type == ty_reswd && L.value == rw_read) return new node(nt_read); else if (L.type == ty_symbol && L.value == sy_open_paren) { node * expr = parse_expression(); L = next_symbol(); if (L.type != ty_symbol || L.value != sy_close_paren) { cerr << "error in expression, " << L << " seen when ) was expected\n"; exit(1); } return expr; } else { cerr << "value expected but " << L << " appeared\n"; exit(1); } } // ::= | { "-" | "not" } node * parse_simple() { lexeme L = next_symbol(); if (L.type == ty_symbol && L.value == sy_minus || L.type == ty_reswd && L.value == rw_not) { int k = combine(L.type, L.value); node * n = new node(nt_simple, k, L.form); node * simp = parse_simple(); n->add(simp); return n; } else { unread(); return parse_value(); } } // ::= ( operator ) * node * parse_expression() { node * left = parse_simple(); lexeme L = next_symbol(); while (L.type == ty_symbol && (L.value == sy_plus || L.value == sy_minus || L.value == sy_times || L.value == sy_divide || L.value == sy_modulo || L.value == sy_less_equal || L.value == sy_not_equal || L.value == sy_less || L.value == sy_greater_equal || L.value == sy_greater || L.value == sy_equal) || L.type == ty_reswd && (L.value == rw_and || L.value == rw_or)) { node * right = parse_simple(); int k = combine(L.type, L.value); node * n = new node(nt_expression, k, L.form); n->add(left); n->add(right); left = n; L = next_symbol(); } unread(); return left; } bool ended() { lexeme L = next_symbol(); unread(); return L.type == ty_eof; } // ::= { "," } node * parse_expr_list() { node * left = parse_expression(); lexeme L = next_symbol(); if (L.type == ty_symbol && L.value == sy_comma) { node * right = parse_expr_list(); node * n = new node(nt_expr_list); n->add(left); n->add(right); return n; } else { unread(); return left; } } // ::= "print" node * parse_print_stmt() { lexeme L = next_symbol(); if (L.type == ty_reswd && L.value == rw_print) { node * el = parse_expr_list(); node * n = new node(nt_print_stmt); n->add(el); return n; } else { cerr << "print statement expected but " << L << " seen\n"; exit(1); } } }; int main() { interpreter I(cin); while (true) { cout << "> "; if (I.ended()) { cout << "\n"; break; } node * t = I.parse_print_stmt(); cout << * t << "\n"; delete t; } }