#include #include #include #include #include "hashtable.h" using namespace std; const int ty_reswd = 1, ty_int = 2, ty_symbol = 3, ty_name = 4, ty_string = 5, ty_eof = 6, ty_bad = 7; string type_names[] = { "", "reswd", "int", "symbol", "name", "string", "eof", "bad" }; const int num_types = sizeof(type_names) / sizeof(type_names[0]); const int rw_if = 1, rw_then = 2, rw_else = 3, rw_while = 4, rw_do = 5, rw_read = 6, rw_print = 7; string reswd_names[] = { "if", "then", "else", "while", "do", "read", "print" }; const int num_reswds = sizeof(reswd_names) / sizeof(reswd_names[0]); const int sy_open_curly = 1, sy_close_curly = 2, sy_open_paren = 3, sy_close_paren = 4, sy_semicolon = 5, sy_plus = 6, sy_minus = 7, sy_times = 8, sy_divide = 9, sy_modulo = 10, sy_less_equal = 11, sy_not_equal = 12, sy_less = 13, sy_greater_equal = 14, sy_greater = 15, sy_equal = 16, sy_assign = 17; string symbols[] = { "", "open_curly", "close_curly", "open_paren", "close_paren", "semicolon", "plus", "minus", "times", "divide", "modulo", "less_equal", "not_equal", "less", "greater_equal", "greater", "equal", "assign" }; const int num_symbols = sizeof(symbols) / sizeof(symbols[0]); struct lexeme { int type; string form; int value; lexeme(int t, const string & f = "", int v = 0) { type = t; form = f; value = v; } string key() { return form; } void print(ostream & os) const { os << "lexeme(type = "; if (type > 0 && type < num_types) os << type_names[type]; else os << "bad(" << type << ")"; os << ", form = \"" << form << "\""; if (type == ty_reswd) if (value > 0 && value < num_reswds) os << ", value = " << reswd_names[value]; else os << ", value = bad(" << value << ")"; else if (type == ty_int) os << ", value = " << value; else if (type == ty_symbol) if (value > 0 && value < num_symbols) os << ", value = " << symbols[value]; else os << ", value = bad(" << value << ")"; os << ")"; } }; ostream & operator<<(ostream & os, const lexeme & lex) { lex.print(os); return os; } void prepare(hashtable ht) { ht.add(new lexeme(ty_reswd, "if", rw_if)); ht.add(new lexeme(ty_reswd, "then", rw_then)); ht.add(new lexeme(ty_reswd, "else", rw_else)); ht.add(new lexeme(ty_reswd, "while", rw_while)); ht.add(new lexeme(ty_reswd, "do", rw_do)); ht.add(new lexeme(ty_reswd, "read", rw_read)); ht.add(new lexeme(ty_reswd, "print", rw_print)); } bool normal = true; void unread() { normal = false; } lexeme next_symbol(istream & in, hashtable & ht) { static lexeme L(ty_bad, "???", -1); if (! normal) { normal = true; return L; } char c; while (true) { c = in.get(); if (in.fail() || c > ' ') break; } if (in.fail()) L = lexeme(ty_eof); else if (c == '{') L = lexeme(ty_symbol, "{", sy_open_curly); else if (c == '}') L = lexeme(ty_symbol, "}", sy_close_curly); else if (c == '(') L = lexeme(ty_symbol, "(", sy_open_paren); else if (c == ')') L = lexeme(ty_symbol, ")", sy_close_paren); else if (c == ';') L = lexeme(ty_symbol, ";", sy_semicolon); else if (c == '+') L = lexeme(ty_symbol, "+", sy_plus); else if (c == '-') L = lexeme(ty_symbol, "-", sy_minus); else if (c == '*') L = lexeme(ty_symbol, "*", sy_times); else if (c == '/') L = lexeme(ty_symbol, "/", sy_divide); else if (c == '%') L = lexeme(ty_symbol, "%", sy_modulo); else if (c == '<') { c = in.peek(); if (c == '=') { in.get(); L = lexeme(ty_symbol, "<=", sy_less_equal); } else if (c == '>') { in.get(); L = lexeme(ty_symbol, "<>", sy_not_equal); } else L = lexeme(ty_symbol, "<", sy_less); } else if (c == '>') { c = in.peek(); if (c == '=') { in.get(); L = lexeme(ty_symbol, ">=", sy_greater_equal); } else L = lexeme(ty_symbol, ">", sy_greater); } else if (c == '=') { c = in.peek(); if (c == '=') { in.get(); L = lexeme(ty_symbol, ":=", sy_assign); } else L = lexeme(ty_bad, string("?") + c); } else if (c == ':') { c = in.peek(); if (c == '=') { in.get(); L = lexeme(ty_symbol, ":=", sy_assign); } else L = lexeme(ty_bad, string(":") + c); } else if (c == '"') { string s = "\""; bool bad = false; while (true) { c = in.get(); if (in.fail()) { s += c; bad = true; break; } if (c == '"') break; s += c; } if (bad) L = lexeme(ty_bad, s); else L = lexeme(ty_string, s); } else if (c >= '0' && c <= '9') { string s = ""; int val = 0; while (true) { s += c; val = val * 10 + c - '0'; c = in.peek(); if (c < '0' || c > '9') break; in.get(); } L = lexeme(ty_int, s, val); } else if (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z') { string s = ""; while (true) { if (c >= 'A' && c <= 'Z') s += (char)(c -'A' + 'a'); else s += c; c = in.peek(); if (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || c == '_') in.get(); else break; } lexeme * r = ht.lookup(s); if (r == NULL) { r = new lexeme(ty_name, s); ht.add(r); } L = * r; } else L = lexeme(ty_bad, string("") + c); return L; } int main() { hashtable ht; prepare(ht); string s; while (true) { lexeme L = next_symbol(cin, ht); cout << L << "\n"; if (L.type == ty_eof) break; } }