// NOTE: the header names were missing from the original #include directives;
// the list below is what the code in this file actually uses.
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <limits>
#include <sstream>
#include <string>

using namespace std;

// One lexical token.
//   type     - category, e.g. "number", "name", "operator", "punctuation",
//              "reserved_word", "end_of_file" or "error".
//   form     - the token's text as it appeared in the input (may be empty).
//   value    - numeric value; only meaningful when hasvalue is true.
//   hasvalue - true when the token carries a numeric value.
struct symbol {
  string form;
  string type;
  int value;
  bool hasvalue;

  // Token without a numeric value. value is zeroed so that copying the
  // symbol never reads an uninitialized int.
  symbol(const string & t, const string & f = "")
    : form(f), type(t), value(0), hasvalue(false) { }

  // Token with a numeric value and no textual form.
  symbol(const string & t, int v)
    : form(""), type(t), value(v), hasvalue(true) { }

  // Token with both a textual form and a numeric value.
  symbol(const string & t, const string & f, int v)
    : form(f), type(t), value(v), hasvalue(true) { }

  // Renders the token as symbol[type = "...", form = "...", value = N];
  // the form part is omitted when empty, the value part when hasvalue is false.
  string to_string() const {
    ostringstream os;
    os << "symbol[type = \"" << type << "\"";
    if (!form.empty())
      os << ", form = \"" << form << "\"";
    if (hasvalue)
      os << ", value = " << value;
    os << "]";
    return os.str();
  }
};

// Streams a symbol using symbol::to_string().
ostream & operator<<(ostream & out, const symbol & sym) {
  out << sym.to_string();
  return out;
}

// Line-buffered character source plus a lexer built on top of it.
//
// Input is read one line at a time from *file (cin by default). Each line is
// followed by one virtual ' ' so that tokens never run across a line break;
// end of input is reported as the control_d character.
struct inputsystem {
  string line;          // text of the current line
  int pos, len;         // read position within line, and line's length
  int linenumber;       // number of lines read so far
  int numerrors;        // number of errors reported so far
  istream * file;       // stream being read; not owned
  bool dontread;        // true when lexan() must re-deliver lastsym
  symbol lastsym;       // most recent token returned by lexan()

  static const char control_d = 'D' - 64;   // end-of-input marker (ASCII 4)

  inputsystem() : lastsym("error") {
    clear();
  }

  inputsystem(istream & f) : lastsym("error") {
    clear();
    open(f);
  }

  // Resets all state and points the reader back at cin.
  void clear() {
    line = "";
    pos = 0;
    len = 0;
    numerrors = 0;
    linenumber = 0;
    dontread = false;
    lastsym = symbol("error");
    file = & cin;
  }

  // Starts reading from f. The stream must outlive this object's use of it.
  void open(istream & f) {
    file = & f;
  }

  // Closes the underlying stream when it is a file stream. Streams of any
  // other kind (cin, string streams, ...) are left untouched; the checked
  // cast avoids treating them as an ifstream.
  void close() {
    if (file == & cin)
      return;
    if (ifstream * f = dynamic_cast<ifstream *>(file))
      f->close();
  }

  // Returns the next character. Yields ' ' at the end of every line (and
  // while pos is negative after too many ungets) and control_d once the
  // stream can deliver no more lines.
  char get() {
    for (;;) {
      if (pos < 0 || pos == len) {
        pos += 1;
        return ' ';
      }
      if (pos < len)
        return line[pos++];

      // pos > len: the current line is used up, fetch the next one. Reading
      // into a temporary keeps line and len consistent if the read fails.
      string next;
      if (!getline(* file, next))
        return control_d;
      line.swap(next);
      pos = 0;
      len = static_cast<int>(line.length());
      linenumber += 1;
    }
  }

  // Steps back one character so the next get() re-delivers it.
  void unget() {
    pos -= 1;
  }

  // Steps back one character and replaces it with c. The write is skipped
  // when the position is outside the line (e.g. on the virtual end-of-line
  // space), where there is no real character to overwrite.
  void unget(char c) {
    pos -= 1;
    if (pos >= 0 && pos < len)
      line[pos] = c;
  }

  // Prints an error with the current line number and line text. Terminates
  // the program with status 1 once ten errors have been reported.
  void error(const string & message, const string & detail = "") {
    cout << "Line " << linenumber << " Error " << message << detail << "\n";
    cout << line << "\n";
    numerrors += 1;
    if (numerrors >= 10) {
      cout << "Too many errors, giving up.\n";
      exit(1);
    }
  }

  static bool is_digit(char c) {
    return c >= '0' && c <= '9';
  }

  static bool is_letter(char c) {
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
  }

  // Lexes a decimal number whose first digit is c. A number too large for
  // int is reported through error() and clamped to the largest int instead
  // of overflowing.
  symbol lex_number(char c) {
    const int maxint = numeric_limits<int>::max();
    string form;
    int value = 0;
    bool overflow = false;
    while (is_digit(c)) {
      const int digit = c - '0';
      form += c;
      if (overflow || value > (maxint - digit) / 10)
        overflow = true;
      else
        value = value * 10 + digit;
      c = get();
    }
    unget();
    if (overflow) {
      error("number too large: ", form);
      value = maxint;
    }
    return symbol("number", form, value);
  }

  // Lexes a word (letters and digits) whose first letter is c and classifies
  // it as a reserved word, a word operator, or a plain name.
  symbol lex_word(char c) {
    string form;
    while (is_letter(c) || is_digit(c)) {
      form += c;
      c = get();
    }
    unget();
    if (form == "while" || form == "do" || form == "if" || form == "then" ||
        form == "else" || form == "fi" || form == "print")
      return symbol("reserved_word", form);
    if (form == "and" || form == "or" || form == "not")
      return symbol("operator", form);
    return symbol("name", form);
  }

  // Lexes an operator that is valid alone or followed by '=' ("<" / "<=").
  symbol lex_optional_equals(char first) {
    const string op(1, first);
    if (get() == '=')
      return symbol("operator", op + "=");
    unget();
    return symbol("operator", op);
  }

  // Lexes an operator that is only valid as the pair first+second ("!=",
  // "&&", "||"). Otherwise the look-ahead character is put back, so it is
  // lexed normally on the next call, and lone_message is reported.
  symbol lex_required_pair(char first, char second, const string & lone_message) {
    if (get() == second)
      return symbol("operator", string(1, first) + second);
    unget();
    error(lone_message);
    return symbol("error");
  }

  // Returns the next token, or the previous one again after unlexan().
  // Unrecognised input is reported through error() and yields a symbol of
  // type "error"; end of input yields type "end_of_file".
  symbol lexan() {
    if (dontread) {
      dontread = false;
      return lastsym;
    }

    // Skip blanks and control characters. Where char is signed, bytes of
    // 0x80 and above also compare below ' ' and are skipped here.
    char c = get();
    while (c <= ' ' && c != control_d)
      c = get();

    if (c == control_d)
      return lastsym = symbol("end_of_file", "eof");
    if (is_digit(c))
      return lastsym = lex_number(c);
    if (is_letter(c))
      return lastsym = lex_word(c);

    switch (c) {
      case '{': case '}': case '(': case ')': case ';':
        return lastsym = symbol("punctuation", string(1, c));

      case '+': case '-': case '*': case '/': case '%': case '^':
        return lastsym = symbol("operator", string(1, c));

      case '=': case '>': case '<':
        return lastsym = lex_optional_equals(c);

      case '!':
        return lastsym = lex_required_pair('!', '=', "lone !");
      case '&':
        return lastsym = lex_required_pair('&', '&', "lone &\n");
      case '|':
        return lastsym = lex_required_pair('|', '|', "lone |\n");

      default: {
        string msg = "unrecognised character: ";
        if (c >= ' ' && c <= '~')
          msg = msg + "'" + c + "'";
        else
          msg = msg + "ASCII[" + std::to_string(static_cast<int>(c)) + "]";
        error(msg);
        return lastsym = symbol("error");
      }
    }
  }

  // Makes the next lexan() call return the most recent token again.
  void unlexan() {
    dontread = true;
  }

  // Prints every token up to and including end_of_file, one per line.
  void test() {
    while (true) {
      symbol sy = lexan();
      cout << sy << "\n";
      if (sy.type == "end_of_file")
        break;
    }
  }
};

// Tokenizes the file named by the first command-line argument, or standard
// input when no argument is given, printing one token per line.
int main(int argc, char * argv[]) {
  ifstream fi;
  inputsystem in;
  if (argc > 1) {
    fi.open(argv[1]);
    if (fi.fail()) {
      cout << "Can't open \"" << argv[1] << "\"\n";
      return 1;
    }
    in.open(fi);
  }
  in.test();
  in.close();
  return 0;
}