/* la.c */
/*
 * Lexical analyser, first part: the symbol table, the variables that
 * describe the current lexeme, and helpers that print characters, strings
 * and identifiers.
 *
 * NOTE(review): this copy of the file looks damaged by text extraction.
 * Wherever a less-than sign was directly followed by a letter, everything up
 * to the next greater-than sign appears to be missing.  The affected places
 * are marked below.  They must be restored from a clean copy; the notes here
 * describe only what is still visible.
 */
#include /* NOTE(review): header name missing in this copy */
#include /* NOTE(review): header name missing in this copy */
#include "io.h"
#include "la.h"

/* The symbol table.  The surviving lookup code indexes it with h and links
   entries through their next field, so each slot heads a chain of
   SymbolDescription records.  The HashTable type is declared elsewhere. */
HashTable symbol_table;

/*
 * init_symbol_table, immediately followed by the tail of the symbol lookup
 * routine.
 *
 * NOTE(review): source text is missing between the start of the for loop
 * and the name comparison.  Lost with it: the rest of init_symbol_table,
 * whatever computes the slot index h, and the head of the lookup routine
 * (presumably lookup_symbol, which the code further down calls with a
 * string and whose result it dereferences).  Do not expect this span to
 * compile as it stands.
 */
void init_symbol_table(void) {
    int i;
    for (i=0; iname,symbol)==0)
                return (p);   /* comparison against symbol gave 0: reuse this entry */
            p=p->next;        /* otherwise step along the chain */
        }
    /* No entry matched: build a new one that owns a private copy of symbol.
       NOTE(review): neither malloc result is checked before it is used. */
    p=malloc(sizeof(SymbolDescription));
    p->name=malloc(strlen(symbol)+1);
    strcpy(p->name,symbol);
    p->decls=NULL;
    p->kind=SKunknown;        /* the caller assigns the real kind afterwards */
    /* link the new entry in at the head of slot h */
    p->next=symbol_table[h];
    symbol_table[h]=p;
    return (p);
}

/*
 * Enter the reserved words, the built-in type names and NULL into the
 * symbol table by looking each one up and assigning its kind.  The
 * spellings are upper case: get_lexeme folds the letters of every name to
 * upper case before looking it up, so source text matches these entries in
 * any letter case.
 */
void predefine_reserved_words(void) {
    lookup_symbol("IF")->kind=RWif;
    lookup_symbol("ELSE")->kind=RWelse;
    lookup_symbol("WHILE")->kind=RWwhile;
    lookup_symbol("RETURN")->kind=RWreturn;
    lookup_symbol("INT")->kind=TYint;
    lookup_symbol("FLOAT")->kind=TYfloat;
    lookup_symbol("CHAR")->kind=TYchar;
    lookup_symbol("VOID")->kind=TYvoid;
    lookup_symbol("STRUCT")->kind=TYstruct;
    lookup_symbol("NULL")->kind=SKnull;
}

/* Description of the current lexeme.  All of these are written by
   get_lexeme. */
/* kind of the current lexeme */
SymbolKind lex_kind=SKunknown;
/* value of an integer or character constant; get_lexeme also uses it as
   the write index into lex_name while collecting a name or string */
int lex_integer=0;
/* value of a floating constant */
float lex_float=0.0;
/* table entry of the name that was read last */
SymbolDescription *lex_symbol=NULL;
/* spelling of a name, or contents of a string constant; the scanning loops
   store at most 99 characters and then a terminating zero */
char lex_name[100];
/* when non-zero, the next call of get_lexeme clears this flag and returns
   at once, leaving the current lexeme in place */
int already_got_lexeme=0;

/*
 * Write character c to f in the form it would take inside a C literal.
 * Values below 32 are written as a backslash followed by n, r, t, 0 or a
 * three digit octal code; quote characters and the backslash get a leading
 * backslash; everything else is written unchanged.
 *
 * NOTE(review): the test c less than 32 is also true for negative values.
 * Where plain char is signed, bytes above 127 therefore take the octal
 * branch, and the octal conversion of a negative int prints many more than
 * three digits.  Character 127 is written unescaped.
 */
void print_char(FILE *f, char c) {
    if (c<32) {
        fputc('\\',f);
        if (c==10) fputc('n',f);
        else if (c==13) fputc('r',f);
        else if (c==9) fputc('t',f);
        else if (c==0) fputc('0',f);
        else fprintf(f,"%03o",c);
    }
    else if (c=='\'') fprintf(f,"\\\'");
    else if (c=='\"') fprintf(f,"\\\"");
    else if (c=='\\') fprintf(f,"\\\\");
    else fputc(c,f);
}

/*
 * Write the zero terminated string s to f between double quotes, passing
 * every character through print_char.
 */
void print_string(FILE *f, char *s) {
    char c;   /* NOTE(review): never used */
    fputc('\"',f);
    while (*s!=0) {
        print_char(f,*s);
        s+=1;
    }
    fputc('\"',f);
}

/*
 * Translate the character that follows a backslash in the input.  The
 * character is fetched with nextchar (declared elsewhere) and the value it
 * stands for is returned:
 *   n, t, r, 0                    give 10, 9, 13 and 0;
 *   backslash and both quotes     stand for themselves;
 *   space, tab, newline, return   are skipped together with every following
 *                                 character not greater than space; if the
 *                                 character after that is a backslash it is
 *                                 translated as a fresh escape, otherwise
 *                                 that character itself is returned;
 *   anything else                 is returned unchanged.
 */
char deal_with_magic_character(void) {
    char c;
    c=nextchar();
    switch (c) {
    case 'n': return (10);
    case 't': return (9);
    case 'r': return (13);
    case '0': return (0);
    case '\\': return ('\\');
    case '\"': return ('\"');
    case '\'': return ('\'');
    case ' ': case '\t': case '\n': case '\r':
        while (c<=' ') c=nextchar();
        if (c=='\\') return (deal_with_magic_character());
        return (c);
    default: return (c);
    }
}

/*
 * Write the zero terminated string s to f with the letters A to Z replaced
 * by their lower case forms; other characters are written unchanged.
 */
void print_lower_case(FILE *f, char *s) {
    while (*s!=0) {
        if (*s>='A' && *s<='Z') fputc(*s-'A'+'a',f);
        else fputc(*s,f);
        s+=1;
    }
}

/*
 * Write the name stored in the symbol table entry id to f in lower case.
 */
void print_identifier(FILE *f, SymbolDescription *id) {
    print_lower_case(f,id->name);
}
/*
 * Write the current lexeme, as described by lex_kind, to f.  The surviving
 * cases write operators by their symbol and anything unrecognised as
 * [Badly-coded-lexeme].
 *
 * NOTE(review): this copy of the function is damaged.  Text is missing
 * inside the first condition and again from the second range test up to the
 * tail of one fprintf call, which takes the start of the switch and all
 * earlier cases with it.  Restore from a clean copy before building.
 */
void print_lexeme_nicely(FILE *f) {
    if (lex_kind>FirstReservedWord && lex_kindFirstType && lex_kind");
        break;
    case OPnot: fprintf(f,"!"); break;
    case OPnotequal: fprintf(f,"!="); break;
    case OPless: fprintf(f,"<"); break;
    case OPlessequal: fprintf(f,"<="); break;
    case OPgreater: fprintf(f,">"); break;
    case OPgreaterequal: fprintf(f,">="); break;
    case OPFnotequal: fprintf(f,"#!="); break;
    case OPFless: fprintf(f,"#<"); break;
    case OPFlessequal: fprintf(f,"#<="); break;
    case OPFgreater: fprintf(f,"#>"); break;
    case OPFgreaterequal: fprintf(f,"#>="); break;
    default: fprintf(f,"[Badly-coded-lexeme]"); break;
    }
}

/*
 * Write the lexeme of kind k to f.  Works by placing k in lex_kind for the
 * duration of a print_lexeme_nicely call and then putting the previous
 * value back; the other lex_ variables are left as they are.
 */
void print_operator(FILE *f, SymbolKind k) {
    SymbolKind old=lex_kind;
    lex_kind=k;
    print_lexeme_nicely(f);
    lex_kind=old;
}

/*
 * Write the name of the basic type k (int, float, char or void) to f.
 *
 * NOTE(review): for any other kind the default case writes an empty string,
 * so nothing appears.  The literal may have lost its text in the same way
 * as the other damaged spots in this copy; confirm against a clean copy.
 */
void print_type_symbol(FILE *f, SymbolKind k) {
    switch (k) {
    case TYint: fprintf(f,"int"); break;
    case TYfloat: fprintf(f,"float"); break;
    case TYchar: fprintf(f,"char"); break;
    case TYvoid: fprintf(f,"void"); break;
    default: fprintf(f,"");
    }
}

/* Defined elsewhere; called by the LOOKUP and DUMP directives below. */
void print_declarations(SymbolDescription *var);

/*
 * Read the next lexeme from the input and describe it in lex_kind,
 * lex_integer, lex_float, lex_symbol and lex_name.
 *
 * If already_got_lexeme is set the flag is cleared and the current lexeme
 * is left in place.  Otherwise characters are fetched with nextchar;
 * characters not greater than space and both comment forms are skipped.
 * Character 127 is taken as the end of the input.  Whenever one character
 * too many has been read, already_got_next_char is set to 1 (that variable
 * lives elsewhere; presumably it makes nextchar deliver the same character
 * again).
 *
 * A hash sign introduces a directive.  INCLUDE, PRINT, LOOKUP and DUMP are
 * carried out here and scanning then continues; LINE and FILE are returned
 * as an integer and a string lexeme.
 *
 * NOTE(review): where plain char is signed, input bytes above 127 are
 * negative and are skipped by the blank-skipping loop.
 */
void get_lexeme(void) {
    char ch;
    /* a lexeme was handed back earlier: deliver it again */
    if (already_got_lexeme) {
        already_got_lexeme=0;
        return;
    }
    lex_kind=SKunknown;
    ch=nextchar();
    /* go round again after comments, completed directives and bad characters */
    while (lex_kind==SKunknown) {
        while (ch<=' ') ch=nextchar();
        register_symbol_start();
        switch (ch) {
        case 127:
            lex_kind=SKendoffile;
            return;
        /* Number.  An integer unless a decimal point is seen.
           lex_integer accumulates every digit, including those after the
           point.  lex_float is seeded from lex_integer at the point and then
           receives the following digits with weights 0.1, 0.01 and so on.
           NOTE(review): no guard against overflow of lex_integer.  A second
           point is reported, but scanning carries on and lex_float is seeded
           again from lex_integer, which by then contains fraction digits. */
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9': {
            int gotdot=0;
            float digitweight;
            lex_kind=SKinteger;
            lex_integer=0;
            lex_float=0.0;
            while (ch>='0' && ch<='9' || ch=='.') {
                if (ch=='.') {
                    if (gotdot) syntax_error("multiple decimal points");
                    gotdot=1;
                    lex_kind=SKfloat;
                    lex_float=lex_integer;
                    digitweight=0.1;
                }
                else {
                    lex_integer=lex_integer*10+ch-'0';
                    if (gotdot) {
                        lex_float+=(ch-'0')*digitweight;
                        digitweight/=10.0;
                    }
                }
                ch=nextchar();
            }
            already_got_next_char=1;
            return;
        }
        /* Name: a letter or underscore followed by letters, digits and
           underscores.  Lower case letters are folded to upper case.  Only
           the first 99 characters are kept; the rest are read and dropped.
           A name not seen before becomes an identifier; otherwise the kind
           already stored in the table is returned, which is how reserved
           words and type names are recognised. */
        case 'a': case 'i': case 'q': case 'y': case 'G': case 'O': case 'W':
        case 'b': case 'j': case 'r': case 'z': case 'H': case 'P': case 'X':
        case 'c': case 'k': case 's': case
        'A': case 'I': case 'Q': case 'Y':
        case 'd': case 'l': case 't': case 'B': case 'J': case 'R': case 'Z':
        case 'e': case 'm': case 'u': case 'C': case 'K': case 'S': case '_':
        case 'f': case 'n': case 'v': case 'D': case 'L': case 'T':
        case 'g': case 'o': case 'w': case 'E': case 'M': case 'U':
        case 'h': case 'p': case 'x': case 'F': case 'N': case 'V':
            lex_integer=0;
            while (ch>='a' && ch<='z' || ch>='A' && ch<='Z' || ch>='0' && ch<='9' || ch=='_') {
                if (ch>='a' && ch<='z') ch-='a'-'A';
                if (lex_integer<99) {
                    lex_name[lex_integer]=ch;
                    lex_integer+=1;
                }
                ch=nextchar();
            }
            lex_name[lex_integer]=0;
            already_got_next_char=1;
            lex_symbol=lookup_symbol(lex_name);
            if (lex_symbol->kind==SKunknown) lex_symbol->kind=SKidentifier;
            lex_kind=lex_symbol->kind;
            return;
        /* Directive: the word after the hash sign is collected exactly like
           a name (upper case, at most 99 characters) and then compared with
           the known directive names. */
        case '#':
            lex_integer=0;
            ch=nextchar();
            while (ch>='a' && ch<='z' || ch>='A' && ch<='Z' || ch>='0' && ch<='9' || ch=='_') {
                if (ch>='a' && ch<='z') ch-='a'-'A';
                if (lex_integer<99) {
                    lex_name[lex_integer]=ch;
                    lex_integer+=1;
                }
                ch=nextchar();
            }
            lex_name[lex_integer]=0;
            already_got_next_char=1;
            if (strcmp(lex_name,"INCLUDE")==0) {
                /* skip blanks but stop at a newline, then take every
                   character greater than space as the file name (case is
                   kept, at most 99 characters) and pass it to include_file */
                do ch=nextchar(); while (ch<=' ' && ch!='\n');
                lex_integer=0;
                while (ch>' ' && ch!=127) {
                    if (lex_integer<99) {
                        lex_name[lex_integer]=ch;
                        lex_integer+=1;
                    }
                    ch=nextchar();
                }
                lex_name[lex_integer]=0;
                include_file(lex_name);
                lex_kind=SKunknown;
                continue;
            }
            else if (strcmp(lex_name,"PRINT")==0) {
                /* Echo the lexemes on the rest of the line to stdout, each
                   followed by a space, then a newline.  The call with 127
                   presumably makes the input layer deliver 127 at the end of
                   the line, so the nested get_lexeme calls stop there; the
                   call with 10 undoes that. */
                translate_end_of_line_to(127);
                while (1) {
                    get_lexeme();
                    if (lex_kind==SKendoffile) break;
                    print_lexeme_nicely(stdout);
                    fputc(' ',stdout);
                }
                translate_end_of_line_to(10);
                ch=' ';   /* forces the blank-skipping loop to fetch a fresh character */
                fputc('\n',stdout);
                lex_kind=SKunknown;
                continue;
            }
            else if (strcmp(lex_name,"LOOKUP")==0) {
                /* same line-at-a-time scheme as PRINT, but calls
                   print_declarations for every identifier on the line */
                translate_end_of_line_to(127);
                while (1) {
                    get_lexeme();
                    if (lex_kind==SKendoffile) break;
                    if (lex_kind==SKidentifier) print_declarations(lex_symbol);
                }
                translate_end_of_line_to(10);
                ch=' ';
                lex_kind=SKunknown;
                continue;
            }
            else if (strcmp(lex_name,"DUMP")==0) {
                /* NOTE(review): text is missing between the start of the for
                   loop and the test on kind.  What survives calls
                   print_declarations for each entry e that is an identifier
                   with a non-NULL decls field and follows e through next;
                   the loop bounds and the declaration of e are lost. */
                int i;
                for (i=0; ikind==SKidentifier && e->decls!=NULL)
                            print_declarations(e);
                        e=e->next;
                    }
                }
                abandon_rest_of_input_line();
                lex_kind=SKunknown;
                continue;
            }
            else if (strcmp(lex_name,"LINE")==0) {
                /* delivered as an integer constant holding the value of
                   current_line_number */
                lex_kind=SKinteger;
                lex_integer=current_line_number();
                return;
            }
            else if (strcmp(lex_name,"FILE")==0) {
                /* delivered as a string constant holding the result of
                   current_file_name.
                   NOTE(review): strcpy is unbounded; a file name of 100
                   characters or more overruns lex_name. */
                lex_kind=SKstring;
                strcpy(lex_name,current_file_name());
                return;
            }
            else {
                syntax_error("Bad use of #\n");
                abandon_rest_of_input_line();
                lex_kind=SKunknown;
                continue;
            }
        /* String constant: runs to the closing quote, but also stops at a
           newline or at character 127, which is reported as an error.
           Backslash escapes are translated.  At most 99 characters are kept.
           NOTE(review): a backslash-0 escape stores a zero byte inside
           lex_name ahead of the terminator. */
        case '\"':
            lex_integer=0;
            ch=nextchar();
            while (ch!='\"' && ch!=10 && ch!=127) {
                if (ch=='\\') ch=deal_with_magic_character();
                if (lex_integer<99) {
                    lex_name[lex_integer]=ch;
                    lex_integer+=1;
                }
                ch=nextchar();
            }
            lex_name[lex_integer]=0;
            if (ch!='\"') {
                syntax_error("non-terminated string constant");
                already_got_next_char=1;
            }
            lex_kind=SKstring;
            return;
        /* Character constant: delivered as an integer constant whose value
           is the character, after escape translation. */
        case '\'':
            lex_kind=SKinteger;
            ch=nextchar();
            if (ch=='\\') ch=deal_with_magic_character();
            else if (ch=='\'' || ch=='\n') {
                syntax_error("empty character constant");
                lex_integer=0;
                return;
            }
            lex_integer=ch;
            ch=nextchar();
            if (ch!='\'') {
                syntax_error("non-terminated character constant");
                already_got_next_char=1;
            }
            return;
        /* single character punctuation */
        case '{': lex_kind=SKbegin; return;
        case '}': lex_kind=SKend; return;
        case '(': lex_kind=SKopenpar; return;
        case ')': lex_kind=SKclosepar; return;
        case '[': lex_kind=SKopensquare; return;
        case ']': lex_kind=SKclosesquare; return;
        case ';': lex_kind=SKsemicolon; return;
        case ',': lex_kind=SKcomma; return;
        /* Operators of one or two characters: read one character ahead and
           hand it back when it is not part of the operator. */
        case '&':
            ch=nextchar();
            if (ch=='&') lex_kind=OPand;
            else {
                lex_kind=OPaddress;
                already_got_next_char=1;
            }
            return;
        case '|':
            /* a single bar is reported as an error but still delivered as OPor */
            ch=nextchar();
            if (ch!='|') {
                syntax_error("single | not allowed");
                already_got_next_char=1;
            }
            lex_kind=OPor;
            return;
        case '+':
            ch=nextchar();
            if (ch=='=') lex_kind=OPplusequal;
            else {
                lex_kind=OPplus;
                already_got_next_char=1;
            }
            return;
        case '-':
            ch=nextchar();
            if (ch=='=') lex_kind=OPminusequal;
            else if (ch=='>') lex_kind=OParrow;
            else {
                lex_kind=OPminus;
                already_got_next_char=1;
            }
            return;
        case '*':
            ch=nextchar();
            if (ch=='=') lex_kind=OPstarequal;
            else {
                lex_kind=OPstar;
                already_got_next_char=1;
            }
            return;
        case '/':
            ch=nextchar();
            if (ch=='/') {
                /* comment to end of line: skip up to the newline or 127 */
                do ch=nextchar(); while (ch!='\n' && ch!=127);
                continue;
            }
            else if (ch=='*') {
                /* Block comment: state is 1 directly after a star.
                   NOTE(review): a star read while state is already 1 falls
                   into the final else and resets state to 0, so a comment
                   closed by two stars and a slash is not seen as closed.
                   The loop also has no test for 127, so an unclosed comment
                   is never reported here. */
                int state=0;
                while (1) {
                    ch=nextchar();
                    if (ch=='*' && state==0) state=1;
                    else if (ch=='/' && state==1) {
                        ch=nextchar();
                        break;
                    }
                    else state=0;
                }
                continue;
            }
            else if (ch=='=') lex_kind=OPdivideequal;
            else {
                lex_kind=OPdivide;
                already_got_next_char=1;
            }
            return;
        case '%':
            ch=nextchar();
            if (ch=='=') lex_kind=OPmodequal;
            else {
                lex_kind=OPmod;
                already_got_next_char=1;
            }
            return;
        case '=':
            ch=nextchar();
            if (ch=='=') lex_kind=OPequalequal;
            else {
                lex_kind=OPequal;
                already_got_next_char=1;
            }
            return;
        case '!':
            ch=nextchar();
            if (ch=='=') lex_kind=OPnotequal;
            else {
                lex_kind=OPnot;
                already_got_next_char=1;
            }
            return;
        case '<':
            ch=nextchar();
            if (ch=='=') lex_kind=OPlessequal;
            else {
                lex_kind=OPless;
                already_got_next_char=1;
            }
            return;
        case '>':
            ch=nextchar();
            if (ch=='=') lex_kind=OPgreaterequal;
            else {
                lex_kind=OPgreater;
                already_got_next_char=1;
            }
            return;
        case '.':
            lex_kind=OPdot;
            return;
        default:
            /* report the character, drop it and try again with the next one */
            syntax_error("Character not allowed");
            ch=nextchar();
            lex_kind=SKunknown;
        }
    }
}

/*
 * Write the current lexeme to f in a bracketed diagnostic form; the
 * surviving cases write operators as [Operator,symbol].
 *
 * NOTE(review): damaged in the same way as print_lexeme_nicely.  Text is
 * missing inside the first condition and from the second range test up to
 * the tail of one fprintf call, so the start of the switch and all earlier
 * cases are lost.
 */
void print_lexeme(FILE *f) {
    if (lex_kind>FirstReservedWord && lex_kindFirstType && lex_kind]");
        break;
    case OPnot: fprintf(f,"[Operator,!]"); break;
    case OPnotequal: fprintf(f,"[Operator,!=]"); break;
    case OPless: fprintf(f,"[Operator,<]"); break;
    case OPlessequal: fprintf(f,"[Operator,<=]"); break;
    case OPgreater: fprintf(f,"[Operator,>]"); break;
    case OPgreaterequal: fprintf(f,"[Operator,>=]"); break;
    default: fprintf(f,"[Badly-coded-lexeme]"); break;
    }
}

/*
 * Prepare the lexical analyser: set up the symbol table, enter the reserved
 * words, start the input system (init_input_system is defined elsewhere)
 * and clear the handed-back-lexeme flag.
 */
void init_lexical_analyser(void) {
    init_symbol_table();
    predefine_reserved_words();
    init_input_system();
    already_got_lexeme=0;
}

/*
 * Test driver: initialise the analyser, then read lexemes and write each
 * one to stdout on a line of its own.  The loop test comes after printing,
 * so the end of file lexeme is printed as well before the loop stops.
 */
void test_lexical_analyser(void) {
    init_lexical_analyser();
    while (lex_kind!=SKendoffile) {
        get_lexeme();
        print_lexeme(stdout);
        printf("\n");
    }
}