#include #include #include #include // ================================================================ // PART ONE // defining S-expressions and cons cells struct sexpression { unsigned int type: 3; // range is 0 to 7 unsigned int inuse: 1; // only used during garbage collection int value: 28; // range will only be +/- 134,000,000. }; const int nil_type = 0, int_type = 1, char_type = 2, cons_type = 3, symbol_type = 4, special_type = 5; inline sexpression make(int type, int value) { sexpression s; s.type = type; s.value = value; return s; } inline bool operator==(const sexpression & a, const sexpression & b) { return a.type == b.type && a.value == b.value; } inline bool operator!=(const sexpression & a, const sexpression & b) { return a.type != b.type || a.value != b.value; } const int memory_size = 100000; sexpression car[memory_size], cdr[memory_size]; sexpression first_free, NIL, UNDEFINED, ERROR, ENDOFFILE; sexpression STACK, AL; const int spec_undefined = 1, spec_error = 2, spec_endoffile = 3; void init_memory() { NIL = make(nil_type, 0); UNDEFINED = make(special_type, spec_undefined); ERROR = make(special_type, spec_error); ENDOFFILE = make(special_type, spec_endoffile); STACK = NIL; AL = NIL; for (int i = 0; i < memory_size; i += 1) cdr[i] = make(cons_type, i+1); cdr[memory_size-1] = NIL; first_free = make(cons_type, 0); } // ================================================================ // PART TWO // The symbol table struct symbol_entry { char * name; sexpression value; int next_same_hash; }; symbol_entry * symbol_list; // This is just an array of all known symbols // so that symbols can have a unique number // that will fit in an sexpression's value field. // It can grow, like a vector. int * hash_table; // As a result, the hash table only has to record the // position in the symbol list of the first symbol that // has a particular hash value. int symbol_list_capacity, number_of_symbols; const int hash_table_size = 2000; void enlarge_symbol_list() { int oldsize = symbol_list_capacity; if (oldsize == 0) symbol_list_capacity = 1000; else symbol_list_capacity *= 2; symbol_entry * new_list = new symbol_entry[symbol_list_capacity]; for (int i = 0; i < oldsize; i += 1) new_list[i] = symbol_list[i]; delete[] symbol_list; symbol_list = new_list; } int hash(char * s) { unsigned int h = 938191; for (int i = 0; true; i += 1) { if (s[i] == 0) break; h = h * 69 + tolower(s[i]); } return h % hash_table_size; } void add_known_symbol(char * name, int num) { if (num >= symbol_list_capacity) enlarge_symbol_list(); if (num >= number_of_symbols) number_of_symbols = num + 1; int hv = hash(name); symbol_list[num].name = name; symbol_list[num].value = UNDEFINED; symbol_list[num].next_same_hash = hash_table[hv]; hash_table[hv] = num; } int find_symbol_index(char * name) { int hv = hash(name); int pos = hash_table[hv]; while (pos != -1) { if (strcasecmp(name, symbol_list[pos].name) == 0) return pos; pos = symbol_list[pos].next_same_hash; } return -1; } int add_new_symbol(char * name) { int n = number_of_symbols; add_known_symbol(name, n); return n; } sexpression name_to_symbol(char * name) { if (strcasecmp(name, "nil") == 0) return NIL; int symnum = find_symbol_index(name); if (symnum == -1) symnum = add_new_symbol(strdup(name)); return make(symbol_type, symnum); } enum symbols { sym_t, sym_car, sym_cdr, sym_cons, sym_add, sym_sub, sym_mul, sym_div, sym_quote, sym_eq, sym_lambda, sym_null, sym_if, sym_print, sym_defun, sym_load, sym_setq }; const int last_predefined = sym_setq; void init_symbols() { number_of_symbols = 0; symbol_list_capacity = 0; symbol_list = NULL; hash_table = new int[hash_table_size]; for (int i = 0; i < hash_table_size; i += 1) hash_table[i] = -1; add_known_symbol("T", sym_t); add_known_symbol("CAR", sym_car); add_known_symbol("CDR", sym_cdr); add_known_symbol("CONS", sym_cons); add_known_symbol("+", sym_add); add_known_symbol("-", sym_sub); add_known_symbol("*", sym_mul); add_known_symbol("/", sym_div); add_known_symbol("QUOTE", sym_quote); add_known_symbol("EQ", sym_eq); add_known_symbol("LAMBDA", sym_lambda); add_known_symbol("NULL", sym_null); add_known_symbol("IF", sym_if); add_known_symbol("PRINT", sym_print); add_known_symbol("DEFUN", sym_defun); add_known_symbol("LOAD", sym_load); // e.g. (LOAD 'xxx) loads xxx.lsp add_known_symbol("SETQ", sym_setq); } // ================================================================ // PART THREE // memory management, mostly garbage collection const bool report_garbage_collections = true; void mark_all_as_unused() { for (int i = 0; i < memory_size; i += 1) car[i].inuse = false; if (report_garbage_collections) { printf(", %d cells total", memory_size); fflush(stdout); } } void mark_as_used(sexpression s) { while (s.type == cons_type) { if (car[s.value].inuse) return; car[s.value].inuse = true; mark_as_used(car[s.value]); s = cdr[s.value]; } } void rebuild_free_list() { first_free = NIL; int collected = 0; for (int i = 0; i < memory_size; i += 1) { if (car[i].inuse == false) { collected += 1; cdr[i] = first_free; first_free = make(cons_type, i); } } if (report_garbage_collections) { printf(", %d cells recovered", collected); fflush(stdout); } } void preserve_important_things() { for (int i=0; i ' ') break; } if (c == '(' || c == ')' || c == '.' || c == '\'') { input_item[0] = c; input_item[1] = 0; input_item_is_special = true; return; } while (c > ' ' && c != '(' && c != ')') { if (length < max_item_length) { input_item[length] = c; length += 1; } c = fgetc(input_source); } input_item[length] = 0; if (c > ' ') ungetc(c, input_source); } bool is_all_number(char * s) { int start = 0; if (s[0]=='-' || s[0]=='+') start = 1; if (s[start] == 0) return false; for (int i=start; true; i+=1) { if (s[i] == 0) return true; if (! isdigit(s[i])) return false; } } void print_for_debugging(sexpression s) { int type = s.type; if (type == nil_type) printf("NIL"); else if (type == int_type) printf("int(%d)", s.value); else if (type == char_type) printf("char(%d)", s.value); else if (type == cons_type) printf("cons(%d)", s.value); else if (type == symbol_type) printf("symbol(%d, \"%s\")", s.value, symbol_list[s.value].name); else if (type == special_type) { if (s.value == spec_undefined) printf("special(undefined)"); else if (s.value == spec_error) printf("special(error)"); else if (s.value == spec_endoffile) printf("special(endoffile)"); else printf("special(%d)", s.value); } else printf("improper(%d, %d)", type, s.value); } int character_code(char * s) { if (strcasecmp(s, "#\\space") == 0) return ' '; else if (strcasecmp(s, "#\\newline") == 0) return '\n'; else if (s[2] == '#') return atol(s+3); else return s[2]; } sexpression convert_input_to_sexpression() { if (input_item_is_special) return ERROR; else if (is_all_number(input_item)) return make(int_type, atol(input_item)); else if (input_item[0]=='#' && input_item[1]=='\\' && input_item[2]!=0) return make(char_type, character_code(input_item)); else return name_to_symbol(input_item); } // ================================================================ // PART FIVE // full input and output of sexpressions char read_one_sexpression() // pushes the sexpression (if there was one) onto the // stack and returns 'S'. For special characters '(', // ')', '.' it doesn't push anything, just returns the // special character. // complicated only because a garbage collection could happen // at any moment, and because dealing with dots and errors // is just annoying. { read_next_item(); if (input_item_is_special) { char what = input_item[0]; if (what == '~' || what == '.' || what == ')') return what; else if (what == '(') { what = read_one_sexpression(); if (what == ')') { push(NIL); // the sexpression was just () return 'S'; } else if (what != 'S') push(ERROR); sexpression last_cell = new_cons(top(), NIL); int last_cell_index = last_cell.value; pop_then_push(last_cell); while (true) { sexpression orig_stack = STACK; what = read_one_sexpression(); if (what == ')') return 'S'; else if (what == '~') { cdr[last_cell_index] = ERROR; return 'S'; } else if (what == '.') { what = read_one_sexpression(); if (what == 'S') cdr[last_cell_index] = pop(); else { cdr[last_cell_index] = ERROR; if (what == ')') return 'S'; } while (true) { read_next_item(); if (input_item_is_special && input_item[0] == ')') return 'S'; cdr[last_cell_index] = ERROR; } } else { cdr[last_cell_index] = STACK; last_cell_index = STACK.value; cdr[last_cell_index] = NIL; STACK = orig_stack; } } } else if (what == '\'') { sexpression orig_stack_top = STACK; what = read_one_sexpression(); if (what != 'S') push(ERROR); int endplace = STACK.value; push(make(symbol_type, sym_quote)); push(STACK); cdr[endplace] = NIL; cdr[STACK.value] = orig_stack_top; return 'S'; } else return what; } else { push(convert_input_to_sexpression()); return 'S'; } } void read_sexpression() { char c = read_one_sexpression(); if (c == '~') push(ENDOFFILE); else if (c != 'S') push(ERROR); } void print_rest_of_list(sexpression s); void print_one_sexpression(sexpression s) { int type = s.type; if (type == nil_type) printf("NIL"); else if (type == int_type) printf("%d", s.value); else if (type == char_type) { int v = s.value; if (v>' ' && v<='~') printf("#\\%c", v); else if (v==' ') printf("#\\space"); else if (v=='\n') printf("#\\newline"); else printf("#\\#%d", v); } else if (type == symbol_type) printf("%s", symbol_list[s.value].name); else if (type == special_type) { if (s.value == 1) printf("UNDEFINED"); else if (s.value == 2) printf("ERROR"); else printf("special(%d)", s.value); } else if (type == cons_type) { printf("("); print_one_sexpression(car[s.value]); print_rest_of_list(cdr[s.value]); } else printf("improper(%d, %d)", type, s.value); } void print_rest_of_list(sexpression s) { while (true) { int type = s.type; if (type == nil_type) { printf(")"); return; } else if (type == cons_type) { printf(" "); print_one_sexpression(car[s.value]); s = cdr[s.value]; } else { printf(" . "); print_one_sexpression(s); printf(")"); return; } } } void print_sexpression(sexpression s) { print_one_sexpression(s); printf("\n"); } // ================================================================ // PART SIX // evaluate sexpression carof(sexpression s) { if (s.type != cons_type) { printf("ERROR: taking CAR of "); print_one_sexpression(s); printf("\n"); return ERROR; } return car[s.value]; } sexpression cdrof(sexpression s) { if (s.type != cons_type) { printf("ERROR: taking CDR of "); print_one_sexpression(s); printf("\n"); return ERROR; } return cdr[s.value]; } void setcarof(sexpression cell, sexpression val) { if (cell.type != cons_type) { printf("ERROR: setting CAR of "); print_one_sexpression(cell); printf("\n"); } else car[cell.value] = val; } void setcdrof(sexpression cell, sexpression val) { if (cell.type != cons_type) { printf("ERROR: setting CDR of "); print_one_sexpression(cell); printf("\n"); } else cdr[cell.value] = val; } int numberof(sexpression s) { if (s.type != int_type) { printf("ERROR: "); print_one_sexpression(s); printf(" is not a number\n"); return 1; } return s.value; } sexpression ALfind(sexpression key) { sexpression alist = AL; while (alist.type == cons_type) { sexpression pair = carof(alist); if (carof(pair) == key) return cdrof(pair); alist = cdrof(alist); } return UNDEFINED; } sexpression ALadd(sexpression key, sexpression value) { AL = new_cons(NIL, AL); setcarof(AL, new_cons(key, value)); } sexpression value_of_symbol(sexpression s) { int symbol_index = s.value; if (symbol_index <= last_predefined) return s; // so that for example evaluate(CONS) = CONS sexpression val = ALfind(s); if (val == UNDEFINED) return symbol_list[s.value].value; return val; } FILE * open_file_from_symbol(sexpression name, char * extension, char * mode) { if (name.type != symbol_type) { printf("ERROR: load not given a symbol\n"); return NULL; } char filename[max_item_length+10]; char * symname = symbol_list[name.value].name; for (int i=0; true; i+=1) { char c = symname[i]; if (c == 0) { strcpy(filename+i, extension); break; } filename[i] = tolower(c); } FILE * f = fopen(filename, mode); if (f == NULL) printf("ERROR: could not open file '%s'\n", filename); return f; } void evaluate(sexpression s) // s should be kept on the stack until we've // finished, just in case there's a garbage // collection. This function leaves the result // on the stack. { int type = s.type; if (type == symbol_type) push(value_of_symbol(s)); else if (type != cons_type) push(s); else // it's a cons cell { evaluate(carof(s)); sexpression op = top(); if (op.type == symbol_type) { int opcode = op.value; if (opcode > last_predefined) { printf("ERROR: "); // it's just a user-defined name. print_one_sexpression(op); printf(" is not a function\n"); pop_then_push(ERROR); return; } if (opcode == sym_quote) pop_then_push(carof(cdrof(s))); else if (opcode == sym_add) { evaluate(carof(cdrof(s))); int a = numberof(pop()); evaluate(carof(cdrof(cdrof(s)))); int b = numberof(pop()); pop_then_push(make(int_type, a+b)); } else if (opcode == sym_sub) { evaluate(carof(cdrof(s))); int a = numberof(pop()); evaluate(carof(cdrof(cdrof(s)))); int b = numberof(pop()); pop_then_push(make(int_type, a-b)); } else if (opcode == sym_mul) { evaluate(carof(cdrof(s))); int a = numberof(pop()); evaluate(carof(cdrof(cdrof(s)))); int b = numberof(pop()); pop_then_push(make(int_type, a*b)); } else if (opcode == sym_div) { evaluate(carof(cdrof(s))); int a = numberof(pop()); evaluate(carof(cdrof(cdrof(s)))); int b = numberof(pop()); if (b == 0) { printf("ERROR: division by zero\n"); pop_then_push(make(int_type, 0)); } else pop_then_push(make(int_type, a/b)); } else if (opcode == sym_car) { evaluate(carof(cdrof(s))); sexpression param = pop(); pop_then_push(carof(param)); } else if (opcode == sym_cdr) { evaluate(carof(cdrof(s))); sexpression param = pop(); pop_then_push(cdrof(param)); } else if (opcode == sym_cons) { evaluate(carof(cdrof(s))); sexpression param1 = top(); evaluate(carof(cdrof(cdrof(s)))); sexpression param2 = top(); sexpression result = new_cons(param1, param2); pop(); pop(); pop_then_push(result); } else if (opcode == sym_setq) { sexpression var = carof(cdrof(s)); if (var.type != symbol_type || var.value <= last_predefined) { printf("ERROR: in SETQ, "); print_one_sexpression(var); printf(" is not a variable\n"); } else { evaluate(carof(cdrof(cdrof(s)))); sexpression result = pop(); symbol_list[var.value].value = result; } pop_then_push(var); } else if (opcode == sym_defun) { push(NIL); sexpression f = new_cons(make(symbol_type, sym_lambda), cdrof(cdrof(s))); pop_then_push(f); f = new_cons(f, NIL); pop_then_push(f); f = new_cons(carof(cdrof(s)), f); pop_then_push(f); f = new_cons(make(symbol_type, sym_setq), f); pop(); pop_then_push(f); evaluate(f); f = pop(); pop_then_push(f); } else if (opcode == sym_load) { evaluate(carof(cdrof(s))); sexpression name = pop(); FILE * f = open_file_from_symbol(name, ".lsp", "r"); if (f == NULL) pop_then_push(ERROR); else { FILE * old_source = input_source; input_source = f; while (true) { read_sexpression(); sexpression item = top(); if (item == ENDOFFILE) break; evaluate(item); pop(); pop(); } fclose(f); input_source = old_source; pop(); pop_then_push(name); } } else { printf("ERROR: don't know how to evaluate ("); print_one_sexpression(op); printf(" ...)\n"); pop_then_push(ERROR); } } else if (op.type == cons_type) { printf("ERROR: you haven't implemented user-defined functions\n"); pop_then_push(ERROR); } else { printf("ERROR: "); print_one_sexpression(op); printf(" is not a function\n"); pop_then_push(ERROR); } } } // ================================================================ // PART SEVEN // the read eval print loop void main() { input_source = stdin; init_memory(); init_symbols(); while (true) { printf("> "); read_sexpression(); printf(" "); print_sexpression(top()); evaluate(top()); printf(" => "); print_sexpression(top()); pop(); pop(); if (STACK != NIL) { printf("[stack not empty]\n"); print_sexpression(STACK); STACK = NIL; } if (AL != NIL) { printf("[AL not empty]\n"); print_sexpression(AL); AL = NIL; } printf("\n"); } }