from copy import copy import pprint trace = False stepping = False dictionary = {} def read_dictionary(): global dictionary dictionary = {} num = 0 f = open("vocab.txt", "r") for line in f: num += 1 parts = line.split() if len(parts) == 0: continue if len(parts) < 2: print("vocab.txt line", num, "too short") continue word = parts[0] pos = parts[1] if word in dictionary: defns = dictionary[word] else: defns = {} dictionary[word] = defns if pos in defns: print("vocab.txt line", num, "duplicate entry for", word, "as", pos) continue defn = {} dictionary[word][pos] = defn for entry in parts[2:]: item = entry.split(":") if item[0] in defn: print("vocab.txt line", num, "duplicate entry for", item[0], "in", word, "as", pos) continue if len(item) == 1: vals = { "true" } else: vals = set(v for v in item[1].split(",")) defn[item[0]] = vals f.close() def show_dictionary(): global dictionary for word in sorted(dictionary): print(word, end = ":\n") defs = dictionary[word] for pos in defs: print(" ", pos, ":", sep = "", end = "") ents = defs[pos] for key in ents: print(" ", key, "=", "/".join(ents[key]), sep = "", end = "") print() def summarise_dictionary(): global dictionary words = sorted(dictionary) print(len(words), "different words") pos = set() for w in words: for p in dictionary[w]: pos.add(p) print(len(pos), "categories:") pos = sorted(pos) for p in pos: ents = {} for w in words: dw = dictionary[w] if p in dw: dwp = dw[p] for i in dwp: if i != "root": if i in ents: ent = ents[i] else: ent = set() ents[i] = ent for v in dwp[i]: ent.add(v) print(" ", p, ":", sep = "") for k in sorted(ents): print(" ", k, ": ", ", ".join(ents[k]), sep = "") grammar = {} start = None def read_grammar(): global grammar, start grammar = {} start = None num = 0 rules = None graph = None f = open("grammar.txt", "r") for line in f: num += 1 line = line.lstrip() if line == "": continue elif line.startswith("graph"): if graph != None: if "start" not in states: print("grammar.txt line graph", graph, "has no start state") raise Exception("bad") parts = line.split() if len(parts) != 2: print("grammar.txt line", num, "wrong format for graph introduction") raise Exception("bad") graph = parts[1] if parts[1] in grammar: print("grammar.txt line", num, "redefinition of graph", graph) raise Exception("bad") states = {} grammar[graph] = states state = None elif line.startswith("start"): parts = line.split() if len(parts) != 2: print("grammar.txt line", num, "wrong format for start definition") raise Exception("bad") start = parts[1] elif line.startswith("state"): if graph == None: print("grammar.txt line", num, "state definition outside any graph") raise Exception("bad") parts = line.split() if len(parts) != 2: print("grammar.txt line", num, "wrong format for state introduction") raise Exception("bad") state = parts[1] if state in states: print("grammar.txt line", num, "redefinition of state", state, "in graph", graph) raise Exception("bad") rules = [] states[state] = rules else: if state == None: print("grammar.txt line", num, "rule outside any state definition") raise Exception("bad") try: rule = eval(line) except: print("grammar.txt line", num, "badly formatted rule\n") raise if type(rule) != list or len(rule) != 3: print("grammar.txt line", num, "badly formatted rule type", type(rule), "length", len(rule)) raise Exception("bad") rules.append(rule) f.close() if start == None: print("grammar.txt no start graph") raise Exception("bad") def show_grammar(): global grammar for graph in sorted(grammar): print(graph, end = ":\n") rules = grammar[graph] for state in rules: print(" ", state, end = ":\n") rule = rules[state] for r in rule: print(" ", r[a_condition]) print(" ", r[a_disposition]) print(" ", r[a_action]) print() s_graph = 0 s_node = 1 s_inpos = 2 s_arcpos = 3 s_build = 4 s_notes = 5 s_lastcall = 6 a_condition = 0 a_disposition = 1 a_action = 2 typed = None stack = () used = None successes = 0 phrases = [ "the very big fat orange cat" ] def clean(s): r = bytearray() for chr in s: ascii = ord(chr) if ascii == 8: if len(r) > 0: del r[-1] else: r.append(ascii) return r.decode() def make_start_state(): global start, typed, phrases typed = clean(input(" <- ")).split() if len(typed) == 1: if typed[0].isdecimal(): n = int(typed[0]) if n > 0 and n <= len(phrases): typed = phrases[n - 1].split() print(typed) return [start, "start", 0, 0, [], {}, ()] def backtrack(): global stack if stack == (): print("\nbacktrack on empty stack\n") raise Exception("bad") stack = stack[1] if stack != () and stack[0][s_lastcall] == stack: stack[0][s_arcpos] += 1 def simple(x): if not x: return x elif type(x) in [str, int, float, bool]: return x elif type(x) == tuple: return tuple(minimal(i) for i in x) elif type(x) == list: return [minimal(i) for i in x] elif type(x) == dict: return [",".join(k + ":" + minimal(x[k]) for k in x)] else: return type(x).__name__ def minimal(x): if not x: return x elif type(x) in [str, int, float, bool]: return x elif type(x) == tuple or type(x) == list or type(x) == dict: return ",".join(almost_nothing(i) for i in x) else: return type(x).__name__ def almost_nothing(x): if not x or type(x) in [str, int, float, bool]: return str(x) else: return type(x).__name__ def print_stack_item(t): print(" gr", t[s_graph], "nd", t[s_node], "in", t[s_inpos], "ar", t[s_arcpos], "bu", simple(t[s_build]), "nt", simple(t[s_notes]), "lc", id(t[s_lastcall])) def print_stack(max = 999999): global stack if stack == (): print(" stack empty") return p = stack i = 0 while p != () and i <= max: print(" stack ", "top" if i == 0 else str(i).rjust(3), "id", id(p), end = "") print_stack_item(p[0]) p = p[1] i += 1 catword = None def parse(): global typed, grammar, stack, trace, used, successes, catword stack = (make_start_state(), ()) state = None steps = 0 successes = 0 while True: if stack == (): print() if successes == 0: print("finished, meaningless: no successful parses") elif successes == 1: print("finished, unambiguous: one successful parse") else: print("finished, ambiguous:", successes, "successful parses") return state = stack[0] graphname, nodename, inpos, arcpos, build, notes, lastcall = state word = typed[inpos] if inpos < len(typed) else "end" graph = grammar[graphname] arcs = graph[nodename] steps += 1 if trace: print("\n\nstep ", steps, ", word ", word, sep = "") print_stack() if stepping: input("press enter to continue: ") used = None newstate = None catword = None if arcpos >= len(arcs): if trace: print(" no more arcs: backtrack") backtrack() continue arc = arcs[arcpos] cond = arc[a_condition] if trace: print("checking arc", arcpos, "condition", cond) p = evaluate(cond, state) if p == False: state[s_arcpos] += 1 continue elif p != True: print("\nnon-boolean result", p, "in condition"); raise Exception("bad") disp = arc[a_disposition] if disp == (): newstate = state.copy() perform(arc[a_action], newstate) elif disp == "consume": newstate = state.copy() newstate[s_inpos] += 1 if newstate[s_inpos] > len(typed): print("\nattempt to consume input when none left"); raise Exception("bad") perform(arc[a_action], newstate) elif disp[0] == "sub": newstate = ([disp[1], "start", state[s_inpos], 0, [], {}, ()]) stack = (newstate, stack) newstate[s_lastcall] = stack state[s_arcpos] += 1 upnotes = None def evaluate(condition, state): global typed, grammar, dictionary, stack, trace, used, upnotes, catword printres = True r = False if state[s_inpos] >= len(typed): word = "" entries = {} else: word = typed[state[s_inpos]] if word not in dictionary: print("\nunknown word", word) raise Exception("bad") entries = dictionary[word] if trace: print(" check", condition, end = ": ") lenc = len(condition) if type(condition) == str: if condition == "always": r = True elif condition == "end": r = word == "" elif condition == "more": r = state[s_inpos] + 1< len(typed) else: r = condition elif type(condition) == tuple and lenc > 0: op = condition[0] if op == "cat": for pos in condition[1:]: r = pos in entries if r: catword = (pos, word) used = (pos, word, None) break elif op == "word": for i in condition[1:]: r = condition[i] == word if r: used = ("word", word, None) break elif op == "noted" and lenc == 2: r = condition[1] in state[s_notes] and state[s_notes][condition[1]] != False elif op == "noted" and lenc > 2: for i in condition[2:]: r = condition[1] in state[s_notes] and state[s_notes][condition[1]] == evaluate[i] if r: break elif op == "value" and lenc == 2: if condition[1] in state[s_notes]: return state[s_notes][condition[1]] else: return False elif op == "=" and lenc == 3: a = evaluate(condition[1], state) b = evaluate(condition[2], state) if type(a) == set: if type(b) == set: return len(a & b) != 0 else: return b in a elif type(b) == set: return a in b else: return a == b elif op == "has" and lenc >= 2: if catword == None: print("\nuse of has without prior successful cat") raise Exception("bad") entry = dictionary[catword[1][catword[0]]] if lenc == 2: r = condition[1] in entry and entry[condition[1]] != False elif lenc > 2: if condition[1] in entry: vals = entry[condition[1]] for i in condition[2:]: r = i in vals if r: break elif op == "tag" and lenc == 2: if catword == None: print("\nuse of has without prior successful cat") raise Exception("bad") entry = dictionary[catword[1]][catword[0]] if condition[1] not in entry: print("XXXX tag got nothing") return set() else: print("XXXX tag got", entry[condition[1]]) return entry[condition[1]] elif op == "not" and lenc == 2: r = evaluate(condition[1], state) if type(r) != bool: print("\nbad operand for not:", r) raise Exception("bad") r = not r elif op == "or": printres = False if trace: print() for part in condition[1:]: r = evaluate(part, state) if r == True: break elif r != False: print("\nbad operand for and:", r) raise Exception("bad") else: print("\nbad condition", condition) raise Exception("bad") elif type(condition) == list: printres = False if trace: print() for part in condition: r = evaluate(part, state) if r == False: break elif r != True: print("\nbad operand for implicit and:", r) raise Exception("bad") else: print("\nbad condition", condition) raise Exception("bad") if trace and printres: print(r) return r def print_item(item, ind = 0): if len(item) == 2: print(" " * ind, item[0]) print_parse(item[1], ind + 1) else: print(" " * ind, item[0], '"' + item[1] + '"', item[2:]) def print_parse(p, ind = 0): for item in p: print_item(item, ind) newbuild = None def perform(action, state, depth = 0): global typed, grammar, dictionary, stack, trace, used, newbuild, successes, upnotes if depth == 0: newbuild = None word = typed[state[s_inpos]] if state[s_inpos] < len(typed) else "" if action == "accept": action = ("accept", "asis") lena = len(action) if type(action) == list: for a in action: perform(a, state, depth + 1) elif type(action) == str: if trace: print(" action", action) if action == "build": if not newbuild: newbuild = state[s_build].copy() newbuild.append((used[0], used[1], dictionary[used[1]][used[0]])) state[s_build] = newbuild elif action == "receive": if not newbuild: newbuild = state[s_build].copy() newbuild.append((used[0], used[1])) state[s_build] = newbuild elif action == "stay": state[s_arcpos] = 0 stack = (state, stack) else: print("bad action", action, "in graph", state[s_graph]) elif type(action) == tuple: if trace: print(" action", action) op = action[0] if op == "accept" and lena == 2: detail = action[1] build = state[s_build] if len(build) <= 1 and detail == "nosolo": result = (build[0][0], build[0][1]) elif detail == "asis": result = (state[s_graph], build) else: result = (evaluate(detail, state), build) if trace: print("subgraph success for", state[s_graph]) if state[s_graph] == start: successes += 1 if trace: print("\n") print() print("***************************************************************************") print_parse([result]) if trace: print("***************************************************************************\n") backtrack() else: if trace: print_parse(result) lastcall = state[s_lastcall] oldstate = lastcall[1][0].copy() oldgraph = grammar[oldstate[s_graph]] oldnode = oldgraph[oldstate[s_node]] arc = oldnode[oldstate[s_arcpos] - 1] action = arc[a_action] oldstate[s_inpos] = state[s_inpos] used = result upnotes = state[s_notes] perform(action, oldstate) elif op == "to" and lena == 2: state[s_node] = action[1] state[s_arcpos] = 0 stack = (state, stack) elif op == "jump" and lena == 2: state[s_node] = action[1] state[s_arcpos] = 0 stack = (state, stack[1]) elif op == "note" and lena == 2: state[s_notes][action[1]] = True elif op == "note" and lena == 3: value = evaluate(action[2], state) state[s_notes][action[1]] = value elif op == "lift" and lena == 2: if type(upnotes) == dict and action[1] in upnotes: print("XXXX lift", action[1], "gets", upnotes[action[1]]) state[s_notes][action[1]] = upnotes[action[1]] else: print("XXXX lift", action[1], "gets nothing") else: print("bad action", op, "in graph", state[s_graph]) else: print("bad action", action, "in graph", state[s_graph]) read_dictionary() read_grammar() parse()