"""Generate pseudo-words from letter-transition statistics of a text file.

Every word of the input text is reduced to a sequence of symbol codes
(see ``clean``).  Two tables are collected from those sequences:

* ``freqs[prev][cur]`` - how often symbol ``cur`` follows symbol ``prev``
  (``prev == 0`` means "start of word"); the counts are squared once the
  whole file has been read.
* ``lenfrs[n]`` - how many words consisted of exactly ``n`` symbols.

``create`` then draws a word length from ``lenfrs`` and walks the
transition table to produce random, vaguely word-like strings.
"""
import random

# Symbol codes: 0 = start of word, 1-26 = 'a'-'z', 27 = apostrophe.
# Code 28 ('>' in ``decoder``) has a slot in the tables but is never
# produced by ``clean``, so its counts always stay at zero.
freqs = [[0 for i in range(0, 29)] for j in range(0, 29)]
numbers = list(range(0, 29))
lenfrs = [0]

_DEFAULT_SOURCE = (
    "D:\\books\\project-gutenberg\\whole-thing\\text-files"
    "\\approximations\\268.txt"
)


def read_files(path=_DEFAULT_SOURCE):
    """Accumulate word-length and transition statistics from a text file.

    Args:
        path: File to read.  Defaults to the path that was originally
            hard-coded in this script.

    The module-level ``freqs`` and ``lenfrs`` tables are updated in place.
    After the file has been read every entry of ``freqs`` is replaced by
    its square.  Because the squaring is applied to the whole table, this
    function is meant to be called once per run; a second call would
    square the already-squared counts again.

    Raises:
        OSError: If the file cannot be opened.
    """
    # ``with`` guarantees the file is closed even if reading fails.
    with open(path, "r") as fi:
        for line in fi:
            for word in line.split():
                chars = clean(word)
                length = len(chars)
                if length < 1:
                    continue
                # Grow the length histogram (in place) up to this length.
                if len(lenfrs) <= length:
                    lenfrs.extend([0] * (length - len(lenfrs) + 1))
                lenfrs[length] += 1
                prev = 0  # 0 = "start of word"
                for c in chars:
                    freqs[prev][c] += 1
                    prev = c
    # Square the counts so frequent transitions are weighted more heavily
    # relative to rare ones when sampling.
    for row in freqs:
        for j, count in enumerate(row):
            row[j] = count * count


def clean(s):
    """Convert a raw word into a list of symbol codes.

    Letters map case-insensitively to 1-26 and an apostrophe maps to 27.
    A hyphen ends the word once more than one symbol has been collected;
    every other character (and an earlier hyphen) is skipped.

    Args:
        s: The raw word, possibly with punctuation attached.

    Returns:
        The list of symbol codes; empty if ``s`` contains no usable
        characters.
    """
    r = []
    for c in s:
        if 'a' <= c <= 'z':
            r.append(ord(c) - ord('a') + 1)
        elif 'A' <= c <= 'Z':
            # Offset from 'A' (not 'Z') so upper case yields 1-26 too.
            r.append(ord(c) - ord('A') + 1)
        elif c == '\'':
            r.append(27)
        elif c == '-' and len(r) > 1:
            return r
    return r


decoder = "<abcdefghijklmnopqrstuvwxyz'>"


def create(n=1):
    """Generate ``n`` random pseudo-words from the collected statistics.

    For each word a length is drawn with the weights in ``lenfrs``; each
    symbol is then drawn with the weights in the ``freqs`` row of the
    previous symbol (row 0 for the first symbol).  A word ends early if
    the previous symbol has no recorded successors.

    Args:
        n: Number of words to generate.

    Returns:
        A list of ``n`` strings.

    Raises:
        ValueError: If ``n`` is positive but no word statistics have been
            collected yet.
    """
    if n > 0 and not any(lenfrs):
        raise ValueError(
            "no word statistics collected; call read_files() first"
        )
    lennums = list(range(0, len(lenfrs)))
    symbols = numbers[1:]  # code 0 (start marker) is never emitted
    r = []
    for _ in range(0, n):
        letters = []
        prev = 0
        length = random.choices(lennums, lenfrs)[0]
        for _ in range(0, length):
            weights = freqs[prev][1:]
            if not any(weights):
                # ``prev`` was only ever seen at the end of a word, so
                # there is nothing to sample from; stop the word here
                # instead of letting random.choices raise.
                break
            ch = random.choices(symbols, weights)[0]
            letters.append(decoder[ch])
            prev = ch
        r.append("".join(letters))
    return r


if __name__ == "__main__":
    read_files()
    x = create(25)
    for w in x:
        print(w)