import random

# Shared model state, filled in by read_files() and sampled by create().
#
# freqs[p][c] counts how often symbol code c directly follows symbol code p;
# row 0 is used for the first symbol of a word. The table is 29 x 29, one
# slot per character of the `decoder` string.
freqs = [[0] * 29 for _ in range(29)]
# All symbol codes, 0 through 28, used as the population when sampling.
numbers = list(range(29))
# lenfrs[k] counts the words of length k seen so far; it grows on demand.
lenfrs = [0]

def read_files(path="D:\\books\\project-gutenberg\\whole-thing\\text-files\\approximations\\268.txt"):
  """Accumulate word-length and letter-pair counts from a text file.

  Every whitespace-separated token of the file is converted to symbol codes
  with clean(). For each non-empty result, lenfrs[length] is incremented and
  freqs[prev][code] is incremented for each code, where prev is 0 for the
  first code of the word and the preceding code afterwards.

  After the whole file has been read, every entry of freqs is replaced by its
  square (presumably to favour common letter pairs when sampling - confirm).
  NOTE(review): because of that final squaring, calling this function more
  than once squares counts that were already squared.

  Args:
    path: Text file to read. Defaults to the corpus path this script was
      written for.

  Raises:
    OSError: If the file cannot be opened or read.
  """
  global freqs, lenfrs
  # The context manager closes the file even if processing a line raises.
  with open(path, "r") as fi:
    for line in fi:
      for word in line.split():
        chars = clean(word)
        length = len(chars)
        if length < 1:
          continue
        # Grow the length histogram so that index `length` exists.
        if len(lenfrs) <= length:
          lenfrs += [0] * (length - len(lenfrs) + 1)
        lenfrs[length] += 1
        prev = 0  # code 0 stands for "start of word"
        for c in chars:
          freqs[prev][c] += 1
          prev = c
  # Square every count in place.
  for row in freqs:
    for j, count in enumerate(row):
      row[j] = count * count

def clean(s):
  """Convert a raw token into a list of symbol codes.

  Letters map to 1-26 regardless of case ('a'/'A' -> 1 ... 'z'/'Z' -> 26) and
  an apostrophe maps to 27. A hyphen met after at least two codes have been
  collected ends the word, so only the part before it is returned. Every
  other character (including an earlier hyphen) is skipped.

  Args:
    s: The token to convert.

  Returns:
    A list of ints in the range 1-27; empty if s has no usable characters.
  """
  r = []
  for c in s:
    if 'a' <= c <= 'z':
      r.append(ord(c) - ord('a') + 1)
    elif 'A' <= c <= 'Z':
      # Offset from 'A' (not 'Z') so upper case shares the 1-26 codes of
      # lower case; offsetting from 'Z' produced values from -24 to 1.
      r.append(ord(c) - ord('A') + 1)
    elif c == '\'':
      r.append(27)
    elif c == '-' and len(r) > 1:
      return r
  return r

# Maps a symbol code (the index) to the character it stands for.
decoder = (
  "<"                           # 0: used as "start of word" by the model
  "abcdefghijklmnopqrstuvwxyz"  # 1-26: letters, as produced by clean()
  "'"                           # 27: apostrophe
  ">"                           # 28: presumably an end-of-word marker
)

def create(n = 1):
  """Generate n random words from the letter-pair model.

  For each word a length is drawn with the weights in lenfrs, then symbols
  are drawn one at a time from codes 1..28 with the weights in
  freqs[prev][1:], where prev is 0 for the first symbol and the previously
  drawn code afterwards. Codes are turned into characters through decoder.

  If the previous symbol has no recorded successor (all of its weights are
  zero), the word ends early instead of random.choices raising ValueError.

  Args:
    n: Number of words to generate.

  Returns:
    A list of n strings.

  Raises:
    ValueError: From random.choices if lenfrs holds no positive count,
      i.e. no words have been read yet (Python 3.9+).
  """
  lennums = list(range(len(lenfrs)))
  candidates = numbers[1:]  # code 0 only marks the start of a word
  words = []
  for _ in range(n):
    length = random.choices(lennums, lenfrs)[0]
    letters = []
    prev = 0
    for _step in range(length):
      weights = freqs[prev][1:]
      if not any(weights):
        # Dead end: nothing ever followed `prev` in the corpus.
        break
      ch = random.choices(candidates, weights)[0]
      letters.append(decoder[ch])
      prev = ch
    words.append("".join(letters))
  return words

# Build the model from the corpus, then print 25 generated words, one per
# line.
read_files()
x = create(25)
print("\n".join(x))