# GENERAL-PURPOSE WORD FILTERING AND PROCESSING
"""Word-list utilities: adjacency predicates, path search and simple filters.

Running or importing this module loads the word list at ``dictionary_path``
and prints every 12-letter word made of 12 distinct letters.
"""

import sys
from collections import deque

alphabet = "abcdefghijklmnopqrstuvwxyz"
_ALPHABET_SET = frozenset(alphabet)

dictionary_path = "/usr/share/dict/cracklib-small"
# dictionary_path = "/usr/share/dict/words_alpha.txt"


def _load_dictionary(path):
    """Return the lines of the word list at *path* without their newlines.

    If the file cannot be opened, a warning is written to stderr and an empty
    list is returned, so the helper functions in this module stay importable
    on systems that do not ship this word list.
    """
    try:
        with open(path) as f:
            # rstrip("\n") rather than line[:-1]: the last line may lack a
            # trailing newline, and slicing would then drop a real letter.
            return [line.rstrip("\n") for line in f]
    except OSError as err:
        print(f"warning: cannot read word list {path!r}: {err}",
              file=sys.stderr)
        return []


dictionary = _load_dictionary(dictionary_path)


def longest_path_from_adjacency(neighbors):
    """Return all longest simple paths in a directed graph (e.g. word ladders).

    Args:
        neighbors: iterable of ``(a, b)`` pairs, each a directed edge a -> b.
            Nodes must be hashable.

    Returns:
        A list of the longest paths found, in discovery order.  Each path is
        a list of nodes stored last-visited-first, i.e. ``path[0]`` is the
        end of the path and ``path[-1]`` its start.  Returns ``[]`` when
        there are no edges.

    Every path that sets or ties the current record length is printed as it
    is found.  The search enumerates every simple path, so it is only
    practical for small graphs.
    """
    edges = list(neighbors)

    # Successor lists are built once instead of rescanning every edge for
    # each path expansion; edge order (and duplicates) are preserved.
    successors = {}
    for a, b in edges:
        successors.setdefault(a, []).append(b)

    longest = []
    # Depth-first search: newly extended paths go to the front of the agenda.
    agenda = deque([b, a] for a, b in edges)
    while agenda:
        path = agenda.popleft()
        if not longest or len(path) > len(longest[0]):
            # Strictly longer than anything seen so far: start a new record.
            longest = [path]
            print(path)
        elif len(path) == len(longest[0]):
            # Ties with the current record: keep it alongside the others.
            longest.append(path)
            print(path)
        extensions = [
            [b] + path for b in successors.get(path[0], ()) if b not in path
        ]
        # extendleft reverses its argument, so reverse first to keep order.
        agenda.extendleft(reversed(extensions))
    return longest


# Unfinished sketch, kept for reference (never completed, not runnable):
# def all_paths_from_adj(compare_before, dictionary=dictionary) :
#     neighbors = {}      # key : neighbors of key
#     longest_paths = {}  # key : longest path starting from key
#     def combine_paths(paths_at_head, paths_at_tail, path) :
#         new_paths = [x + path for x in paths_at tail]
#     agenda = [[x] for x in dictionary]
#     while agenda :
#         path = agenda.pop(0)
#         head = path[-1]
#         tail = path[0]
#         if longest_paths.get(tail) is not None :
#             # We have explored longest paths here already
#             longest_paths[head] = combine_paths(longest_paths.get(head),longest_paths.get(tail), path)


def adj_mutate_one(x, y):
    """Return True if x and y have equal length and differ in exactly one
    position, i.e. mutating one letter changes one word into the other."""
    if len(x) != len(y):
        return False
    return sum(a != b for a, b in zip(x, y)) == 1


def adj_mutate_one_consecutive(x, y):
    """Return True if x and y have equal length and incrementing exactly one
    letter of x by one code point (e.g. 'c' -> 'd') changes it into y."""
    if len(x) != len(y):
        return False
    diffs = [(a, b) for a, b in zip(x, y) if a != b]
    if len(diffs) != 1:
        return False
    a, b = diffs[0]
    return chr(ord(a) + 1) == b


# Words consisting only of the lowercase letters in `alphabet`.
dictionary_a = {w for w in dictionary if set(w) <= _ALPHABET_SET}

# for x in dictionary_a :
#     for y in [w for w in dictionary_a if len(w) == len(x)] :
#         if adj_mutate_one_consecutive(x,y) :
#             print(x,y)


def increment_at(index, w):
    """Return w with the character at position index replaced by the next
    code point.  If index is not a position in ``range(len(w))`` (including
    negative values), w is returned unchanged."""
    return "".join(
        chr(ord(ch) + 1) if i == index else ch for i, ch in enumerate(w)
    )


# Disabled experiment: for each letter position, look for chains of
# dictionary words obtained by repeatedly incrementing that one letter.
RUN_INCREMENT_CHAIN_SEARCH = False

if RUN_INCREMENT_CHAIN_SEARCH:
    longest_word = max(dictionary_a, key=len, default="")
    longest_chain = [None]
    for position in reversed(range(1, len(longest_word))):
        for x in dictionary_a:
            if position + 1 >= len(x):
                continue
            chain = [x]
            successor = increment_at(position, x)
            while successor in dictionary_a:
                chain.append(successor)
                successor = increment_at(position, successor)
            if len(chain) >= len(longest_chain):
                longest_chain = chain
                print(chain)
    print(increment_at(2, "porter"))  # sanity check: prints "poster"

# FIND ALL 12-letter words made of 12 unique letters.
words = [w for w in dictionary if len(w) == 12 and len(set(w)) == 12]
print(words)