tvl-depot/scratch/facebook/hard/suffix-tree.py
William Carroll fa717e8a6f Re-implement suffix_tree function
Create a suffix tree from an input string. This implementation uses a stack to
control the flow of the program.

I expected this attempt to be easier than my first attempt, but surprisingly, it
was similarly difficult. It took me ~30-45 minutes to successfully implement
this function, and I'm still not pleased with the final result.
2020-11-19 21:12:36 +00:00

93 lines
2.5 KiB
Python

import random
from collections import deque
def exists(pattern, tree):
"""
Return true if `pattern` exists in `tree`.
"""
if len(pattern) == 0:
return True
if len(pattern) == 1:
for branch in tree:
if branch[0] == pattern[0]:
return True
return False
for branch in tree:
if branch[0] == pattern[0]:
return exists(pattern[1:], branch[1])
return False
# Branch :: (Char, [Branch])
# SuffixTree :: [Branch]
def suffix_tree(xs):
"""
Create a suffix tree from the input string, `xs`.
"""
root = []
for i in range(len(xs)):
curr = xs[i:]
parent = root
for c1 in curr:
grafted = False
for c2, children in parent:
if c1 == c2:
grafted = True
parent = children
if grafted:
continue
else:
children = []
child = (c1, children)
parent.append(child)
parent = children
return root
def suffix_tree(x):
"""
Creates a suffix from the input string, `x`. This implementation uses a
stack.
"""
result = [None, []]
q = deque()
for i in range(len(x)):
q.append((result, x[i:]))
while q:
parent, x = q.popleft()
s = []
s.append((parent, x))
while s:
parent, x = s.pop()
if not x:
continue
c, rest = x[0], x[1:]
grafted = False
for child in parent[1]:
if c == child[0]:
s.append((child, rest))
grafted = True
if not grafted:
child = [c, []]
parent[1].append(child)
s.append((child, rest))
return result[1]
################################################################################
# Tests
################################################################################
x = random.choice(["burrito", "pizza", "guacamole"])
tree = suffix_tree(x)
for branch in tree:
print(branch)
for _ in range(3):
n = len(x)
i, j = random.randint(0, n), random.randint(0, n)
pattern = x[min(i, j):max(i, j)]
print("Checking \"{}\" for \"{}\" ...".format(x, pattern))
print("Result: {}".format(exists(pattern, tree)))
pattern = random.choice(["foo", "bar", "baz"])
print("Checking \"{}\" for \"{}\" ...".format(x, pattern))
print("Result: {}".format(exists(pattern, tree)))
print()