Implement a suffix tree

While it took me awhile to implement, this exercise was definitely worth
doing. I think there should be a more elegant way to construct the tree using
maybe a stack, but I couldn't find it.

All of this was part of a larger effort to search a string for a variety of
patterns. The solution is to compile the string into a suffix tree and then
search the suffix tree for each of the patterns.

I'm glad I didn't gloss over this exercise.
This commit is contained in:
William Carroll 2020-11-19 00:35:23 +00:00
parent c0268ed31a
commit 1088e4143d

View file

@ -0,0 +1,64 @@
import random
def exists(pattern, tree):
"""
Return true if `pattern` exists in `tree`.
"""
if len(pattern) == 0:
return True
if len(pattern) == 1:
for branch in tree:
if branch[0] == pattern[0]:
return True
return False
for branch in tree:
if branch[0] == pattern[0]:
return exists(pattern[1:], branch[1])
return False
# Branch :: (Char, [Branch])
# SuffixTree :: [Branch]
def suffix_tree(xs):
"""
Create a suffix tree from the input string, `xs`.
"""
root = []
for i in range(len(xs)):
curr = xs[i:]
parent = root
for c1 in curr:
grafted = False
for c2, children in parent:
if c1 == c2:
grafted = True
parent = children
if grafted:
continue
else:
children = []
child = (c1, children)
parent.append(child)
parent = children
return root
################################################################################
# Tests
################################################################################
x = random.choice(["burrito", "pizza", "guacamole"])
tree = suffix_tree(x)
for branch in tree:
print(branch)
for _ in range(3):
n = len(x)
i, j = random.randint(0, n), random.randint(0, n)
pattern = x[min(i, j):max(i, j)]
print("Checking \"{}\" for \"{}\" ...".format(x, pattern))
print("Result: {}".format(exists(pattern, tree)))
pattern = random.choice(["foo", "bar", "baz"])
print("Checking \"{}\" for \"{}\" ...".format(x, pattern))
print("Result: {}".format(exists(pattern, tree)))
print()