Implement a suffix tree
While it took me a while to implement, this exercise was definitely worth doing. I think there should be a more elegant way to construct the tree, perhaps using a stack, but I couldn't find it. All of this was part of a larger effort to search a string for a variety of patterns. The solution is to compile the string into a suffix tree and then search the suffix tree for each of the patterns. I'm glad I didn't gloss over this exercise.
This commit is contained in:
parent
c0268ed31a
commit
1088e4143d
1 changed files with 64 additions and 0 deletions
64
scratch/facebook/hard/suffix-tree.py
Normal file
64
scratch/facebook/hard/suffix-tree.py
Normal file
|
@ -0,0 +1,64 @@
|
|||
import random
|
||||
|
||||
def exists(pattern, tree):
    """
    Return true if `pattern` exists in `tree`.

    `tree` is a list of branches; each branch is indexable, with the
    branch's character at index 0 and its list of child branches at
    index 1. The empty pattern is always considered present.

    The tree is walked iteratively, one level per pattern character, so
    long patterns neither hit the interpreter's recursion limit nor pay
    for a fresh `pattern[1:]` copy at every step.
    """
    node = tree
    for char in pattern:
        for branch in node:
            if branch[0] == char:
                # Descend into the matching branch and move on to the
                # next pattern character.
                node = branch[1]
                break
        else:
            # No branch at this level carries `char`.
            return False
    return True
|
||||
|
||||
# Branch :: (Char, [Branch])
|
||||
# SuffixTree :: [Branch]
|
||||
|
||||
def suffix_tree(xs):
    """
    Create a suffix tree from the input string, `xs`.

    The result is a list of branches, where each branch is a
    `(char, children)` tuple and `children` is again a list of branches.
    Every suffix of `xs` is inserted one character per node, sharing
    any prefix that an earlier suffix already created. No
    end-of-suffix marker is stored. An empty input yields `[]`.
    """
    root = []
    for start in range(len(xs)):
        node = root
        for char in xs[start:]:
            for label, children in node:
                if label == char:
                    # Characters are unique among siblings, so stop
                    # scanning as soon as the match is found.
                    node = children
                    break
            else:
                # No sibling carries `char`: graft a new branch and
                # continue inserting beneath it.
                children = []
                node.append((char, children))
                node = children
    return root
|
||||
|
||||
|
||||
################################################################################
|
||||
# Tests
|
||||
################################################################################
|
||||
|
||||
# Pick a word at random, compile it into a suffix tree, and dump the
# top-level branches.
x = random.choice(["burrito", "pizza", "guacamole"])
tree = suffix_tree(x)
for char, children in tree:
    print((char, children))

for _ in range(3):
    # Two random cut points, ordered, delimit a slice of `x`.
    lo, hi = sorted((random.randint(0, len(x)), random.randint(0, len(x))))
    # Check the slice of `x` first, then one of the fixed sample words.
    candidates = (x[lo:hi], random.choice(["foo", "bar", "baz"]))
    for pattern in candidates:
        print("Checking \"{}\" for \"{}\" ...".format(x, pattern))
        print("Result: {}".format(exists(pattern, tree)))
    print()
|
Loading…
Reference in a new issue