-
Notifications
You must be signed in to change notification settings - Fork 1
/
trie.py
95 lines (86 loc) · 3.38 KB
/
trie.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Predict words based on prefixes
# Source : Shubhadeep Roychowdhury
# https://towardsdatascience.com/implementing-a-trie-data-structure-in-python-in-less-than-100-lines-of-code-a877ea23c1a1
from typing import Tuple
trie_words = []
class TrieNode(object):
"""
Our trie node implementation. Very basic. but does the job
"""
def __init__(self, char: str):
self.char = char
self.children = []
# Is it the last character of the word.`
self.word_finished = False
# How many times this character appeared in the addition process
self.counter = 1
def add(root, word: str):
"""
Adding a word in the trie structure
"""
node = root
for char in word:
found_in_child = False
# Search for the character in the children of the present `node`
for child in node.children:
if child.char == char:
# We found it, increase the counter by 1 to keep track that another
# word has it as well
child.counter += 1
# And point the node to the child that contains this char
node = child
found_in_child = True
break
# We did not find it so add a new child
if not found_in_child:
new_node = TrieNode(char)
node.children.append(new_node)
# And then point node to the new child
node = new_node
# Everything finished. Mark it as the end of a word.
node.word_finished = True
def find_prefix(root, prefix: str) -> Tuple[bool, int]:
"""
Check and return
1. If the prefix exists in any of the words we added so far
2. If yes then how may words actually have the prefix
"""
global trie_words
trie_words = []
node = root
# If the root node has no children, then return False.
# Because it means we are trying to search in an empty trie
if not root.children:
return False, 0
for idx, char in enumerate(prefix):
char_not_found = True
# Search through all the children of the present `node`
for child in node.children:
if child.char == char:
# We found the char existing in the child.
char_not_found = False
# Assign node as the child containing the char and break
node = child
break
# Have we reached end of a word while our prefix string is not exhausted?
# Meaning, let's say, we have 'hammer' as a word in trie and the prefix
# we are searching for is 'hammeres'.
if node.word_finished and idx < (len(prefix) - 1) and not node.children:
# If so, return false. We are trying to search for a prefix longer than
# the actual closest match.
return []
# Return False anyway when we did not find a char.
if char_not_found:
return []
# Well, we are here means we have found the prefix. Return true to indicate that
# And also the counter of the last node. This indicates how many words have this
# prefix
all_words_helper(prefix, node)
return trie_words
def all_words_helper(prefix, node):
global trie_words
for child in node.children:
new_string = prefix + child.char
if child.word_finished:
trie_words.append(new_string)
all_words_helper(new_string, child)