Skip to content

Commit 5c20061

Browse files
committed
feat(data-structures, trie, stream-checker): adds a stream checker data
structure for suffix matches. The stream checker data structure leverages the Trie data structure to find suffixes of the stream that match the words it was initialized with. Since a trie is a prefix tree that matches on prefixes, the words are inserted reversed so that the trie matches on suffixes instead. Note that the only change to the Trie node is that its constructor no longer takes a char argument. BREAKING CHANGE: The Trie data structure no longer handles search correctly and will need to be refactored to cater for the changes that have been introduced.
1 parent 37b2941 commit 5c20061

File tree

10 files changed

+224
-63
lines changed

10 files changed

+224
-63
lines changed

datastructures/streams/__init__.py

Whitespace-only changes.
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Stream of Characters
2+
3+
Design a data structure that processes a stream of characters and, after each character is received, determines if a
4+
suffix of these characters is a string in a given array of strings words.
5+
6+
For example, if words = ["cat"] and the stream adds the characters ‘d’, ‘c’, ‘a’, and ‘t’ in sequence, the algorithm
7+
should detect that the suffix "cat" of the stream "dcat" matches the word "cat" from the list.
8+
9+
So, given words, the goal is to detect whether any of these words appears as a suffix of the stream built so far. To accomplish
10+
this, implement a class StreamChecker:
11+
12+
- **Constructor**: Initializes the object with the list of target words.
13+
- **boolean query(char letter)**: Appends a character to the stream and returns TRUE if any suffix of the stream matches
14+
a word in the list words.
15+
16+
Constraints:
17+
18+
- 1 ≤ words.length ≤ 1000
19+
- 1 ≤ words[i].length ≤ 200
20+
- words[i] consists of lowercase English letters.
21+
- letter is a lowercase English letter.
22+
- At most 4 * 10^2 calls will be made to query.
23+
24+
Examples:
25+
26+
![Example 1](./images/examples/stream_checker_example_1.png)
27+
![Example 2](./images/examples/stream_checker_example_2.png)
28+
![Example 3](./images/examples/stream_checker_example_3.png)
29+
30+
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
from typing import Deque, List
2+
from collections import deque
3+
from datastructures.trees.trie import TrieNode
4+
5+
6+
class StreamChecker:
    """
    Detects whether any suffix of a character stream is one of a fixed set of words.

    Every word is inserted into a trie back to front, so walking the trie while reading the
    stream newest-character-first matches suffixes of the stream against the words.
    """

    def __init__(self, words: List[str]):
        """
        Initializes a StreamChecker instance.

        Constructor Time: O(Ltotal), where Ltotal is the sum of the lengths of all words. This is a one-time cost.

        Parameters:
            words (List[str]): List of words to be checked in the stream.
        """
        self.words = words
        self.trie = TrieNode()
        self.max_len = 0
        self.__build_trie()
        # No word is longer than max_len, so older characters can never take part in a
        # match; the bounded deque drops them automatically as new letters arrive.
        self.stream: Deque[str] = deque(maxlen=self.max_len)

    def __build_trie(self):
        """
        Inserts every word into the trie with its letters reversed and records the
        length of the longest word in self.max_len.
        """
        for word in self.words:
            # 1. track max length for deque optimization
            self.max_len = max(self.max_len, len(word))

            current = self.trie
            # 2. insert characters in reverse order; this relies on indexing
            #    ``children`` to create the child node when it is missing
            for letter in reversed(word):
                current = current.children[letter]

            # 3. mark the end of the reversed word
            current.is_end = True

    def query(self, letter: str) -> bool:
        """
        Appends a letter to the stream and reports whether any suffix of the stream is one of the words.

        Query Time: O(Lmax), where Lmax is the length of the longest word. The stream history is capped
        at Lmax characters and the trie is walked at most one step per stored character.

        Parameters:
            letter (str): The next letter in the stream.

        Returns:
            bool: True if a suffix of the stream (ending with this letter) equals one of the words,
            False otherwise.
        """
        self.stream.append(letter)
        current = self.trie

        # Iterate stream in reverse (newest character first)
        for character in reversed(self.stream):
            # Dead end: use a membership test rather than indexing so that a miss
            # does not add a node to the trie
            if character not in current.children:
                return False

            current = current.children[character]

            # A marked node means the characters read so far spell a reversed word
            if current.is_end:
                return True

        # The whole stored history was consumed without reaching the end of a word
        return False
50.5 KB
Loading
45.8 KB
Loading
39.6 KB
Loading
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import unittest
2+
from . import StreamChecker
3+
4+
5+
class StreamCheckerTestCase(unittest.TestCase):
    """Feeds letters to a StreamChecker one at a time and checks each query result."""

    def _check_queries(self, words, expectations):
        """Build a checker for ``words`` and verify every (letter, expected) pair in order."""
        checker = StreamChecker(words)
        for letter, expected in expectations:
            outcome = checker.query(letter)
            if expected:
                self.assertTrue(outcome)
            else:
                self.assertFalse(outcome)

    def test_1(self):
        self._check_queries(
            ["go", "hi"],
            [
                ("h", False),
                ("i", True),
                ("g", False),
                ("o", True),
                ("x", False),
                ("y", False),
            ],
        )

    def test_2(self):
        self._check_queries(
            ["no", "yes"],
            [
                ("y", False),
                ("e", False),
                ("s", True),
                ("n", False),
                ("o", True),
            ],
        )

    def test_3(self):
        # "a" is itself a word, so every "a" matches regardless of what preceded it
        self._check_queries(
            ["a", "aa"],
            [
                ("a", True),
                ("a", True),
                ("a", True),
                ("b", False),
            ],
        )


if __name__ == '__main__':
    unittest.main()
Lines changed: 6 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,65 +1,8 @@
1-
from collections import defaultdict
2-
from typing import List
1+
from datastructures.trees.trie.trie_node import TrieNode
2+
from datastructures.trees.trie.trie import Trie
33

44

5-
class TrieNode:
6-
def __init__(self, char: str):
7-
self.char = char
8-
self.children = defaultdict(TrieNode)
9-
self.is_end = False
10-
11-
12-
class Trie:
13-
def __init__(self):
14-
self.root = TrieNode("")
15-
16-
def insert(self, word: str) -> None:
17-
curr = self.root
18-
19-
for char in word:
20-
if char in curr.children:
21-
curr = curr.children[char]
22-
23-
else:
24-
new_node = TrieNode(char)
25-
curr.children[char] = new_node
26-
curr = new_node
27-
28-
curr.is_end = True
29-
30-
def search(self, word: str) -> List[str]:
31-
curr = self.root
32-
33-
if len(word) == 0:
34-
return []
35-
36-
for char in word:
37-
if char in curr.children:
38-
curr = curr.children[char]
39-
else:
40-
return []
41-
42-
output = []
43-
44-
def dfs(node: TrieNode, prefix: str) -> None:
45-
if node.is_end:
46-
output.append((prefix + node.char))
47-
48-
for child in node.children.values():
49-
dfs(child, prefix + node.char)
50-
51-
dfs(curr, word[:-1])
52-
return output
53-
54-
def starts_with(self, prefix: str) -> bool:
55-
"""
56-
Returns true if the given prefix is a prefix of any word in the trie.
57-
"""
58-
curr = self.root
59-
60-
for char in prefix:
61-
if char not in curr.children:
62-
return False
63-
curr = curr.children[char]
64-
65-
return True
5+
__all__ = [
6+
"Trie",
7+
"TrieNode"
8+
]

datastructures/trees/trie/trie.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
from typing import List
2+
from datastructures.trees.trie.trie_node import TrieNode
3+
4+
5+
class Trie:
    """Prefix tree over strings, built from TrieNode objects linked by character."""

    def __init__(self):
        # The root represents the empty prefix
        self.root = TrieNode()

    def insert(self, word: str) -> None:
        """
        Adds a word to the trie, creating any missing nodes along its path.
        """
        curr = self.root

        for char in word:
            if char not in curr.children:
                curr.children[char] = TrieNode()
            curr = curr.children[char]

        curr.is_end = True

    def search(self, word: str) -> List[str]:
        """
        Returns every inserted word that starts with the given word (including the word
        itself if it was inserted). Returns an empty list for an empty input or when no
        inserted word has this prefix.
        """
        if not word:
            return []

        curr = self.root
        for char in word:
            if char not in curr.children:
                return []
            curr = curr.children[char]

        output: List[str] = []

        def dfs(node: TrieNode, path: str) -> None:
            # Nodes do not store their own character, so the word is rebuilt from
            # the keys under which each child is held by its parent.
            if node.is_end:
                output.append(path)

            for char, child in node.children.items():
                dfs(child, path + char)

        dfs(curr, word)
        return output

    def starts_with(self, prefix: str) -> bool:
        """
        Returns true if the given prefix is a prefix of any word in the trie.
        """
        curr = self.root

        for char in prefix:
            if char not in curr.children:
                return False
            curr = curr.children[char]

        return True
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
from typing import DefaultDict
2+
from collections import defaultdict
3+
4+
5+
class TrieNode:
    """A single node of a trie: its children keyed by character, plus an end-of-word flag."""

    def __init__(self):
        """
        Initializes a TrieNode instance.

        A TrieNode holds a dictionary of its children, keyed by character, and a boolean indicating
        whether the node is the end of a word in the Trie. The node does not store its own character;
        that is the key under which its parent holds it.
        """
        # defaultdict: indexing a missing character creates and stores a new child node,
        # while a membership test (``char in node.children``) does not.
        self.children: DefaultDict[str, "TrieNode"] = defaultdict(TrieNode)
        self.is_end = False

    def __repr__(self):
        # Render the children as a plain dict so nested nodes read cleanly
        return f"TrieNode({dict(self.children)}, {self.is_end})"

0 commit comments

Comments
 (0)