Skip to content

Commit 484b865

Browse files
authored
Merge pull request #107 from BrianLusina/feat/stream-checker
feat(data structures): stream checker and trie node
2 parents 37b2941 + 4993a24 commit 484b865

File tree

11 files changed

+230
-63
lines changed

11 files changed

+230
-63
lines changed

DIRECTORY.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,9 @@
265265
* Minstack
266266
* [Test Min Stack](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/stacks/minstack/test_min_stack.py)
267267
* [Test Stacks](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/stacks/test_stacks.py)
268+
* Streams
269+
* Stream Checker
270+
* [Test Stream Checker](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/streams/stream_checker/test_stream_checker.py)
268271
* Timemap
269272
* [Test Timemap](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/timemap/test_timemap.py)
270273
* Trees
@@ -303,6 +306,9 @@
303306
* Ternary
304307
* [Node](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/ternary/node.py)
305308
* [Test Ternary Tree Paths](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/ternary/test_ternary_tree_paths.py)
309+
* Trie
310+
* [Trie](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/trie/trie.py)
311+
* [Trie Node](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/trie/trie_node.py)
306312
* Tuples
307313
* [Named Tuples](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/tuples/named_tuples.py)
308314

datastructures/streams/__init__.py

Whitespace-only changes.
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Stream of Characters
2+
3+
Design a data structure that processes a stream of characters and, after each character is received, determines if a
4+
suffix of these characters is a string in a given array of strings words.
5+
6+
For example, if words = ["cat"] and the stream adds the characters ‘d’, ‘c’, ‘a’, and ‘t’ in sequence, the algorithm
7+
should detect that the suffix "cat" of the stream "dcat" matches the word "cat" from the list.
8+
9+
So, for words, the goal is to detect if any of these words appear as a suffix of the stream built so far. To accomplish
10+
this, implement a class StreamChecker:
11+
12+
- **Constructor**: Initializes the object with the list of target words.
13+
- **boolean query(char letter)**: Appends a character to the stream and returns TRUE if any suffix of the stream matches
14+
a word in the list words.
15+
16+
Constraints:
17+
18+
- 1 ≤ words.length ≤ 1000
19+
- 1 ≤ words[i].length ≤ 200
20+
- words[i] consists of lowercase English letters.
21+
- letter is a lowercase English letter.
22+
- At most 4 * 10^2 calls will be made to query.
23+
24+
Examples:
25+
26+
![Example 1](./images/examples/stream_checker_example_1.png)
27+
![Example 2](./images/examples/stream_checker_example_2.png)
28+
![Example 3](./images/examples/stream_checker_example_3.png)
29+
30+
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
from typing import Deque, List
2+
from collections import deque
3+
from datastructures.trees.trie import TrieNode
4+
5+
6+
class StreamChecker(object):
    """
    Reports, after each streamed character, whether some suffix of the stream equals one of a fixed set of words.

    The words are stored reversed in a trie, so a query can walk the stream backwards (newest character first)
    and stop at the first match or the first dead end.
    """

    def __init__(self, words: List[str]):
        """
        Initializes a StreamChecker instance.

        Constructor Time: O(Ltotal), where Ltotal is the sum of the lengths of all words. This is a one-time cost.

        Parameters:
            words (List[str]): List of words to be checked in the stream.
        """
        self.words = words
        self.trie = TrieNode()
        self.max_len = 0
        self.__build_trie()
        # No match can be longer than the longest word, so the deque is capped at max_len and silently drops
        # older characters on append.
        self.stream: Deque[str] = deque(maxlen=self.max_len)

    def __build_trie(self):
        """
        Inserts every word into the trie in reversed character order and records the longest word length.

        Nodes are created explicitly (rather than relying on the children mapping to auto-create them) so the
        method behaves the same whether TrieNode.children is a plain dict or a defaultdict.
        """
        for word in self.words:
            # 1. track max length for deque optimization
            self.max_len = max(self.max_len, len(word))

            current = self.trie
            # 2. insert characters in reverse order
            for letter in reversed(word):
                if letter not in current.children:
                    current.children[letter] = TrieNode()
                current = current.children[letter]

            # 3. Mark the end of the reversed word
            current.is_end = True

    def query(self, letter: str) -> bool:
        """
        Appends a letter to the stream and checks whether any suffix of the stream is one of the words.

        Query Time: O(Lmax), where Lmax is the length of the longest word. The stream buffer never holds more than
        Lmax characters, so the backwards walk is bounded by Lmax regardless of how many letters have been queried.

        Parameters:
            letter (str): The next letter in the stream.

        Returns:
            bool: True if a suffix of the stream ending at this letter equals one of the words, False otherwise.
        """
        self.stream.append(letter)
        current = self.trie

        # Iterate stream in reverse (newest character first)
        for character in reversed(self.stream):
            # Dead end: no reversed word continues with this character, so no longer suffix can match either
            if character not in current.children:
                return False

            # Traverse to the next node
            current = current.children[character]

            # A complete reversed word has been spelled out: the matching suffix ends at the newest letter
            if current.is_end:
                return True

        # The buffered stream was exhausted without completing any word
        return False
50.5 KB
Loading
45.8 KB
Loading
39.6 KB
Loading
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import unittest
2+
from . import StreamChecker
3+
4+
5+
class StreamCheckerTestCase(unittest.TestCase):
    """Feeds letters to a StreamChecker one at a time and checks the result of every query."""

    def _replay(self, words, steps):
        """Builds a checker for `words`, then asserts each (letter, expected) pair in order."""
        checker = StreamChecker(words)
        for letter, expected in steps:
            verify = self.assertTrue if expected else self.assertFalse
            verify(checker.query(letter))

    def test_1(self):
        self._replay(
            ["go", "hi"],
            [
                ("h", False),
                ("i", True),
                ("g", False),
                ("o", True),
                ("x", False),
                ("y", False),
            ],
        )

    def test_2(self):
        self._replay(
            ["no", "yes"],
            [
                ("y", False),
                ("e", False),
                ("s", True),
                ("n", False),
                ("o", True),
            ],
        )

    def test_3(self):
        self._replay(
            ["a", "aa"],
            [
                ("a", True),
                ("a", True),
                ("a", True),
                ("b", False),
            ],
        )


if __name__ == '__main__':
    unittest.main()
Lines changed: 6 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,65 +1,8 @@
1-
from collections import defaultdict
2-
from typing import List
1+
from datastructures.trees.trie.trie_node import TrieNode
2+
from datastructures.trees.trie.trie import Trie
33

44

5-
class TrieNode:
6-
def __init__(self, char: str):
7-
self.char = char
8-
self.children = defaultdict(TrieNode)
9-
self.is_end = False
10-
11-
12-
class Trie:
13-
def __init__(self):
14-
self.root = TrieNode("")
15-
16-
def insert(self, word: str) -> None:
17-
curr = self.root
18-
19-
for char in word:
20-
if char in curr.children:
21-
curr = curr.children[char]
22-
23-
else:
24-
new_node = TrieNode(char)
25-
curr.children[char] = new_node
26-
curr = new_node
27-
28-
curr.is_end = True
29-
30-
def search(self, word: str) -> List[str]:
31-
curr = self.root
32-
33-
if len(word) == 0:
34-
return []
35-
36-
for char in word:
37-
if char in curr.children:
38-
curr = curr.children[char]
39-
else:
40-
return []
41-
42-
output = []
43-
44-
def dfs(node: TrieNode, prefix: str) -> None:
45-
if node.is_end:
46-
output.append((prefix + node.char))
47-
48-
for child in node.children.values():
49-
dfs(child, prefix + node.char)
50-
51-
dfs(curr, word[:-1])
52-
return output
53-
54-
def starts_with(self, prefix: str) -> bool:
55-
"""
56-
Returns true if the given prefix is a prefix of any word in the trie.
57-
"""
58-
curr = self.root
59-
60-
for char in prefix:
61-
if char not in curr.children:
62-
return False
63-
curr = curr.children[char]
64-
65-
return True
5+
__all__ = [
6+
"Trie",
7+
"TrieNode"
8+
]

datastructures/trees/trie/trie.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
from typing import List
2+
from datastructures.trees.trie.trie_node import TrieNode
3+
4+
5+
class Trie:
    """
    Prefix tree over strings.

    Each node is a TrieNode whose `children` mapping is keyed by the next character and whose `is_end` flag marks
    the end of an inserted word. Characters are taken from the mapping keys, not from the nodes themselves,
    because nodes are created without any character information.
    """

    def __init__(self):
        self.root = TrieNode()

    def insert(self, word: str) -> None:
        """
        Adds a word to the trie, creating any nodes missing along its path.
        """
        curr = self.root

        for char in word:
            if char not in curr.children:
                curr.children[char] = TrieNode()
            curr = curr.children[char]

        curr.is_end = True

    def search(self, word: str) -> List[str]:
        """
        Returns every inserted word that starts with the given word (including the word itself if it was inserted).

        Returns an empty list when the word is empty or when no inserted word starts with it.
        """
        if not word:
            return []

        curr = self.root

        for char in word:
            if char not in curr.children:
                return []
            curr = curr.children[char]

        output = []

        def dfs(node: TrieNode, prefix: str) -> None:
            # `prefix` is the full string spelled out on the path from the root to `node`
            if node.is_end:
                output.append(prefix)

            # The edge key is the character; nodes built by insert() carry none of their own
            for char, child in node.children.items():
                dfs(child, prefix + char)

        dfs(curr, word)
        return output

    def starts_with(self, prefix: str) -> bool:
        """
        Returns true if the given prefix is a prefix of any word in the trie.
        """
        curr = self.root

        for char in prefix:
            if char not in curr.children:
                return False
            curr = curr.children[char]

        return True

0 commit comments

Comments
 (0)