diff --git a/DIRECTORY.md b/DIRECTORY.md index 8e5cb0d2..6fcb0090 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -23,6 +23,7 @@ * [Test Is Valid Subsequence](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/arrays/subsequence/test_is_valid_subsequence.py) * Backtracking * Combination + * [Test Combination](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/backtracking/combination/test_combination.py) * [Test Combination 2](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/backtracking/combination/test_combination_2.py) * [Test Combination 3](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/backtracking/combination/test_combination_3.py) * Decode Message @@ -139,6 +140,8 @@ * [Test Can Visit All Rooms](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/graphs/keys_and_rooms/test_can_visit_all_rooms.py) * Knight On Chess Board * [Test Knight On Chess Board](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/graphs/knight_on_chess_board/test_knight_on_chess_board.py) + * Last Day Where You Can Still Cross + * [Test Last Day You Can Still Cross](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/graphs/last_day_where_you_can_still_cross/test_last_day_you_can_still_cross.py) * Maxareaofisland * [Test Max Area Of Island](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/graphs/maxareaofisland/test_max_area_of_island.py) * Min Cost To Supply @@ -203,6 +206,8 @@ * Ransom Note * [Test Ransom Note](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/hash_table/ransom_note/test_ransom_note.py) * Heap + * Construct Target With Sums + * [Test Construct Target With Sums](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/heap/construct_target_with_sums/test_construct_target_with_sums.py) * Kclosestelements * [Test Find K Closest 
Elements](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/heap/kclosestelements/test_find_k_closest_elements.py) * Longest Happy String @@ -403,6 +408,8 @@ * Topkfreqwords * [Test Top K Frequent Words](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/trie/topkfreqwords/test_top_k_frequent_words.py) * Two Pointers + * Append Chars To Make Subsequence + * [Test Append Chars To Make Subsequence](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/two_pointers/append_chars_to_make_subsequence/test_append_chars_to_make_subsequence.py) * Array 3 Pointers * [Test Array 3 Pointers](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/two_pointers/array_3_pointers/test_array_3_pointers.py) * Container With Most Water @@ -600,6 +607,8 @@ * Lrucache * [With Internal Linked List](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/lrucache/with_internal_linked_list.py) * [With Ordered Dict](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/lrucache/with_ordered_dict.py) + * Map Sum + * [Test Map Sum Pairs](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/map_sum/test_map_sum_pairs.py) * Orderedstream * [Test Ordered Stream](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/orderedstream/test_ordered_stream.py) * Queues diff --git a/datastructures/map_sum/README.md b/datastructures/map_sum/README.md new file mode 100644 index 00000000..e90f19cd --- /dev/null +++ b/datastructures/map_sum/README.md @@ -0,0 +1,37 @@ +# Map Sum Pairs + +Design a data structure that supports the following operations: + +1. Insert a key-value pair: + - Each key is a string, and each value is an integer. + - If the key already exists, update its value to the new value (overriding the previous value). + +2. Return the prefix sum: + - Given a string, `prefix`, return the total sum of all values associated with keys that start with this prefix. 
+ +To accomplish this, implement a class MapSum: + +- Constructor: Initializes the object. +- `void insert (String key, int val)`: Inserts the key-value pair into the data structure. If the key already exists, + its value is updated to the new one. +- `int sum (String prefix)`: Returns the total sum of values for all keys that begin with the specified prefix. + +## Constraints + +- 1 ≤ `key.length`, `prefix.length` ≤ 50 +- Both `key` and `prefix` consist of only lowercase English letters. +- 1 ≤ `val` ≤ 1000 +- At most 50 calls will be made to insert and sum. + +## Examples + +![Example 1](./images/examples/map_sum_pairs_example_1.png) +![Example 2](./images/examples/map_sum_pairs_example_2.png) +![Example 3](./images/examples/map_sum_pairs_example_3.png) + +## Topics + +- Hash Table +- String +- Design +- Trie diff --git a/datastructures/map_sum/__init__.py b/datastructures/map_sum/__init__.py new file mode 100644 index 00000000..04490d99 --- /dev/null +++ b/datastructures/map_sum/__init__.py @@ -0,0 +1,129 @@ +from typing import Dict +from collections import Counter + +from datastructures.trees.trie import TrieNode + + +class MapSumBruteForce(object): + """ + This solution to creating a map sum data structure that finds the sum of keys with a matching prefix uses a + Hash Table combined with Brute-Force Search and String Matching. + + Time Complexity: Every insert operation is O(1). Every sum operation is O(N*P) where N is the number of items in the + map, and P is the length of the input prefix. + + Space Complexity: The space used by map is linear in the size of all input key and val values combined. 
+ """ + + def __init__(self): + self.mapping: Dict[str, int] = {} + + def insert(self, key: str, val: int) -> None: + """ + Inserts the key with the given value into the hash table + Args: + key (str): key to insert + val (int): value to insert + """ + self.mapping[key] = val + + def sum(self, prefix: str) -> int: + """ + Finds the sum of all keys with the prefix `prefix`. + Args: + prefix (str): prefix to search for + Returns: + int: sum of all keys with the prefix `prefix` + """ + running_sum = 0 + for k, v in self.mapping.items(): + if k.startswith(prefix): + running_sum += v + + return running_sum + + +class MapSumPrefix(object): + """ + We can remember the answer for all possible prefixes in a HashMap score. When we get a new (key, val) pair, we + update every prefix of key appropriately: each prefix will be changed by delta = val - map[key], where map is the + previously associated value of key (zero if undefined.) + + Time Complexity: Every insert operation is O(K^2), where K is the length of the key, as K strings are made of an + average length of K. Every sum operation is O(1). + + Space Complexity: The space used by map is linear in the size of all input key and val values combined. + """ + + def __init__(self): + self.mapping: Dict[str, int] = {} + self.score = Counter() + + def insert(self, key: str, val: int) -> None: + """ + Inserts the key with the given value into the hash table + Args: + key (str): key to insert + val (int): value to insert + """ + delta = val - self.mapping.get(key, 0) + self.mapping[key] = val + for i in range(len(key) + 1): + prefix = key[:i] + self.score[prefix] += delta + + def sum(self, prefix: str) -> int: + """ + Finds the sum of all keys with the prefix `prefix`. 
+ Args: + prefix (str): prefix to search for + Returns: + int: sum of all keys with the prefix `prefix` + """ + return self.score[prefix] + + +class MapSumTrie(object): + """ + Since we are dealing with prefixes, a Trie (prefix tree) is a natural data structure to approach this problem. For + every node of the trie corresponding to some prefix, we will remember the desired answer (score) and store it at + this node. As in the approach of using a prefix hash map, this involves modifying each node by delta = val - map[key]. + + Time Complexity: Every insert operation is O(K), where K is the length of the key. Every sum operation is O(K). + Space Complexity: The space used is linear in the size of the total input. + """ + + def __init__(self): + self.mapping: Dict[str, int] = {} + self.score = Counter() + self.root = TrieNode() + + def insert(self, key: str, val: int) -> None: + """ + Inserts the key with the given value into the hash table + Args: + key (str): key to insert + val (int): value to insert + """ + delta = val - self.mapping.get(key, 0) + self.mapping[key] = val + current = self.root + current.score += delta + for char in key: + current = current.children[char] + current.score += delta + + def sum(self, prefix: str) -> int: + """ + Finds the sum of all keys with the prefix `prefix`. 
+ Args: + prefix (str): prefix to search for + Returns: + int: sum of all keys with the prefix `prefix` + """ + current = self.root + for char in prefix: + if char not in current.children: + return 0 + current = current.children[char] + return current.score diff --git a/datastructures/map_sum/images/examples/map_sum_pairs_example_1.png b/datastructures/map_sum/images/examples/map_sum_pairs_example_1.png new file mode 100644 index 00000000..34ae0f7f Binary files /dev/null and b/datastructures/map_sum/images/examples/map_sum_pairs_example_1.png differ diff --git a/datastructures/map_sum/images/examples/map_sum_pairs_example_2.png b/datastructures/map_sum/images/examples/map_sum_pairs_example_2.png new file mode 100644 index 00000000..773fafb9 Binary files /dev/null and b/datastructures/map_sum/images/examples/map_sum_pairs_example_2.png differ diff --git a/datastructures/map_sum/images/examples/map_sum_pairs_example_3.png b/datastructures/map_sum/images/examples/map_sum_pairs_example_3.png new file mode 100644 index 00000000..e6f753f4 Binary files /dev/null and b/datastructures/map_sum/images/examples/map_sum_pairs_example_3.png differ diff --git a/datastructures/map_sum/test_map_sum_pairs.py b/datastructures/map_sum/test_map_sum_pairs.py new file mode 100644 index 00000000..59e082f1 --- /dev/null +++ b/datastructures/map_sum/test_map_sum_pairs.py @@ -0,0 +1,110 @@ +import unittest +from typing import Tuple, List +from parameterized import parameterized +from datastructures.map_sum import MapSumBruteForce, MapSumPrefix, MapSumTrie + + +MAP_SUM_TEST_CASES = [ + ( + [ + ("insert", ("apple", 3)), + ("sum", ("ap", 3)), + ("insert", ("apple", 2)), + ("sum", ("ap", 2)), + ], + ), + ( + [ + ("insert", ("apple", 3)), + ("sum", ("ap", 3)), + ("insert", ("ap", 2)), + ("sum", ("ap", 5)), + ], + ), + ( + [ + ("insert", ("apple", 3)), + ("insert", ("apple", 5)), + ("sum", ("ap", 5)), + ("insert", ("apricot", 2)), + ("sum", ("ap", 7)), + ], + ), + ( + [ + ("insert", ("car", 3)), + 
("insert", ("cat", 2)), + ("insert", ("cart", 4)), + ("sum", ("ca", 9)), + ("sum", ("car", 7)), + ], + ), + ( + [ + ("insert", ("dog", 5)), + ("insert", ("cat", 7)), + ("sum", ("z", 0)), + ], + ), + ( + [ + ("insert", ("a", 3)), + ("insert", ("apple", 2)), + ("sum", ("a", 5)), + ("sum", ("app", 2)), + ], + ), +] + + +class MapSumPairsTestCase(unittest.TestCase): + @parameterized.expand(MAP_SUM_TEST_CASES) + def test_map_sum_pairs_brute_force( + self, operations: List[Tuple[str, Tuple[str, int]]] + ): + map_sum = MapSumBruteForce() + for operation in operations: + cmd = operation[0] + params = operation[1] + if cmd == "insert": + key, value = params + map_sum.insert(key, value) + + if cmd == "sum": + prefix, expected = params + actual = map_sum.sum(prefix) + self.assertEqual(expected, actual) + + @parameterized.expand(MAP_SUM_TEST_CASES) + def test_map_sum_pairs_prefix(self, operations: List[Tuple[str, Tuple[str, int]]]): + map_sum = MapSumPrefix() + for operation in operations: + cmd = operation[0] + params = operation[1] + if cmd == "insert": + key, value = params + map_sum.insert(key, value) + + if cmd == "sum": + prefix, expected = params + actual = map_sum.sum(prefix) + self.assertEqual(expected, actual) + + @parameterized.expand(MAP_SUM_TEST_CASES) + def test_map_sum_pairs_trie(self, operations: List[Tuple[str, Tuple[str, int]]]): + map_sum = MapSumTrie() + for operation in operations: + cmd = operation[0] + params = operation[1] + if cmd == "insert": + key, value = params + map_sum.insert(key, value) + + if cmd == "sum": + prefix, expected = params + actual = map_sum.sum(prefix) + self.assertEqual(expected, actual) + + +if __name__ == "__main__": + unittest.main() diff --git a/datastructures/trees/trie/trie_node.py b/datastructures/trees/trie/trie_node.py index 92d4413e..78761ace 100644 --- a/datastructures/trees/trie/trie_node.py +++ b/datastructures/trees/trie/trie_node.py @@ -4,11 +4,11 @@ class TrieNode: def __init__(self): - # self.char = char """ 
Initializes a TrieNode instance. - A TrieNode contains a character and a dictionary of its children. It also contains a boolean indicating whether the node is the end of a word in the Trie. + A TrieNode contains a character and a dictionary of its children. It also contains a boolean indicating whether + the node is the end of a word in the Trie. Parameters: None @@ -19,6 +19,7 @@ def __init__(self): self.children: DefaultDict[str, TrieNode] = defaultdict(TrieNode) self.is_end = False self.index: Optional[int] = None + self.score: int = 0 def __repr__(self): return f"TrieNode(index={self.index}, is_end={self.is_end})" diff --git a/datastructures/trees/trie/word_dictionary/test_word_dictionary.py b/datastructures/trees/trie/word_dictionary/test_word_dictionary.py index ba744e70..9fdc903e 100644 --- a/datastructures/trees/trie/word_dictionary/test_word_dictionary.py +++ b/datastructures/trees/trie/word_dictionary/test_word_dictionary.py @@ -29,6 +29,33 @@ def test_case_1(self): actual_get_words_two = word_dictionary.get_words() self.assertEqual(expected_words, actual_get_words_two) + def test_case_2(self): + word_dictionary = WordDictionary() + actual_words_1 = word_dictionary.get_words() + self.assertEqual([], actual_words_1) + + word_dictionary.add_word("apple") + word_dictionary.add_word("grape") + actual_words_2 = word_dictionary.get_words() + expected_words_1 = ["apple", "grape"] + self.assertEqual(expected_words_1, actual_words_2) + + actual_search_word_1 = word_dictionary.search_word("strawberry") + self.assertFalse(actual_search_word_1) + + word_dictionary.add_word("banana") + word_dictionary.add_word("banan") + + actual_search_word_2 = word_dictionary.search_word("bana..") + self.assertTrue(actual_search_word_2) + + actual_search_word_3 = word_dictionary.search_word("ba...a") + self.assertTrue(actual_search_word_3) + + actual_get_words_3 = word_dictionary.get_words() + expected_words_2 = ["apple", "banan", "banana", "grape"] + 
self.assertEqual(sorted(expected_words_2), sorted(actual_get_words_3)) + if __name__ == "__main__": unittest.main()