diff --git a/DIRECTORY.md b/DIRECTORY.md index 6c1dcd4e..67506168 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -198,6 +198,8 @@ * Hash Table * First Unique Character * [Test First Unique Character](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/hash_table/first_unique_character/test_first_unique_character.py) + * Jewels And Stones + * [Test Jewels And Stones](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/hash_table/jewels_and_stones/test_jewels_and_stones.py) * Ransom Note * [Test Ransom Note](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/hash_table/ransom_note/test_ransom_note.py) * Heap @@ -274,14 +276,56 @@ * [Test Check Subarray Sum](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/prefix_sum/continous_sub_array_sum/test_check_subarray_sum.py) * Search * Binary Search + * Big Words + * [Big Words](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/big_words/big_words.py) + * Cyclically Shifted Array + * [Test Cyclically Shifted Array](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/cyclically_shifted_array/test_cyclically_shifted_array.py) * Divide Chocolate * [Test Divide Chocolate](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/divide_chocolate/test_divide_chocolate.py) + * Find Closest Number + * [Test Find Closest Number](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/find_closest_number/test_find_closest_number.py) + * Find Closest Value + * [Test Find Closest Value](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/find_closest_value/test_find_closest_value.py) + * Find First In Duplicate List + * [Test Find First In Duplicates](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/find_first_in_duplicate_list/test_find_first_in_duplicates.py) + * Find 
Fixed Number + * [Test Find Fixed Number](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/find_fixed_number/test_find_fixed_number.py) + * Find Peak Element + * [Test Find Peak Element](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/find_peak_element/test_find_peak_element.py) + * First Boundary + * [Test First Boundary](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/first_boundary/test_first_boundary.py) + * First Occurrence + * [Test Find First Occurrence](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/first_occurrence/test_find_first_occurrence.py) + * Integer Square Root + * [Test Integer Square Root](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/integer_square_root/test_integer_square_root.py) + * Koko Eating Bananas + * [Test Koko Eating Bananas](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/koko_eating_bananas/test_koko_eating_bananas.py) * Magnetic Force Between Two Balls * [Test Magnetic Force Between Two Balls](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/magnetic_force_between_two_balls/test_magnetic_force_between_two_balls.py) * Maxruntime N Computers * [Test Max Runtime](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/maxruntime_n_computers/test_max_runtime.py) + * Min In Rotated Sorted Array + * [Test Find Min Rotated Sorted Array](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/min_in_rotated_sorted_array/test_find_min_rotated_sorted_array.py) + * My Calendar + * [Test My Calendar](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/my_calendar/test_my_calendar.py) + * Next Greatest Letter + * [Test Next Greatest 
Letter](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/next_greatest_letter/test_next_greatest_letter.py) + * Peak Of Mountain + * [Test Peak Of Mountain](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/peak_of_mountain/test_peak_of_mountain.py) + * Plates Between Candles + * [Test Plates Between Candles](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/plates_between_candles/test_plates_between_candles.py) + * Rotated Sorted Array + * [Test Search Rotated Sorted Array](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/rotated_sorted_array/test_search_rotated_sorted_array.py) + * Search Range + * [Test Search Range](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/search_range/test_search_range.py) + * Single Non Duplicate + * [Test Single Non Duplicate](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/single_non_duplicate/test_single_non_duplicate.py) * Split Array Largest Sum * [Test Split Array Largest Sum](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/split_array_largest_sum/test_split_array_largest_sum.py) + * Square Root + * [Test Sqrt Estimate](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/square_root/test_sqrt_estimate.py) + * Successful Pairs Spells Potions + * [Test Successful Pairs](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/successful_pairs_spells_potions/test_successful_pairs.py) * [Test Binary Search](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/binary_search/test_binary_search.py) * Interpolation * [Test Interpolation Search](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/search/interpolation/test_interpolation_search.py) @@ -326,6 +370,11 @@ * [Test 
Daily Temperatures](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/stack/daily_temperatures/test_daily_temperatures.py) * Minimum String Length After Removing Substrings * [Test Min Str Length After Removing Substrings](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/stack/minimum_string_length_after_removing_substrings/test_min_str_length_after_removing_substrings.py) + * Strings + * Caeser Cipher + * [Test Caeser](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/strings/caeser_cipher/test_caeser.py) + * Run Length Encoding + * [Test Run Length](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/strings/run_length_encoding/test_run_length.py) * Taxi Numbers * [Taxi Numbers](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/taxi_numbers/taxi_numbers.py) * Top K Elements @@ -475,6 +524,9 @@ * [Node](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/graphs/undirected/clone_graph/node.py) * [Vertex](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/graphs/vertex.py) * Hashmap + * [Bucket](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/hashmap/bucket.py) + * [Hash Map](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/hashmap/hash_map.py) + * [Item](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/hashmap/item.py) * [Test Hashmap](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/hashmap/test_hashmap.py) * Hashset * [Test My Hashset](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/hashset/test_my_hashset.py) @@ -844,50 +896,6 @@ * Queue * Recent Counter * [Test Recent Calls](https://github.com/BrianLusina/PythonSnips/blob/master/puzzles/queue/recent_counter/test_recent_calls.py) - * Search - * Binary Search - * Big Words - * [Big Words](https://github.com/BrianLusina/PythonSnips/blob/master/puzzles/search/binary_search/big_words/big_words.py) 
- * Cyclically Shifted Array - * [Test Cyclically Shifted Array](https://github.com/BrianLusina/PythonSnips/blob/master/puzzles/search/binary_search/cyclically_shifted_array/test_cyclically_shifted_array.py) - * Find Closest Number - * [Test Find Closest Number](https://github.com/BrianLusina/PythonSnips/blob/master/puzzles/search/binary_search/find_closest_number/test_find_closest_number.py) - * Find Closest Value - * [Test Find Closest Value](https://github.com/BrianLusina/PythonSnips/blob/master/puzzles/search/binary_search/find_closest_value/test_find_closest_value.py) - * Find First In Duplicate List - * [Test Find First In Duplicates](https://github.com/BrianLusina/PythonSnips/blob/master/puzzles/search/binary_search/find_first_in_duplicate_list/test_find_first_in_duplicates.py) - * Find Fixed Number - * [Test Find Fixed Number](https://github.com/BrianLusina/PythonSnips/blob/master/puzzles/search/binary_search/find_fixed_number/test_find_fixed_number.py) - * Find Peak Element - * [Test Find Peak Element](https://github.com/BrianLusina/PythonSnips/blob/master/puzzles/search/binary_search/find_peak_element/test_find_peak_element.py) - * First Boundary - * [Test First Boundary](https://github.com/BrianLusina/PythonSnips/blob/master/puzzles/search/binary_search/first_boundary/test_first_boundary.py) - * First Occurrence - * [Test Find First Occurrence](https://github.com/BrianLusina/PythonSnips/blob/master/puzzles/search/binary_search/first_occurrence/test_find_first_occurrence.py) - * Integer Square Root - * [Test Integer Square Root](https://github.com/BrianLusina/PythonSnips/blob/master/puzzles/search/binary_search/integer_square_root/test_integer_square_root.py) - * Koko Eating Bananas - * [Test Koko Eating Bananas](https://github.com/BrianLusina/PythonSnips/blob/master/puzzles/search/binary_search/koko_eating_bananas/test_koko_eating_bananas.py) - * Min In Rotated Sorted Array - * [Test Find Min Rotated Sorted 
Array](https://github.com/BrianLusina/PythonSnips/blob/master/puzzles/search/binary_search/min_in_rotated_sorted_array/test_find_min_rotated_sorted_array.py) - * My Calendar - * [Test My Calendar](https://github.com/BrianLusina/PythonSnips/blob/master/puzzles/search/binary_search/my_calendar/test_my_calendar.py) - * Next Greatest Letter - * [Test Next Greatest Letter](https://github.com/BrianLusina/PythonSnips/blob/master/puzzles/search/binary_search/next_greatest_letter/test_next_greatest_letter.py) - * Peak Of Mountain - * [Test Peak Of Mountain](https://github.com/BrianLusina/PythonSnips/blob/master/puzzles/search/binary_search/peak_of_mountain/test_peak_of_mountain.py) - * Plates Between Candles - * [Test Plates Between Candles](https://github.com/BrianLusina/PythonSnips/blob/master/puzzles/search/binary_search/plates_between_candles/test_plates_between_candles.py) - * Rotated Sorted Array - * [Test Search Rotated Sorted Array](https://github.com/BrianLusina/PythonSnips/blob/master/puzzles/search/binary_search/rotated_sorted_array/test_search_rotated_sorted_array.py) - * Search Range - * [Test Search Range](https://github.com/BrianLusina/PythonSnips/blob/master/puzzles/search/binary_search/search_range/test_search_range.py) - * Single Non Duplicate - * [Test Single Non Duplicate](https://github.com/BrianLusina/PythonSnips/blob/master/puzzles/search/binary_search/single_non_duplicate/test_single_non_duplicate.py) - * Square Root - * [Test Sqrt Estimate](https://github.com/BrianLusina/PythonSnips/blob/master/puzzles/search/binary_search/square_root/test_sqrt_estimate.py) - * Successful Pairs Spells Potions - * [Test Successful Pairs](https://github.com/BrianLusina/PythonSnips/blob/master/puzzles/search/binary_search/successful_pairs_spells_potions/test_successful_pairs.py) * Stack * Asteroid Collision * [Test Asteroid Collision](https://github.com/BrianLusina/PythonSnips/blob/master/puzzles/stack/asteroid_collision/test_asteroid_collision.py) @@ -1100,7 +1108,6 
@@ * [Test Variable Length Qty](https://github.com/BrianLusina/PythonSnips/blob/master/tests/algorithms/test_variable_length_qty.py) * Cryptography * [Test Atbash Cipher](https://github.com/BrianLusina/PythonSnips/blob/master/tests/cryptography/test_atbash_cipher.py) - * [Test Caeser](https://github.com/BrianLusina/PythonSnips/blob/master/tests/cryptography/test_caeser.py) * [Test Crypto Square](https://github.com/BrianLusina/PythonSnips/blob/master/tests/cryptography/test_crypto_square.py) * [Test Diffie Hellman](https://github.com/BrianLusina/PythonSnips/blob/master/tests/cryptography/test_diffie_hellman.py) * [Test Handhsake](https://github.com/BrianLusina/PythonSnips/blob/master/tests/cryptography/test_handhsake.py) @@ -1111,7 +1118,6 @@ * [Test Pig Latin](https://github.com/BrianLusina/PythonSnips/blob/master/tests/cryptography/test_pig_latin.py) * [Test Rail Fence Cipher](https://github.com/BrianLusina/PythonSnips/blob/master/tests/cryptography/test_rail_fence_cipher.py) * [Test Rot13](https://github.com/BrianLusina/PythonSnips/blob/master/tests/cryptography/test_rot13.py) - * [Test Run Length](https://github.com/BrianLusina/PythonSnips/blob/master/tests/cryptography/test_run_length.py) * [Test Shell Game](https://github.com/BrianLusina/PythonSnips/blob/master/tests/cryptography/test_shell_game.py) * [Test Simple Cipher](https://github.com/BrianLusina/PythonSnips/blob/master/tests/cryptography/test_simple_cipher.py) * [Test Vigenere](https://github.com/BrianLusina/PythonSnips/blob/master/tests/cryptography/test_vigenere.py) diff --git a/algorithms/hash_table/jewels_and_stones/README.md b/algorithms/hash_table/jewels_and_stones/README.md new file mode 100644 index 00000000..67925c74 --- /dev/null +++ b/algorithms/hash_table/jewels_and_stones/README.md @@ -0,0 +1,61 @@ +# Jewels and Stones + +You're given strings `jewels` representing the types of stones that are jewels, and `stones` representing the stones you have. 
+Each character in stones is a type of stone you have. You want to know how many of the stones you have are also jewels. + +Letters are case-sensitive, so "a" is considered a different type of stone from "A". + +## Examples + +Example 1: + +```text +Input: jewels = "aA", stones = "aAAbbbb" +Output: 3 +``` + +Example 2: + +```text +Input: jewels = "z", stones = "ZZ" +Output: 0 +``` + +## Constraints + +- 1 <= jewels.length, stones.length <= 50 +- jewels and stones consist of only English letters. +- All the characters of jewels are unique. + +## Topics + +- Hash Table +- String + +## Solution + +The core intuition behind solving this problem is to treat it as a membership-counting task: we aren’t transforming +either string, we’re simply counting how many characters in stones belong to the set of jewel types in jewels, while +respecting case sensitivity. This maps naturally to a hash-based lookup because it lets us store all jewel types in a +structure that supports fast membership checks. In other words, we treat jewels as an allowlist of valid types and stones +as a stream of items to evaluate. As we scan through stones, we increment a counter whenever the current character appears +in the jewel set. As comparisons are case-sensitive, only exact matches contribute to the final count, which represents +how many of your stones are jewels. + +Using the intuition above, we implement the algorithm as follows: + +1. Initialize a new set, jewelSet, from the given jewels. +2. Initialize a variable count to 0. +3. Iterate through each character ch in the stones: + - If ch exists in jewelSet: + - Increment count. +4. After successfully iterating through the stones array, return count. + +### Time complexity + +The time complexity of the solution is O(m+n) because it first builds a set from the m characters in jewels, then scans the +n characters in stones once to count matches. 
+ +### Space complexity + +The space complexity of the solution is O(m) because it stores up to m unique jewel characters in a set. diff --git a/algorithms/hash_table/jewels_and_stones/__init__.py b/algorithms/hash_table/jewels_and_stones/__init__.py new file mode 100644 index 00000000..6766eb97 --- /dev/null +++ b/algorithms/hash_table/jewels_and_stones/__init__.py @@ -0,0 +1,34 @@ +from collections import Counter +from typing import Set + + +def num_jewels_in_stones_with_set(jewels: str, stones: str) -> int: + # Store all jewel types for fast membership checking + jewel_set: Set[str] = set(jewels) + + # Count how many stones are jewels + count = 0 + + # Check each stone and increment count if it's a jewel + for ch in stones: + if ch in jewel_set: + count += 1 + + # Return the total number of jewels found in stones + return count + + +def num_jewels_in_stones_with_dict(jewels: str, stones: str) -> int: + # Tally how many stones of each type we have + stone_counts: Counter[str] = Counter(stones) + + # Count how many stones are jewels + count = 0 + + # For each jewel type, add the number of stones of that type + for jewel in jewels: + if jewel in stone_counts: + count += stone_counts[jewel] + + # Return the total number of jewels found in stones + return count diff --git a/algorithms/hash_table/jewels_and_stones/test_jewels_and_stones.py b/algorithms/hash_table/jewels_and_stones/test_jewels_and_stones.py new file mode 100644 index 00000000..37f3a577 --- /dev/null +++ b/algorithms/hash_table/jewels_and_stones/test_jewels_and_stones.py @@ -0,0 +1,34 @@ +import unittest +from parameterized import parameterized +from algorithms.hash_table.jewels_and_stones import ( + num_jewels_in_stones_with_dict, + num_jewels_in_stones_with_set, +) + +JEWELS_AND_STONES_TEST_CASES = [ + ("pQ", "ppPQQq", 4), + ("k", "kkkkK", 4), + ("LMn", "lLmMNn", 3), + ("cD", "ddddccccDD", 6), + ("tRz", "RttZzr", 4), +] + + +class JewelsAndStonesTestCase(unittest.TestCase): + 
@parameterized.expand(JEWELS_AND_STONES_TEST_CASES) + def test_num_jewels_in_stones_with_set( + self, jewels: str, stones: str, expected: int + ): + actual = num_jewels_in_stones_with_set(jewels, stones) + self.assertEqual(actual, expected) + + @parameterized.expand(JEWELS_AND_STONES_TEST_CASES) + def test_num_jewels_in_stones_with_dict( + self, jewels: str, stones: str, expected: int + ): + actual = num_jewels_in_stones_with_dict(jewels, stones) + self.assertEqual(actual, expected) + + +if __name__ == "__main__": + unittest.main() diff --git a/cryptography/caeser_cipher/README.md b/algorithms/strings/caeser_cipher/README.md similarity index 100% rename from cryptography/caeser_cipher/README.md rename to algorithms/strings/caeser_cipher/README.md diff --git a/cryptography/caeser_cipher/__init__.py b/algorithms/strings/caeser_cipher/__init__.py similarity index 98% rename from cryptography/caeser_cipher/__init__.py rename to algorithms/strings/caeser_cipher/__init__.py index a7b667db..db34e5cc 100755 --- a/cryptography/caeser_cipher/__init__.py +++ b/algorithms/strings/caeser_cipher/__init__.py @@ -82,7 +82,7 @@ def __init__(self, shift): self.alpha = ascii_uppercase self.new_alpha = self.alpha[shift:] + self.alpha[:shift] - def encode(self, plaintext): + def encode(self, plaintext: str): t = plaintext.maketrans(self.alpha, self.new_alpha) return plaintext.upper().translate(t) diff --git a/tests/cryptography/test_caeser.py b/algorithms/strings/caeser_cipher/test_caeser.py similarity index 100% rename from tests/cryptography/test_caeser.py rename to algorithms/strings/caeser_cipher/test_caeser.py diff --git a/cryptography/run_length_encoding/README.md b/algorithms/strings/run_length_encoding/README.md similarity index 100% rename from cryptography/run_length_encoding/README.md rename to algorithms/strings/run_length_encoding/README.md diff --git a/cryptography/run_length_encoding/__init__.py b/algorithms/strings/run_length_encoding/__init__.py similarity index 100% 
rename from cryptography/run_length_encoding/__init__.py rename to algorithms/strings/run_length_encoding/__init__.py diff --git a/tests/cryptography/test_run_length.py b/algorithms/strings/run_length_encoding/test_run_length.py similarity index 100% rename from tests/cryptography/test_run_length.py rename to algorithms/strings/run_length_encoding/test_run_length.py diff --git a/datastructures/hashmap/README.md b/datastructures/hashmap/README.md index 6b431087..b43e0b87 100644 --- a/datastructures/hashmap/README.md +++ b/datastructures/hashmap/README.md @@ -1,13 +1,183 @@ # HashMap Design -Constraints and assumptions -For simplicity, are the keys integers only? -Yes -For collision resolution, can we use chaining? -Yes -Do we have to worry about load factors? -No -Can we assume inputs are valid or do we have to validate them? -Assume they're valid -Can we assume this fits memory? -Yes \ No newline at end of file +Design a HashMap without using any built-in hash table libraries. + +Implement the HashMap class: + +- HashMap() initializes the object with an empty map. +- void `set(int key, int value)` inserts a (key, value) pair into the HashMap. If the key already exists in the map, + update the corresponding value. +- `int get(int key)` returns the value to which the specified key is mapped, or -1 if this map contains no mapping for + the key. +- `void remove(key)` removes the key and its corresponding value if the map contains the mapping for the key. 
+ +## Example + +Example 1: + +```text +Input +["MyHashMap", "put", "put", "get", "get", "put", "get", "remove", "get"] +[[], [1, 1], [2, 2], [1], [3], [2, 1], [2], [2], [2]] +Output +[null, null, null, 1, -1, null, 1, null, -1] + +Explanation +MyHashMap myHashMap = new MyHashMap(); +myHashMap.put(1, 1); // The map is now [[1,1]] +myHashMap.put(2, 2); // The map is now [[1,1], [2,2]] +myHashMap.get(1); // return 1, The map is now [[1,1], [2,2]] +myHashMap.get(3); // return -1 (i.e., not found), The map is now [[1,1], [2,2]] +myHashMap.put(2, 1); // The map is now [[1,1], [2,1]] (i.e., update the existing value) +myHashMap.get(2); // return 1, The map is now [[1,1], [2,1]] +myHashMap.remove(2); // remove the mapping for 2, The map is now [[1,1]] +myHashMap.get(2); // return -1 (i.e., not found), The map is now [[1,1]] +``` + +## Constraints + +- 0 <= key, value <= 10^6 +- At most 10^4 calls will be made to put, get, and remove. + +## Topics + +- Array +- Hash Table +- Linked List +- Design +- Hash Function + +## Solution + +A hash map is a fundamental data structure found in various programming languages. Its key feature is facilitating fast +access to a value associated with a given key. Designing an efficient hash map involves addressing two main challenges: + +1. **Hash function design**: The hash function serves to map a key to a location in the storage space. A good hash + function ensures that keys are evenly distributed across the storage space, preventing the clustering of keys in + certain locations. This even distribution helps maintain efficient access to stored values. + +2. **Collision handling**: Despite efforts to evenly distribute keys, collisions—where two distinct keys map to the same + storage location—are inevitable due to the finite nature of the storage space compared to the potentially infinite + key space. Effective collision-handling strategies are crucial to ensure data integrity and efficient retrieval. 
To + deal with collisions, we can use methods like chaining, where we link multiple values together at that location, or + open addressing, where we find another empty location for the key. + +### Step-by-step solution construction + +The first step is to design a hash function using the modulo operator, particularly suitable for integer-type keys. +The modulo operator, denoted by %, is a mathematical operation that returns the remainder of dividing one number by +another. When selecting a modulo base, it’s advisable to choose a prime number. This is because choosing a prime number +as the modulo base helps minimize collisions. Prime numbers offer a better distribution of hash codes, reducing the +likelihood of collisions (where two different keys hash to the same value). + +Here’s the implementation of a hash function using a prime number, 2069, as the modulo base. This particular prime number +is likely chosen because it is relatively large, offering a wide range of possible hash codes and reducing the chance +of collisions. + +```python +def calculate_hash(key): + key_base = 2069 + return key % key_base + +def main(): + # Example usage: + keys = [1, 2068, 2070] + i = 0 + for key in keys: + i+=1 + hashed_value = calculate_hash(key) + print(i, ".\tKey:", key) + print("\tHashed value:", hashed_value) + +main() +``` + +```text +1 . Key: 1 + Hashed value: 1 + +2 . Key: 2068 + Hashed value: 2068 + +3 . Key: 2070 + Hashed value: 1 +``` + +In the code provided above, collisions occur because when taking the modulo of keys with the base value of 2069, both +keys 1 and 2070 yield the same hash value of 1, leading to a collision. + +Now, let’s look at a visual representation of hash collision: + +![Solution Key Collision](./images/solutions/hash_map_design_key_collision.png) + +In the scenario illustrated in the diagram above, when two distinct keys are assigned to the same address, it results in +a collision. 
Therefore, the second step is to handle collisions by using a storage space where each element is indexed by +the output of the hash function. To address this, we use a container, bucket, designed to store all values that are +assigned the same hash value by the hash function. + +Let’s look at the diagram below to visualize collision handling through the use of buckets: + +![Solution Key Collision Buckets](./images/solutions/hash_map_design_key_collision_with_buckets.png) + +Now, let’s design a Bucket for collision handling supporting primary operations: Get, Update, and Remove. These operations +allow for efficient management of key-value pairs within each bucket, accommodating cases where multiple keys hash to +the same index. + +- **Get(key)**: Searches the bucket for a key-value pair where the key matches the provided argument. If such a pair is + found, the method returns the corresponding value. If the key does not exist within the bucket, the method raises + a `KeyError` for the caller to handle. This functionality is crucial for retrieval operations in a hash table, allowing for efficient access to stored + data based on keys. +- **Update(key, value)**: Looks for the specified key in the bucket. If the key is found, the method updates the existing + key-value pair with the new value provided. If the key is not found, the method adds a new key-value pair to the bucket. + This dual functionality ensures that the bucket can dynamically adjust to changes in data, either by updating existing + entries or adding new ones to accommodate new keys. +- **Remove(key)**: Searches the bucket for a key-value pair matching the specified key. If such a pair is found, the + method removes it from the bucket, effectively handling the deletion of entries. + +Collision handling occurs implicitly within the Update function of the Bucket. It effectively handles collisions by +allowing multiple key-value pairs with the same hash value (i.e., the same bucket index) to coexist within the bucket. 
+ +Moving forward, the third step involves designing a hash map by utilizing the hash function and the Bucket designed earlier. + +To design a hash map, the core operation involves locating stored values by key. Therefore, for each hash map method— +Get, Put, and Remove—the primary task revolves around locating stored values by key. This process involves two steps: + +1. Applying the hash function to generate a hash key for a given key value, determining the address in the main storage + and finding the corresponding bucket. +2. Iterating through the bucket to check if the desired key-value pair exists. + +![Solution 1](./images/solutions/hash_map_design_solution_1.png) +![Solution 2](./images/solutions/hash_map_design_solution_2.png) +![Solution 3](./images/solutions/hash_map_design_solution_3.png) +![Solution 4](./images/solutions/hash_map_design_solution_4.png) +![Solution 5](./images/solutions/hash_map_design_solution_5.png) +![Solution 6](./images/solutions/hash_map_design_solution_6.png) +![Solution 7](./images/solutions/hash_map_design_solution_7.png) +![Solution 8](./images/solutions/hash_map_design_solution_8.png) +![Solution 9](./images/solutions/hash_map_design_solution_9.png) +![Solution 10](./images/solutions/hash_map_design_solution_10.png) +![Solution 11](./images/solutions/hash_map_design_solution_11.png) + +### Solution Summary + +1. Choose a prime number for the key space size (preferably a large one). +2. Create an array and initialize it with empty buckets equal to the key space size. +3. Generate a hash key by taking the modulus of the input key with the key space size. +4. 
Implement the following functions: + - Put(key, value): Inserts the value into the bucket at the computed hash key index + - Get(key): Searches for the key in the bucket and returns the associated value + - Remove(key): Deletes the element at the specified key from the bucket and the hash map + +### Time Complexity + +Each method of the hash map has a time complexity of O(N/K), where N represents the total number of possible keys, and +K represents the key space size, which in our case is 2069. + +In an ideal scenario with evenly distributed keys, the average size of each bucket can be considered as N/K. However, in +the worst-case scenario, we may need to iterate through an entire bucket to find the desired value, resulting in a time +complexity of O(N) for each method. + +### Space Complexity + +The space complexity is O(K+M), where K denotes the key space size, and M represents the number of unique keys that have +been inserted into the hashmap. diff --git a/datastructures/hashmap/__init__.py b/datastructures/hashmap/__init__.py index 307bfa54..5be7c89b 100644 --- a/datastructures/hashmap/__init__.py +++ b/datastructures/hashmap/__init__.py @@ -1,79 +1,5 @@ -from typing import Any, List +from datastructures.hashmap.item import Item +from datastructures.hashmap.hash_map import HashMap -class Item: - """ - Represents an item in the HashMap - """ - - def __init__(self, key: int, data: Any): - """ - Creates an instance of an Item in the HashMap. - @param key: Unique Key Identifier for this item in the HashMap - @param data: Represents what's actually stored. This could be any data type - """ - self.key = key - self.data = data - - -class HashMap: - """ - Represents a HashMap/HashTable storing items by Key Value pairs - """ - - def __init__(self, size: int): - """ - Creates an instance of a HashMap/HashTable - @param size: This represents the the size of the items to be stored. 
- """ - self.size: int = size - self.table: List[list] = [[] for _ in range(size)] - - def _hash_function(self, key: int) -> int: - """ - Hash Function used to calculate the key or where to place an item in the HashMap. This is a modulo operation - of the key and the size of the HashMap - @param key: Key of the item - @return: - """ - return key % self.size - - def set(self, key: int, value: Any) -> None: - """ - Used to add an item to the HashMap/HashTable given it's unique Key and it's value. if the item is already in the - HashMap, then an update to its value is performed. - @param key: Key used to identify the item - @param value: Represents the data of the Item - """ - hash_index = self._hash_function(key) - for item in self.table[hash_index]: - if item.key == key: - item.data = value - return - self.table[hash_index].append(Item(key, value)) - - def get(self, key: int) -> Any: - """ - Gets the Item from the HashMap/HashTable given its key if available. Will throw a KeyError if no such key exists - @param key: Key to use to retrieve an item from the HashMap - @return: Data item of the item - @raise KeyError if the key does not exist - """ - hash_index = self._hash_function(key) - for item in self.table[hash_index]: - if item.key == key: - return item.data - raise KeyError(f"Key {key} does not exist") - - def remove(self, key: int) -> None: - """ - Removes an item from the HashMap/HashTable given its key. Raises a Key error if the key does not exist. 
- @param key: key to use to retrieve the item and delete it - @raise: KeyError - """ - hash_index = self._hash_function(key) - for index, item in enumerate(self.table[hash_index]): - if item.key == key: - self.table[hash_index].pop(index) - return - raise KeyError(f"Key {key} does not exist") +__all__ = ["HashMap", "Item"] diff --git a/datastructures/hashmap/bucket.py b/datastructures/hashmap/bucket.py new file mode 100644 index 00000000..1f2a8d99 --- /dev/null +++ b/datastructures/hashmap/bucket.py @@ -0,0 +1,46 @@ +from typing import List, Any +from datastructures.hashmap.item import Item + + +class Bucket(object): + def __init__(self): + # Initialize an empty list to store key-value pairs + self.bucket: List[Item] = [] + + def get(self, key: Any) -> Any: + # iterate through each key value pair in the bucket + for item in self.bucket: + # If the key matches the provided key, return the corresponding value + if item.key == key: + return item.data + # if the key is not found, raise a key error and let the call site handle the error + raise KeyError(f"Key {key} does not exist") + + def update(self, key: Any, value: Any) -> None: + # Flag to indicate whether the key is found in the bucket + found = False + # Iterate through each key value pair in the bucket + for idx, item in enumerate(self.bucket): + current_key, current_value = item.key, item.data + # If the key matches the key of the current key-value pair + if current_key == key: + # Update the value of the key-value pair + self.bucket[idx] = Item(key, value) + # Set the flag to true, indicating the key is found and break out of the loop + found = True + break + + # if the key is not found in the bucket, add it along with its value + if not found: + self.bucket.append(Item(key, value)) + + def remove(self, key: Any) -> None: + # Iterate through each key-value pair in the bucket + for idx, item in enumerate(self.bucket): + current_key, current_value = item.key, item.data + # If the key matches the key of the
current key-value pair + if current_key == key: + # Delete the key-value pair from the bucket + del self.bucket[idx] + # Exit the loop as the key has been removed + break diff --git a/datastructures/hashmap/hash_map.py b/datastructures/hashmap/hash_map.py new file mode 100644 index 00000000..0f73da52 --- /dev/null +++ b/datastructures/hashmap/hash_map.py @@ -0,0 +1,62 @@ +from typing import Any, List +from datastructures.hashmap.bucket import Bucket + + +class HashMap: + """ + Represents a HashMap/HashTable storing items by Key Value pairs + """ + + def __init__(self, key_space: int = 2069): + """ + Creates an instance of a HashMap/HashTable + Args: + key_space (int, optional): The size of the key space. Defaults to 2069. + """ + self.key_space: int = key_space + self.buckets: List[Bucket] = [Bucket() for _ in range(key_space)] + + def __hash_function(self, key: int) -> int: + """ + Hash Function used to calculate the key or where to place an item in the HashMap. This is a modulo operation + of the key and the size of the HashMap + Args: + key (int): The key to hash + Returns: + int: The key or where to place an item in the HashMap + """ + return key % self.key_space + + def set(self, key: int, value: Any) -> None: + """ + Used to add an item to the HashMap/HashTable given its unique Key and its value. If the item is already in the + HashMap, then an update to its value is performed. + @param key: Key used to identify the item + @param value: Represents the data of the Item + """ + hash_key = self.__hash_function(key) + self.buckets[hash_key].update(key, value) + + def get(self, key: int) -> Any: + """ + Gets the Item from the HashMap/HashTable given its key if available.
Will throw a KeyError if no such key exists + Args: + key (int): Key used to identify the item + Raises: + KeyError: If no such key exists + Returns: + Any: The data stored for the given key + """ + hash_key = self.__hash_function(key) + return self.buckets[hash_key].get(key) + + def remove(self, key: int) -> None: + """ + Removes an item from the HashMap/HashTable given its key. Does nothing if the key does not exist. + Args: + key (int): Key used to identify the item + Note: + No error is raised for a missing key; Bucket.remove simply finds nothing to delete + """ + hash_key = self.__hash_function(key) + self.buckets[hash_key].remove(key) diff --git a/datastructures/hashmap/images/solutions/hash_map_design_key_collision.png b/datastructures/hashmap/images/solutions/hash_map_design_key_collision.png new file mode 100644 index 00000000..e4e22e90 Binary files /dev/null and b/datastructures/hashmap/images/solutions/hash_map_design_key_collision.png differ diff --git a/datastructures/hashmap/images/solutions/hash_map_design_key_collision_with_buckets.png b/datastructures/hashmap/images/solutions/hash_map_design_key_collision_with_buckets.png new file mode 100644 index 00000000..55248962 Binary files /dev/null and b/datastructures/hashmap/images/solutions/hash_map_design_key_collision_with_buckets.png differ diff --git a/datastructures/hashmap/images/solutions/hash_map_design_solution_1.png b/datastructures/hashmap/images/solutions/hash_map_design_solution_1.png new file mode 100644 index 00000000..4eef56a3 Binary files /dev/null and b/datastructures/hashmap/images/solutions/hash_map_design_solution_1.png differ diff --git a/datastructures/hashmap/images/solutions/hash_map_design_solution_10.png b/datastructures/hashmap/images/solutions/hash_map_design_solution_10.png new file mode 100644 index 00000000..77bd0f77 Binary files /dev/null and b/datastructures/hashmap/images/solutions/hash_map_design_solution_10.png differ diff --git a/datastructures/hashmap/images/solutions/hash_map_design_solution_11.png
b/datastructures/hashmap/images/solutions/hash_map_design_solution_11.png new file mode 100644 index 00000000..071a7a82 Binary files /dev/null and b/datastructures/hashmap/images/solutions/hash_map_design_solution_11.png differ diff --git a/datastructures/hashmap/images/solutions/hash_map_design_solution_2.png b/datastructures/hashmap/images/solutions/hash_map_design_solution_2.png new file mode 100644 index 00000000..718d989a Binary files /dev/null and b/datastructures/hashmap/images/solutions/hash_map_design_solution_2.png differ diff --git a/datastructures/hashmap/images/solutions/hash_map_design_solution_3.png b/datastructures/hashmap/images/solutions/hash_map_design_solution_3.png new file mode 100644 index 00000000..15961a05 Binary files /dev/null and b/datastructures/hashmap/images/solutions/hash_map_design_solution_3.png differ diff --git a/datastructures/hashmap/images/solutions/hash_map_design_solution_4.png b/datastructures/hashmap/images/solutions/hash_map_design_solution_4.png new file mode 100644 index 00000000..990c7026 Binary files /dev/null and b/datastructures/hashmap/images/solutions/hash_map_design_solution_4.png differ diff --git a/datastructures/hashmap/images/solutions/hash_map_design_solution_5.png b/datastructures/hashmap/images/solutions/hash_map_design_solution_5.png new file mode 100644 index 00000000..941cadc9 Binary files /dev/null and b/datastructures/hashmap/images/solutions/hash_map_design_solution_5.png differ diff --git a/datastructures/hashmap/images/solutions/hash_map_design_solution_6.png b/datastructures/hashmap/images/solutions/hash_map_design_solution_6.png new file mode 100644 index 00000000..9e652dff Binary files /dev/null and b/datastructures/hashmap/images/solutions/hash_map_design_solution_6.png differ diff --git a/datastructures/hashmap/images/solutions/hash_map_design_solution_7.png b/datastructures/hashmap/images/solutions/hash_map_design_solution_7.png new file mode 100644 index 00000000..12431322 Binary files 
/dev/null and b/datastructures/hashmap/images/solutions/hash_map_design_solution_7.png differ diff --git a/datastructures/hashmap/images/solutions/hash_map_design_solution_8.png b/datastructures/hashmap/images/solutions/hash_map_design_solution_8.png new file mode 100644 index 00000000..e2e49786 Binary files /dev/null and b/datastructures/hashmap/images/solutions/hash_map_design_solution_8.png differ diff --git a/datastructures/hashmap/images/solutions/hash_map_design_solution_9.png b/datastructures/hashmap/images/solutions/hash_map_design_solution_9.png new file mode 100644 index 00000000..8f7c26cb Binary files /dev/null and b/datastructures/hashmap/images/solutions/hash_map_design_solution_9.png differ diff --git a/datastructures/hashmap/item.py b/datastructures/hashmap/item.py new file mode 100644 index 00000000..facb6004 --- /dev/null +++ b/datastructures/hashmap/item.py @@ -0,0 +1,17 @@ +from typing import Any + + +class Item: + """ + Represents an item in the HashMap + """ + + def __init__(self, key: Any, data: Any): + """ + Creates an instance of an Item in the HashMap. + Args: + key: The key of the item. + data: The data of the item. + """ + self.key = key + self.data = data diff --git a/datastructures/hashmap/test_hashmap.py b/datastructures/hashmap/test_hashmap.py index 8dea995d..4cf588c6 100644 --- a/datastructures/hashmap/test_hashmap.py +++ b/datastructures/hashmap/test_hashmap.py @@ -1,53 +1,36 @@ import unittest -from . 
import HashMap, Item +from datastructures.hashmap.hash_map import HashMap class HashMapTestCases(unittest.TestCase): def test_hashmap_with_size_of_3_adds_items(self): hashmap = HashMap(3) - item_1 = 1, "something" - hashmap.set(item_1[0], item_1[1]) - - expected_item_1 = Item(item_1[0], item_1[1]) - actual_1 = hashmap.table[1][0] - self.assertEqual(actual_1.key, expected_item_1.key) - self.assertEqual(actual_1.data, expected_item_1.data) + item_1_key = 1 + item_1_value = "something" + hashmap.set(item_1_key, item_1_value) - item_2 = 2, "something" - hashmap.set(item_2[0], item_2[1]) + item_2_key = 2 + item_2_value = "something" + hashmap.set(item_2_key, item_2_value) - expected_item_2 = Item(item_2[0], item_2[1]) - actual_2 = hashmap.table[2][0] - self.assertEqual(actual_2.key, expected_item_2.key) - self.assertEqual(actual_2.data, expected_item_2.data) + item_3_key = 3 + item_3_value = "something" + hashmap.set(item_3_key, item_3_value) - item_3 = 3, "something" - hashmap.set(item_3[0], item_3[1]) - - expected_item_3 = Item(item_3[0], item_3[1]) - actual_3 = hashmap.table[0][0] - self.assertEqual(actual_3.key, expected_item_3.key) - self.assertEqual(actual_3.data, expected_item_3.data) - - item_4 = 4, "something" - hashmap.set(item_4[0], item_4[1]) - - expected_item_4 = Item(item_4[0], item_4[1]) - actual_4 = hashmap.table[1][1] - self.assertEqual(actual_4.key, expected_item_4.key) - self.assertEqual(actual_4.data, expected_item_4.data) + item_4_key = 4 + item_4_value = "something" + hashmap.set(item_4_key, item_4_value) # can handle collisions - item_5 = 2, "something" - hashmap.set(item_5[0], item_5[1]) + item_5_key = 2 + item_5_value = "something" + hashmap.set(item_5_key, item_5_value) - expected_item_5 = Item(item_5[0], item_5[1]) - actual_5 = hashmap.table[2][0] - self.assertEqual(actual_5.key, expected_item_5.key) - self.assertEqual(actual_5.data, expected_item_5.data) + actual_5 = hashmap.get(item_5_key) + self.assertEqual(actual_5, item_5_value) def 
test_hashmap_can_remove_items(self): hashmap = HashMap(3) @@ -65,19 +48,14 @@ def test_hashmap_can_remove_items(self): hashmap.set(item_5[0], item_5[1]) # raises KeyError for non existent keys - self.assertRaises(KeyError, hashmap.remove, 10) - self.assertRaises(KeyError, hashmap.remove, 0) - - # removes items by keys - - # check that the length is 2 for the key position we want to remove - self.assertEqual(2, len(hashmap.table[1])) + self.assertRaises(KeyError, hashmap.get, 10) + self.assertRaises(KeyError, hashmap.get, 0) # remove key hashmap.remove(1) - # this will indicate the item no longer exists at this position - self.assertEqual(1, len(hashmap.table[1])) + with self.assertRaises(KeyError): + hashmap.get(1) def test_hashmap_can_add_items(self): hashmap = HashMap(3)