Note_Tech

All technological notes.


Project maintained by simonangel-fong Hosted on GitHub Pages — Theme by mattgraham

DSA - Hash Table

Back


Hash Table


Time Complexity


Hash Function


Collision Resolution


Implement Hash Table

Constructor

class HashTable:
    def __init__(self, size=7):
        self.data_map = [None]*size

    def __hash(self, key):
        my_hash = 0
        for letter in key:
            # ord(): return ascii number of each letter
            # 23 is a prime number; Here can be any prime number
            my_hash = (my_hash + ord(letter)*23) % len(self.data_map)
        return my_hash

    def print_table(self):
        for i, val in enumerate(self.data_map):
            print(i, ':', val)


my_hash_table = HashTable()

my_hash_table.print_table()

Set_value(): O(1)

def set_item(self, key, value):
    # compute the hash index
    index = self.__hash(key)
    # when the index is empty, then initiate with a list
    if self.data_map[index] is None:
        self.data_map[index] = []
    # if the index is empty, it has been fill with an empty list, then append key-value pair
    # if the index has been filled, append key-value pair to the existing list.
    self.data_map[index].append([key, value])

Get_value(): O(1)

def get_item(self, key):
    index = self.__hash(key)

    if self.data_map[index] is None:
        return None

    for item in self.data_map[index]:
        if item[0] == key:
            return item[1]

    return None

Keys(): O(1)

def keys(self):
    all_keys = []
    for i in range(len(self.data_map)):
        if self.data_map[i] is not None:
            for j in range(len(self.data_map[i])):
                all_keys.append(self.data_map[i][j][0])
    return all_keys

Implement Hash Table in Python

class hash_table(object):

    def __init__(self, size):
        self.size = size
        self.slot = [None] * self.size
        self.data = [None] * self.size

    def __getitem__(self, key):
        return self._get(key)

    def __setitem__(self, key, item):
        self._put(key, item)

    def _put(self, key, item):
        hash_code = self.hash_function(key)

        # if slot is empty
        if self.slot[hash_code] == None:
            self.slot[hash_code] = key
            self.data[hash_code] = item

        # when colision
        else:

            # open addressing, probe for next empty slot
            # loop until next slot is empty or the key exists
            while self.slot[hash_code] != None and self.slot[hash_code] != key:
                hash_code = self.rehash_function(hash_code)

            # when the key exists, overwrite data
            # self.slot[hash_code] == key
            if self.slot[hash_code] != None:
                self.data[hash_code] = item

            # when the slot is empty, store key and data
            # self.slot[hash_code] == None
            if self.slot[hash_code] != key:
                self.slot[hash_code] = key
                self.data[hash_code] = item

    def _get(self, key):
        start_code = hash_code = self.hash_function(key)
        # print(self.slot)
        # if slot is not empty but not match,  rehash
        # when colision
        if self.slot[hash_code] != None and self.slot[hash_code] != key:
            # rehash until next None, or key matches
            while self.slot[hash_code] != None and self.slot[hash_code] != key:
                hash_code = self.rehash_function(hash_code)

                # in case of slot full and not matches, when loop back to start, break
                if hash_code == start_code:
                    break

            # when next slot is None, or when key matches, return data
            # return might be None
            return self.data[hash_code]

        # when not colision
        # return might be None, or data
        else:
            return self.data[hash_code]

    def hash_function(self, key):
        # Remainder Method
        if isinstance(key, str):
            return sum([ord(ch) for ch in key]) % len(self.slot)
        return key % len(self.slot)

    def rehash_function(self, old_hash_code):
        return (old_hash_code+1) % len(self.slot)     # linear probing


Classic: Check if common item exists

# method using set(): python method
def item_in_common(list1, list2):
    s1 = set(list1)
    s2 = set(list2)

    return len(s1.intersection(s2)) > 0


# using for loop: O(n^2)
def item_in_common(list1, list2):
    for i in list1:
        for j in list2:
            if i == j:
                return True
    return False


# method using hash table: O(1)
def item_in_common(list1, list2):
    my_dict = {}
    for i in list1:
        my_dict[i] = True

    for j in list2:
        if j in my_dict:
            return True

    return False


list1 = [1,3,5]
list2 = [2,4,6]

print(item_in_common(list1, list2))

TOP