All technological notes.
Hashing
Hash table
O(1) time.Slots
Characteristics of Hash
O(0).Hashing function
Perfect hash function
Non-integer elements
strings as a sequence of ordinal values.
h(item)=sum(ordinal values)%len(slot_arr)
Remainder Method
remainder hash function then is:
h(item)=item%len(slot_arr)
Folding Method
These pieces are then added together to give the resulting hash value.
h(item)=sum(group_digits)%len(slot_arr)
Mid Square Method
collision
load factor
numberofitems / tablesizeO(1).需要越低越好, 减少同一 slot 重复出现可能,从而提高效率.Open addressing
We could start at the original hash value position and then move in a sequential manner through the slots until we encounter the first slot that is empty.
rehashing
linear probing
quadratic probing
Separate Chaining
class HashTable:
def __init__(self, size=7):
self.data_map = [None]*size
def __hash(self, key):
my_hash = 0
for letter in key:
# ord(): return ascii number of each letter
# 23 is a prime number; Here can be any prime number
my_hash = (my_hash + ord(letter)*23) % len(self.data_map)
return my_hash
def print_table(self):
for i, val in enumerate(self.data_map):
print(i, ':', val)
my_hash_table = HashTable()
my_hash_table.print_table()
O(1)def set_item(self, key, value):
# compute the hash index
index = self.__hash(key)
# when the index is empty, then initiate with a list
if self.data_map[index] is None:
self.data_map[index] = []
# if the index is empty, it has been fill with an empty list, then append key-value pair
# if the index has been filled, append key-value pair to the existing list.
self.data_map[index].append([key, value])
O(1)def get_item(self, key):
index = self.__hash(key)
if self.data_map[index] is None:
return None
for item in self.data_map[index]:
if item[0] == key:
return item[1]
return None
O(1)def keys(self):
all_keys = []
for i in range(len(self.data_map)):
if self.data_map[i] is not None:
for j in range(len(self.data_map[i])):
all_keys.append(self.data_map[i][j][0])
return all_keys
using a list with each element initialized to the special Python value None.
The idea of a dictionary used as a hash table to get and retrieve items using keys is often referred to as a mapping. In our implementation we will have the following methods:
in the map in Return True for a statement of the form key in map, if the given key is in the map, False otherwise.
hash function 将 key 转化为 hash code, 根据 hash code 将 key 存入 相应的 index 的 slot; 将 item 存入相应 index 的 data.class hash_table(object):
def __init__(self, size):
self.size = size
self.slot = [None] * self.size
self.data = [None] * self.size
def __getitem__(self, key):
return self._get(key)
def __setitem__(self, key, item):
self._put(key, item)
def _put(self, key, item):
hash_code = self.hash_function(key)
# if slot is empty
if self.slot[hash_code] == None:
self.slot[hash_code] = key
self.data[hash_code] = item
# when colision
else:
# open addressing, probe for next empty slot
# loop until next slot is empty or the key exists
while self.slot[hash_code] != None and self.slot[hash_code] != key:
hash_code = self.rehash_function(hash_code)
# when the key exists, overwrite data
# self.slot[hash_code] == key
if self.slot[hash_code] != None:
self.data[hash_code] = item
# when the slot is empty, store key and data
# self.slot[hash_code] == None
if self.slot[hash_code] != key:
self.slot[hash_code] = key
self.data[hash_code] = item
def _get(self, key):
start_code = hash_code = self.hash_function(key)
# print(self.slot)
# if slot is not empty but not match, rehash
# when colision
if self.slot[hash_code] != None and self.slot[hash_code] != key:
# rehash until next None, or key matches
while self.slot[hash_code] != None and self.slot[hash_code] != key:
hash_code = self.rehash_function(hash_code)
# in case of slot full and not matches, when loop back to start, break
if hash_code == start_code:
break
# when next slot is None, or when key matches, return data
# return might be None
return self.data[hash_code]
# when not colision
# return might be None, or data
else:
return self.data[hash_code]
def hash_function(self, key):
# Remainder Method
if isinstance(key, str):
return sum([ord(ch) for ch in key]) % len(self.slot)
return key % len(self.slot)
def rehash_function(self, old_hash_code):
return (old_hash_code+1) % len(self.slot) # linear probing
# method using set(): python method
def item_in_common(list1, list2):
s1 = set(list1)
s2 = set(list2)
return len(s1.intersection(s2)) > 0
# using for loop: O(n^2)
def item_in_common(list1, list2):
for i in list1:
for j in list2:
if i == j:
return True
return False
# method using hash table: O(1)
def item_in_common(list1, list2):
my_dict = {}
for i in list1:
my_dict[i] = True
for j in list2:
if j in my_dict:
return True
return False
list1 = [1,3,5]
list2 = [2,4,6]
print(item_in_common(list1, list2))