您的位置:首页 > 理论基础 > 数据结构算法

数据结构Python实现之哈希表,字典以及集合

2018-07-31 17:26 716 查看

哈希表:

class Array(object):#定义一个数组,用于实现哈希表
def __init__(self,size=32,init=None):
self._size = size
self._items = [init] * size
def __getitem__(self, index):
return self._items[index]
def __setitem__(self, index, value):
self._items[index] = value
def __len__(self):
return self._size
def clear(self,value=None):
for i in range(len(self._items)):
self._items[i] = value
def __iter__(self):
for item in self._items:
yield item

'''
定义一个hash表数组的槽
注意,一个槽有三种状态。
1.从未使用HashTable.UNUSED。此槽没有被使用和冲突过,查找时只要找到UNUSED就不用再继续探查了
2.使用过但是remove了,此时是HashTable.EMPTY,该探查点后边的元素扔可能是有key
3.槽正在使用Slot节点
'''
class Slot(object):
def __init__(self,key,value):
self.key = key
self.value = value
#定义一个哈希表类
class HashTable(object):
UNUSED = None #槽没被使用过(槽指的是数组中的一个位置)
EMPTY = Slot(None,None)#使用过却被删除了

def __init__(self):
self._table = Array(8,init=HashTable.UNUSED)#初始化槽没被使用过
self.length = 0
@property
def _load_factor(self):#装载因子(load factor),当装载因子超过某一阈值时,需要进行重哈希操作
return self.length/float(len(self._table))
def __len__(self):#哈希表的长度
return self.length
def _hash(self,key):#定义hash函数,返回哈希表数组的索引
return abs(hash(key))%len(self._table)
def _find_key(self,key):#搜寻key键,如果存在,返回key键对应的索引,否则返回None
index = self._hash(key)
_len = len(self._table)
while self._table[index] is not HashTable.UNUSED:
if self._table[index] is HashTable.EMPTY:
index = (index*5 +1)%_len#解决哈希冲突的一种方式
continue
elif self._table[index].key == key:
return index
else:
index = (index*5 + 1)%_len
return None
def _find_slot_for_insert(self,key):#寻找哈希表中可以插入的槽,
index = self._hash(key)
_len = len(self._table)
while not self._slot_can_insert(index):
index = (index * 5 + 1)%_len#解决哈希冲突的一种方式
return  index
def _slot_can_insert(self,index):#判断发现的槽能否插入,空槽或从未被使用即返回True
return (self._table[index] is HashTable.EMPTY or self._table[index] is HashTable.UNUSED)
def __contains__(self, key):#in 操作符,判断key是否在hash表里,如果存在返回其索引,否则返回None
index = self._find_key(key)
return index is not None
def add(self,key,value):#添加元素操作,有 key 则更新,否则插入
if key in self:#调用__contains__判断key是否在哈希表中
index = self._find_key(key)#搜寻key的索引
self._table[index].value = value#更新哈希表中key对应的value
return False
else:#如果key不再hash表中,则添加元素Slot(key,value),并更新长度
index = self._find_slot_for_insert(key)
self._table[index] = Slot(key,value)
self.length += 1
if self._load_factor >= 0.8:#如果装载因子大于0.8,则进行重哈希操作
self._rehash()
return True

# '''
# 重哈希(Rehashing),
# 步骤就是重新开辟一块新的空间。本函数里的策略是扩大为已经使用的槽数目的两倍。
# 开辟了新空间以后,会把原来哈希表里不为空槽的数据重新插入到新的哈希表里,
# 插入方式和之前一样。这就是 rehashing 操作。
# '''
def _rehash(self):
old_table = self._table
newsize = len(self._table) * 2
self._table = Array(newsize,HashTable.UNUSED)#定义新的哈希表
self.length = 0#初始长度为0
for slot in old_table:#把原来哈希表里不为空槽的数据重新插入到新的哈希表里
if slot is not HashTable.UNUSED and slot is not HashTable.EMPTY:
index = self._find_slot_for_insert(slot.key)
self._table[index]=slot
self.length += 1
def get(self,key,default = None):#获取 key 的值,不存在返回默认值 None
index = self._find_key(key)
if index is None:
return default
else:
return self._table[index].value
def remove(self,key):#删除一个 key,这里其实不是真删除,而是标记为 Empty
index = self._find_key(key)
if index is None:
raise KeyError()
value = self._table[index].value
self.length -= 1
self._table[index] = HashTable.EMPTY
return value
def __iter__(self):
for slot in self._table:
if slot not in (HashTable.EMPTY,HashTable.UNUSED):
yield slot.key
'''
添加操作:
h = HashTable()
h.add('a', 0)
h.add('b', 1)
h.add('c', 2)
print('hash表的长度为:',len(h))
print(h.get('a')==0)
print(h.get('b')==1)
print(h.get('c')==2)
print(h.get('w') is None)
print('遍历输出:')
for i in h:
print(i,':',h.get(i),end=',')

输出:
hash表的长度为: 3
True
True
True
True
遍历输出:
b : 1,c : 2,a : 0,
'''
'''
删除操作:
h = HashTable()
h.add('a', 0)
h.add('b', 1)
h.add('c', 2)
h.add('x', 10)
h.add('y', 11)
h.add('z', 12)
print('hash表的长度为:',len(h))
print('get()返回值:',h.get('a'),';','remove()返回值:',h.remove('a'))
print('get()返回值:',h.get('c'),';','remove()返回值:',h.remove('c'))
print('get()返回值:',h.get('x'),';','remove()返回值:',h.remove('x'))
print('删除操作后hash表的长度为:',len(h))
print('遍历输出:')
for i in h:
print(i,':',h.get(i),end=',')
输出:
hash表的长度为: 6
get()返回值: 0 ; remove()返回值: 0
get()返回值: 2 ; remove()返回值: 2
get()返回值: 10 ; remove()返回值: 10
删除操作后hash表的长度为: 3
遍历输出:
y : 11,z : 12,b : 1,
'''
'''
重哈希
h = HashTable()#哈希表的默认大小为8
for i in range(20):#添加元素超过装载因子,此时会进行重哈希操作
h.add(i, i)
for i in range(20):
print(h.get(i)==i,end=',')

print('hash表的长度为:',len(h))

输出:
True,True,True,True,True,True,True,True,
True,True,True,True,True,True,True,True,
True,True,True,True,
hash表的长度为: 20
'''

字典:

class Array(object):
def __init__(self,size=32,init=None):
self._size = size
self._items = [init] * size
def __getitem__(self, index):
return self._items[index]
def __setitem__(self, index, value):
self._items[index] = value
def __len__(self):
return self._size
def clear(self,value=None):
for i in range(len(self._items)):
self._items[i] = value
def __iter__(self):
for item in self._items:
yield item
class Slot(object):
def __init__(self,key,value):
self.key = key
self.value = value

class Hashable(object):
UNUSED = None
EMPTY = Slot(None,None)

def __init__(self):
self._table = Array(8,init=Hashable.UNUSED)
self.length = 0
@property
def _load_factor(self):
return self.length/float(len(self._table))
def __len__(self):
return self.length
def _hash(self,key):
return abs(hash(key))%len(self._table)
def _find_key(self,key):
index = self._hash(key)
_len = len(self._table)
while self._table[index] is not Hashable.UNUSED:
if self._table[index] is Hashable.EMPTY:
index = (index * 5 + 1 )%_len
continue
elif self._table[index].key==key:
return index
else:
index = (index * 5 + 1)%_len
return None
def _find_slot_for_insert(self,key):
index = self._hash(key)
_len = len(self._table)
while not self._slot_can_insert(index):
index = (index * 5 + 1)%_len
return index
def _slot_can_insert(self,index):
return (self._table[index] is Hashable.EMPTY or self._table[index] is Hashable.UNUSED)
def __contains__(self, key):
index = self._find_key(key)
return index is not None
def add(self,key,value):
if key in self:
index = self._find_key(key)
self._table[index].value = value
return False
else:
index = self._find_slot_for_insert(key)
self._table[index] = Slot(key,value)
self.length += 1
if self._load_factor>=0.8:
self._rehash()
return True
def _rehash(self):
old_table = self._table
newsize = len(self._table)*2
self._table = Array(newsize,Hashable.UNUSED)
self.length = 0
for slot in old_table:
if slot is not Hashable.UNUSED and slot is not Hashable.EMPTY:
index = self._find_slot_for_insert(slot.key)
self._table[index]=slot
self.length += 1
def get(self,key,default=None):
index = self._find_key(key)
if index is None:
return default
else:
return self._table[index].value
def remove(self,key):
index = self._find_key(key)
if index is None:
raise KeyError()
value = self._table[index].value
self.length -= 1
self._table[index] = Hashable.EMPTY
return value
def __iter__(self):
for slot in self._table:
if slot not in (Hashable.EMPTY,Hashable.UNUSED):
yield slot.key

class Dict(Hashable):

def _iter_slot(self):
for slot in self._table:
if slot not in (Hashable.EMPTY,Hashable.UNUSED):
yield slot
def __setitem__(self, key, value):
self.add(key,value)
def __getitem__(self, key):
if key not in self:
raise KeyError()
else:
return self.get(key)
def items(self):
for slot in self._iter_slot():
yield (slot.key,slot.value)
def keys(self):
for slot in self._iter_slot():
yield slot.key
def values(self):
for slot in self._iter_slot():
yield slot.value
# '''
# 通过指定的key值,删除字典的一个键值对
# 返回被删除的key对应的value
# '''
def pop(self,key):
return self.remove(key)
def clear(self):#清空字典
for index in range(len(self._table)):
self._table[index] = Hashable.EMPTY
def update(self,other):
self_list=[]
other_list=[]
for key,value in self.items():
self_list.append((key,value))
for key,value in other.items():
other_list.append((key,value))
copy_list = self_list.copy()
copy_list.extend(other_list)
for key_value in set(copy_list):
if key_value in other_list and key_value not in self_list:
self.add(key_value[0],key_value[1])
else:
continue

'''
d = Dict()
d['a']=1
d['b']=2
d['c']=3
d['d']=4
d['e']=5
print('字典长度:',len(d))
print('遍历字典:')
for key,value in d.items():
print((key,value))
print('key,get(key)获取键值对:')
for key in d.keys():
print((key,d.get(key)))

输出:
字典长度: 5
遍历字典:
('d', 4)
('b', 2)
('e', 5)
('a', 1)
('c', 3)
key,get(key)获取键值对:
('d', 4)
('b', 2)
('e', 5)
('a', 1)
('c', 3)
'''

'''
d = Dict()
d['a']=1
d['b']=2
d['c']=3
d['d']=4
d['e']=5
d.pop('a')
d.pop('d')
for key,value in d.items():
print((key,value))
输出:
('e', 5)
('b', 2)
('c', 3)
'''
'''
d = Dict()
d['a']=1
d['b']=2
d['c']=3
d['d']=4
d['e']=5
d.clear()
for key,value in d.items():
print((key,value))
'''
'''
当d1和d2的键没有交集的时候
d1=Dict()
d2=Dict()
d1['a']=1
d1['b']=2
d1['c']=3
d2['x']=11
d2['y']=22
d2['z']=33
print('d1:')
for key,value in d1.items():
print((key,value))
print('d2:')
for key,value in d2.items():
print((key,value))
d1.update(d2)
print('更新后的d1:')
for key,value in d1.items():
print((key,value))
输出:
d1:
('c', 3)
('a', 1)
('b', 2)
d2:
('y', 22)
('z', 33)
('x', 11)
更新后的d1:
('c', 3)
('y', 22)
('a', 1)
('z', 33)
('b', 2)
('x', 11)

'''
'''
当d1和d2的键有交集的时候
d1=Dict()
d2=Dict()
d1['a']=1
d1['b']=2
d1['c']=3
d2['a']=1
d2['b']=22
d2['x']=33
print('d1:')
for key,value in d1.items():
print((key,value))
print('d2:')
for key,value in d2.items():
print((key,value))
d1.update(d2)
print('更新后的d1:')
for key,value in d1.items():
print((key,value))
输出:
d1:
('a', 1)
('c', 3)
('b', 2)
d2:
('a', 1)
('x', 33)
('b', 22)
更新后的d1:
('a', 1)
('c', 3)
('x', 33)
('b', 22)
'''

集合:

class Set(HashTable):
def add(self,key):
# 集合其实就是一个 dict,只不过我们把它的 value 设置成 1
return super(Set,self).add(key,True)
def __and__(self, other):
#交集: A & B,表示同时在 A 和 B 中的元素。 python 中重载  `__and__` 实现
new_set = Set()
for element_a in self:
if element_a in other:
new_set.add(element_a)
return new_set
def __sub__(self, other):
# 差集:  A - B,表示在 A 中但是不在 B 中的元素。 python 中重载 `__sub__` 实现
new_set = Set()
for element_a in self:
if element_a not in other:
new_set.add(element_a)
return new_set
def __or__(self, other):
#并集: A | B,表示在 A 或者 B 中的元素,两个集合相加。python 中重载 `__or__` 实现
new_set = Set()
for element_a in self:
new_set.add(element_a)
for element_b in other:
new_set.add(element_b)
return new_set
def __xor__(self, other):#对称差: A ^ B,返回在 A 或 B 但是不在 A、B 中都出现的元素。其实就是 (A|B) - (A&B), python 中重载 `__xor__` 实现
new_set= Set()
for element_a in self:
if element_a not in other:
new_set.add(element_a)
for element_b in other:
if element_b not in self:
new_set.add(element_b)
return new_set
def pop(self):#删除集合中的第一个元素
for element in self:
self.remove(element)
break
def remove(self, key):#删除集合中指定的元素
super(Set,self).remove(key)
'''
remove()操作
s1=Set()
s1.add(1)
s1.add(6)
s1.add(4)
s1.add(9)
s1.add(3)
s1.add(8)
print(list(s1))
s1.remove(8)
print(list(s1))
s1.remove(1)
print(list(s1))
s1.remove(4)
print(list(s1))
s1.remove(9)
print(list(s1))
输出:
[8, 1, 3, 4, 6, 9]
[1, 3, 4, 6, 9]
[3, 4, 6, 9]
[3, 6, 9]
[3, 6]
'''
'''
pop()操作
s1=Set()
s1.add(1)
s1.add(6)
s1.add(4)
s1.add(9)
s1.add(3)
s1.add(8)
print(list(s1))
s1.pop()
print(list(s1))
s1.pop()
print(list(s1))
s1.pop()
print(list(s1))
s1.pop()
print(list(s1))
s1.pop()
print(list(s1))
输出:
[8, 1, 3, 4, 6, 9]
[1, 3, 4, 6, 9]
[3, 4, 6, 9]
[4, 6, 9]
[6, 9]
[9]
'''
'''
s1=Set()
s1.add(1)
s1.add(2)
s1.add(3)
print(1 in s1)
s2=Set()
s2.add(1)
s2.add(0)
s2.add(3)
s2.add(7)
s2.add(8)
s2.add(9)
print('s1和s2的交集:')
print(list(s1 & s2))
print('s1和s2的差集:')
print(list(s1 - s2))
print('s2和s1的差集:')
print(list(s2 - s1))
print('s1和s2的并集:')
print(list(s2 | s1))
print('s1和s2的对称差:')
print(list(s1 ^ s2))
输出:
True
s1和s2的交集:
[1, 3]
s1和s2的差集:
[2]
s2和s1的差集:
[0, 9, 8, 7]
s1和s2的并集:
[0, 1, 2, 3, 7, 8, 9]
s1和s2的对称差:
[0, 9, 2, 8, 7]
'''
阅读更多
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: