您的位置:首页 > 编程语言 > Python开发

贪心算法 赫夫曼编码 使用小顶堆性质 python 代码实现 示例

2018-01-01 00:00 811 查看
# coding=utf-8
import math
# 算法导论课本   贪心算法   赫夫曼编码 实现简单文本压缩  python 代码实现 示例
# 构造赫夫曼编码

# Huffman tree node
class HuffmNode:
    """One node of the Huffman tree.

    Attributes set by the constructor:
        data:  the symbol stored in the node (as passed in).
        freq:  the occurrence count of the symbol (as passed in).
        left:  left child, ``None`` on creation.
        right: right child, ``None`` on creation.
        code:  bit string assigned to the node, empty on creation.
    """

    def __init__(self, data, freq):
        self.data, self.freq = data, freq
        self.left = self.right = None
        self.code = ''

    def addFreq(self):
        """Increase the occurrence count by one."""
        self.freq += 1

def change_node(x, y, A):
    """Exchange the elements stored at indices ``x`` and ``y`` of ``A`` in place.

    Only the references held by the list are swapped; the elements themselves
    are not copied.
    """
    A[x], A[y] = A[y], A[x]

# Restore the min-heap property
def min_heapify(A, i):
    """Sift the element at index ``i`` down until neither child is smaller.

    ``A`` is a 1-based heap kept in a list: ``A[0]`` holds the heap size and
    the elements at ``A[1] .. A[A[0]]`` are compared by their ``freq``
    attribute. The list is modified in place and nothing is returned.
    """
    size = A[0]
    while True:
        smallest = i
        # Left child first, then right child, each compared with the best so far.
        for child in (2 * i, 2 * i + 1):
            if child <= size and A[child].freq < A[smallest].freq:
                smallest = child
        if smallest == i:
            return
        A[i], A[smallest] = A[smallest], A[i]
        i = smallest

# Build a min-heap
def build_min_heap(A):
    """Turn ``A`` into a min-heap in place.

    Calls ``min_heapify`` on every index from ``len(A) // 2`` down to 1, so
    each node is sifted down after the nodes below it have been processed.
    """
    i = len(A) // 2
    while i > 0:
        min_heapify(A, i)
        i -= 1

# Heap sort
def heap_sort(A):
    """Sort the heap stored in ``A`` in place by repeatedly removing the root.

    For every index from the last one down to 2, the root ``A[1]`` is swapped
    with the element at that index, the heap size in ``A[0]`` is decremented
    and the new root is sifted down with ``min_heapify``.

    Note: ``A[0]`` is left at its decremented value; it is not restored.
    """
    last = len(A) - 1
    while last > 1:
        A[1], A[last] = A[last], A[1]
        A[0] -= 1
        min_heapify(A, 1)
        last -= 1

# Return the minimum element (the heap root) and keep the min-heap property
def heap_extract_min(A):
    """Remove and return the root of the min-heap ``A``.

    The last heap element is moved to the root, the vacated slot is deleted
    from the list, the size in ``A[0]`` is decremented and the new root is
    sifted down with ``min_heapify``.

    Returns:
        The element that was at ``A[1]``; or ``0`` (after printing a message)
        when the heap size in ``A[0]`` is below 1.
    """
    size = A[0]
    if size < 1:
        print("堆为空!!!")
        return 0
    smallest = A[1]
    # Overwrite the root with the last element, then drop the last slot.
    A[1] = A[size]
    del A[size]
    A[0] = size - 1
    min_heapify(A, 1)
    return smallest

# Insert an element and keep the min-heap property
def min_heap_insert(A, key):
    """Append ``key`` to the min-heap ``A`` and sift it up into place.

    The heap size in ``A[0]`` is incremented, ``key`` is appended to the list,
    and the element at index ``A[0]`` is then swapped with its parent for as
    long as the parent's ``freq`` is strictly greater.
    """
    A[0] += 1
    A.append(key)
    child = A[0]
    parent = child // 2
    # ``child > 1`` is tested first so the size slot A[0] is never treated as a node.
    while child > 1 and A[parent].freq > A[child].freq:
        A[parent], A[child] = A[child], A[parent]
        child, parent = parent, parent // 2

def get_huffm_code(root, code, leap_list):
    """Assign Huffman codes to the leaves below ``root`` and collect them.

    The tree is walked depth-first, left subtree before right subtree.
    Going left appends "0" to the code and going right appends "1".

    Args:
        root: node with ``left``, ``right`` and ``code`` attributes.
        code: bit string accumulated on the path from the tree root.
        leap_list: list that every visited leaf is appended to, in
            left-to-right order.

    A tree that consists of a single leaf (input with only one distinct
    symbol) would otherwise get the empty string as its code, which makes
    the encoded output empty; such a leaf is given the code "0" instead.
    """
    if root.left is None and root.right is None:
        # Leaf: record its code. ``code`` is empty only when the leaf is the
        # whole tree, in which case one bit is still needed per symbol.
        root.code = code or "0"
        leap_list.append(root)
        return
    if root.left is not None:
        get_huffm_code(root.left, code + "0", leap_list)
    if root.right is not None:
        get_huffm_code(root.right, code + "1", leap_list)

# ---------------------------- Huffman main script ----------------------------
# Read the whole input file into `code`.
filename = "test.txt"
with open(filename, 'r') as infile:
    code = infile.read()
print(code)

# Without this guard an empty file fails later with an IndexError when the
# (non-existent) tree root is read from the heap.
if not code:
    raise SystemExit("输入文件为空,无法构造赫夫曼编码")

# index_list: distinct characters in order of first appearance (slot 0 is
# reserved for their count). hf_list: the matching nodes, laid out as the
# 1-based heap array (slot 0 is reserved for the heap size).
index_list = [0, ]
hf_list = [0, ]
node_by_char = {}  # char -> node, so each lookup is O(1) instead of a list scan
for ch in code:
    node = node_by_char.get(ch)
    if node is None:
        node = HuffmNode(ch, 1)
        node_by_char[ch] = node
        index_list.append(ch)
        hf_list.append(node)
    else:
        node.addFreq()
print("统计不同字符有:")
index_list[0] = len(index_list) - 1
print(index_list)
# The number of nodes is stored in hf_list[0].
hf_list[0] = len(hf_list) - 1
# Build the min-priority queue keyed on `freq`.
build_min_heap(hf_list)
print("初始化的小顶堆为:")
# The heap occupies indices 1..hf_list[0] inclusive, hence the "+ 1".
for i in range(1, hf_list[0] + 1):
    print("  ", hf_list[i].freq, end='')
print("")

# Core of the Huffman algorithm: n distinct symbols need n - 1 merges. Each
# merge removes the two least frequent nodes and inserts a parent whose
# frequency is their sum. The range is evaluated once, before the heap shrinks.
for _ in range(1, hf_list[0]):
    Z = HuffmNode('-', 0)
    Z.left = heap_extract_min(hf_list)
    Z.right = heap_extract_min(hf_list)
    Z.freq = Z.left.freq + Z.right.freq
    min_heap_insert(hf_list, Z)

# Assign a code to every leaf and gather the leaves in leap_list.
hf_code = ''
leap_list = []
get_huffm_code(hf_list[1], hf_code, leap_list)
print("叶子结点个数为(即文本中字符个数)", len(leap_list))

# Encode the text: map each character to its leaf's code and join once.
code_by_char = {leaf.data: leaf.code for leaf in leap_list}
compress_code = ''.join(code_by_char[ch] for ch in code)

# Baseline for the ratio: a fixed-length code of 5 bits per character
# (value taken from the original script; it only covers up to 32 symbols).
FIXED_BITS_PER_CHAR = 5
need = FIXED_BITS_PER_CHAR * len(code)
print("原文本字符长度为:", len(code))
print("原文本不同字符个数为:", index_list[0])
print("压缩前需要二进制位数:%d * %d = %d" % (FIXED_BITS_PER_CHAR, len(code), need))
print("生成压缩后的01二进制编码长度为:", len(compress_code))
print("因此此次该文本压缩率位", len(compress_code) / need)
print("生成压缩后的01二进制编码为:")
print(compress_code)
# Write the bit string to the output file.
with open('compress_code.txt', 'w') as outfile:
    outfile.write(compress_code)
print("生成压缩二进制文本compress_code.txt,暂告结束!!!!")
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息