您的位置:首页 > 其它

文件压缩总结-哈夫曼树

2017-08-18 16:20 120 查看
详细源代码请移步下载:https://github.com/HsTime/file-campress





项目:文件压缩流程图



建立小堆代码:

#pragma once

#include<iostream>
using namespace std;
#include<vector>
#include"huffman.h"

// Comparator functor: true when `l` orders strictly before `n` under operator<.
// Used as Heap's Comper it keeps the smallest element at the top.
template<class T>
struct Less
{
    // const-qualified so the functor can also be invoked through const
    // objects and const references.
    bool operator()(const T& l, const T& n) const
    {
        return l < n;
    }
};
// Comparator functor: true when `l` orders strictly after `n` under operator>.
// Used as Heap's Comper it keeps the largest element at the top.
template<class T>
struct Greater
{
    // const-qualified so the functor can also be invoked through const
    // objects and const references.
    bool operator()(const T& l, const T& n) const
    {
        return l > n;
    }
};
// Binary heap stored in a std::vector and ordered by a comparator functor.
//
// Comper must be default-constructible and callable as comper(a, b); a true
// result means `a` has to sit closer to the top than `b`. With a "less than"
// comparator the smallest element is on top, with "greater than" the largest.
template<class T, class Comper>
class Heap
{
public:
    // Creates an empty heap.
    Heap()
    {}

    // Builds a heap from the first n elements of arr.
    // arr may be null only when n == 0.
    Heap(T* arr, size_t n)
        : heap(arr, arr + n)
    {
        _BuildHeap();
    }

    // Builds a heap from a copy of x. T only needs to be copyable; it does
    // not have to be constructible from 0.
    Heap(const std::vector<T>& x)
        : heap(x)
    {
        _BuildHeap();
    }

    // Returns a copy of the top element.
    // Throws std::out_of_range when the heap is empty.
    T Top() const
    {
        return heap.at(0);
    }

    // Inserts x and restores the heap order.
    void push(const T& x)
    {
        heap.push_back(x);
        _AdjistUp(static_cast<int>(heap.size()) - 1);
    }

    // Removes the top element. Does nothing when the heap is empty.
    void Pop()
    {
        if (heap.empty())
        {
            return;
        }
        // Move the top to the back, drop it, then sift the new root down.
        std::swap(heap.front(), heap.back());
        heap.pop_back();
        _AdjustDown(0, static_cast<int>(heap.size()) - 1);
    }

    bool Empty() const
    {
        return heap.empty();
    }

    size_t Size() const
    {
        return heap.size();
    }

    // Writes the elements in storage order (not sorted order) to std::cout,
    // separated by spaces and followed by a newline.
    void Print() const
    {
        for (typename std::vector<T>::const_iterator it = heap.begin(); it != heap.end(); ++it)
        {
            std::cout << *it << " ";
        }
        std::cout << std::endl;
    }

protected:
    // Sift-down: moves heap[root] towards the leaves until neither child has
    // to be above it.
    // NOTE: despite its name, `size` is the index of the LAST valid element
    // (i.e. element count - 1), so -1 denotes an empty heap.
    void _AdjustDown(int root, int size)
    {
        Comper comper;
        int parent = root;
        int child = root * 2 + 1;
        while (child <= size)
        {
            // Pick whichever child has to be higher up.
            if (child < size && comper(heap[child + 1], heap[child]))
            {
                ++child;
            }
            if (!comper(heap[child], heap[parent]))
            {
                break;
            }
            std::swap(heap[parent], heap[child]);
            parent = child;
            child = child * 2 + 1;
        }
    }

    // Sift-up: moves heap[pos] towards the root while it has to be above its
    // parent. (The misspelt name is kept because it is part of the protected
    // interface.)
    void _AdjistUp(int pos)
    {
        Comper comper;
        int child = pos;
        while (child > 0)
        {
            const int parent = (child - 1) / 2;
            if (!comper(heap[child], heap[parent]))
            {
                break;
            }
            std::swap(heap[child], heap[parent]);
            child = parent;
        }
    }

private:
    // Heapifies the whole vector bottom-up, starting at the last node that
    // has a child. Signed arithmetic keeps 0- and 1-element heaps from
    // underflowing.
    void _BuildHeap()
    {
        const int count = static_cast<int>(heap.size());
        for (int parent = count / 2 - 1; parent >= 0; --parent)
        {
            _AdjustDown(parent, count - 1);
        }
    }

    std::vector<T> heap;
};


压缩与解压缩:

#pragma once
#include"huffman.h"
#include<string>
#include<assert.h>

typedef unsigned long long TypeLong;

// Per-symbol record: a byte value, how often it occurs and its Huffman code.
// All comparisons and the addition look at _count only.
struct CharInfo
{
    unsigned char _ch; // the byte value
    TypeLong _count;   // number of occurrences
    string _code;      // Huffman code as a string of '0'/'1' characters

    // Implicit on purpose: a plain count can be passed where a CharInfo is expected.
    CharInfo(TypeLong count = 0)
        : _ch(0), _count(count), _code()
    {}

    // True when the two occurrence counts differ.
    bool operator!=(const CharInfo& info)const
    {
        return !(_count == info._count);
    }

    // Orders records by occurrence count.
    bool operator<(const CharInfo& info)const
    {
        return info._count > _count;
    }

    // Yields a record holding the summed count, with _ch 0 and an empty code.
    CharInfo operator+(const CharInfo& info)const
    {
        CharInfo merged(_count);
        merged._count += info._count;
        return merged;
    }
};

// Fixed-size record that is fwrite'n to / fread from the .config file:
// one byte value together with its occurrence count. A record whose _count
// equals (TypeLong)-1 marks the end of the list.
struct CountInfo
{
unsigned char _ch;        // the byte value
TypeLong _count;          // number of times the byte occurs
};

// Huffman-based file compressor / decompressor.
// Keeps one CharInfo entry per possible byte value (0..255).
class FileCompress
{
public:
// Sets entry i to byte value i with an occurrence count of 0.
FileCompress();
// Compresses `filename`; writes a ".huff" data file and a ".config"
// frequency file named after the input with its last extension replaced.
void CompressHuffCode(const char* filename);
// Decompresses `filename`, reading the matching ".config" file to rebuild
// the Huffman tree.
void UnCompressHuffCode(const char* filename);
// Prints "<byte>:><code>" to cout for every entry with a non-zero count.
void PrintCode()const;

protected:
// Walks the tree below `root` and stores each leaf's code in file._info;
// `code` carries the path accumulated so far.
static void GenerateHuffmanCode(HuffmanNode<CharInfo>* root, FileCompress& file, string& code);

protected:
// Indexed by byte value.
CharInfo _info[256];

};
// Prepares the symbol table: slot i stands for byte value i and starts with
// an occurrence count of zero.
FileCompress::FileCompress()
{
    size_t value = 0;
    for (CharInfo& slot : this->_info)
    {
        slot._ch = static_cast<unsigned char>(value);
        slot._count = 0;
        ++value;
    }
}

void FileCompress::CompressHuffCode(const char* filename)
{
assert(filename);
FILE* fOut = fopen(filename, "rb");
assert(fOut);
//统计字符出现的次数
char ch = fgetc(fOut);
while (!feof(fOut))
{
++this->_info[(unsigned char)ch]._count;
ch = fgetc(fOut);
}

//建立哈夫曼树
CharInfo invalid;
invalid._count = 0;
HuffmanTree<CharInfo> huffTree(this->_info, sizeof(this->_info) / sizeof(this->_info[0]), invalid);

//生成哈夫曼编码
string code;
HuffmanNode<CharInfo>* root = huffTree.GethuffmanNode();
GenerateHuffmanCode(root, *this, code);

//生成压缩文件名及配置文件名
string fileInName = (string)filename; //压缩文件名
string fileConfig = fileInName;//配置文件名
size_t last_ = fileInName.find_last_of('.');
if (last_ < fileInName.size())
{
fileInName.erase(last_);
fileConfig.erase(last_);
}
fileInName += ".huff";
fileConfig += ".config";

string tmp;
CountInfo info;
//生成压缩配置文件
FILE* fConfig = fopen(fileConfig.c_str(), "wb");
for (size_t i = 0; i < 256; ++i)
{
if (_info[i]._count)
{
info._ch = _info[i]._ch;
info._count = _info[i]._count;
fwrite(&info, sizeof(info), 1, fConfig);
}
}
info._count = -1;
fwrite(&info, sizeof(info), 1, fConfig);

//对文件进行压缩
FILE* fIn = fopen(fileInName.c_str(), "wb");
assert(fIn);
fseek(fOut, 0, SEEK_SET);
int pos = 0;
unsigned char putch = 0;
ch = fgetc(fOut);
while (!feof(fOut))
{
tmp = this->_info[(unsigned char)ch]._code;
for (size_t i = 0; i < tmp.size(); ++i)
{
putch <<= 1;
putch |= (tmp[i] - '0');
if (++pos == 8)
{
fputc(putch, fIn);
pos = 0;
putch = 0;
}
}
ch = fgetc(fOut);
}
if (pos > 0)
{
putch <<= (8 - pos);
fputc(putch, fIn);
}

fclose(fOut);
fclose(fIn);
fclose(fConfig);
}

// Depth-first walk that records the Huffman code of every leaf below `root`
// in file._info, indexed by the leaf's byte value.
// `code` holds the path from the tree root to `root` ('0' = left branch,
// '1' = right branch) and is restored to its incoming value before returning.
void FileCompress::GenerateHuffmanCode(HuffmanNode<CharInfo>* root, FileCompress& file, string& code)
{
    if (root == NULL)
    {
        return;
    }

    const bool hasChild = root->_left != NULL || root->_right != NULL;
    if (!hasChild)
    {
        // Leaf: the path accumulated so far is this symbol's code.
        file._info[root->_weight._ch]._code = code;
        return;
    }

    code += '0';
    GenerateHuffmanCode(root->_left, file, code);
    // The left walk leaves `code` unchanged, so its last character is still
    // the '0' appended above; flip it for the right branch.
    code[code.size() - 1] = '1';
    GenerateHuffmanCode(root->_right, file, code);
    code.erase(code.size() - 1);
}

void FileCompress::UnCompressHuffCode(const char* filename)
{
assert(filename);
FILE* fOut = fopen(filename, "rb");
assert(fOut);
//读取文件,
string fileConfig = (string)filename;
string fileInName = fileConfig;
size_t last_ = fileInName.find_last_of('.');
if (last_ < fileInName.size())
{
fileConfig.erase(last_);
fileInName.erase(last_);
}
fileConfig += ".config";
/*fileInName += "_Com.JPG";*/
fileInName += "_Com.mp3";
/*fileInName += "_Com.txt";*/
/*fileInName += "_Com.doc";*/

FILE* fIn = fopen(fileInName.c_str(), "wb");
assert(fIn);
FILE* fConfig = fopen(fileConfig.c_str(), "rb");
assert(fConfig);

CountInfo info;
//读配置信息
while (1)
{
fread(&info, sizeof(CountInfo), 1, fConfig);
if (info._count == -1)
{
break;
}
_info[(unsigned char)info._ch]._ch = info._ch;
_info[(unsigned char)info._ch]._count = info._count;
}

//重建哈夫曼树
HuffmanTree<CharInfo> tree(this->_info, sizeof(this->_info) / sizeof(this->_info[0]), 0);
HuffmanNode<CharInfo>* root = tree.GethuffmanNode();
HuffmanNode<CharInfo>* cur = root;
unsigned char ch = fgetc(fOut);
int pos = 7;
if (ch  == 255)
{
if (info._ch != 0)
while (_info[(unsigned char)info._ch]._count--)
{
{
fputc(info._ch, fIn);
}
}
}
else
{
TypeLong countSum = root->_weight._count;
while (countSum > 0)
{
while (pos >= 0)
{
if ((ch & (1 << pos)) == 0) //向左走
{
cur = cur->_left;
}
else
{
cur = cur->_right;
}
if (cur->_left == NULL && cur->_right == NULL)
{
fputc(cur->_weight._ch, fIn);
//cout << cur->_weight._ch;

if (--countSum == 0)//将没有写的字符的次数减1
break;
cur = root;
}
--pos;
}
pos = 7;
ch = fgetc(fOut);
}
}
fclose(fIn);
fclose(fOut);
fclose(fConfig);
}

void FileCompress::PrintCode()const
{
for (int i = 0; i < 256; ++i)
{
if (this->_info[i]._count != 0)
{
cout << this->_info[i]._ch << ":>" << this->_info[i]._code << endl;
}
}
}


(2)为什么要使用配置文件?

在项目中,将字符对应的编码转化为位,在unsigned char中填充位,填满后就写入到压缩文件中。

问题1:最后一个字节很有可能没有填满,该如何判断它是否填满,以及其中填了几个字符的编码?

问题2:若依次压缩一些文件,压缩完后再去解压,那么编码此时已经没有了,该如何解压?

上面的两个问题可通过配置文件解决:假如要压缩的文件叫xxx,那么可生成一个xxx.config的配置文件,在该配置文件中写入<文件的总长度>(恢复时知道应该恢复多少个字符)和<字符及其出现的次数>(用于解压时重建哈夫曼树),利用该配置文件即可解决这两个问题。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  压缩 哈夫曼编码