您的位置:首页 > 其它

文件压缩

2016-08-01 17:49 288 查看
项目名称《文件压缩》

开发环境:windows,vs2013

所用到的技术:heap,哈夫曼树,哈夫曼编码,c++

文件压缩项目的大致思路:

1.统计源文件中每个字符出现的次数,利用小堆(最小堆)构建哈夫曼树,然后利用哈夫曼树获得要压缩的每个字符的哈夫曼编码

2.利用哈夫曼编码对源文件进行压缩,就是在压缩文件中按顺序存入各字符对应的哈夫曼编码,并编写配置文件(记录字符总数以及各字符出现的次数)

3.利用配置文件获得各个字符及字符出现的次数,再次用小堆构建哈夫曼树

4.利用哈夫曼树对压缩文件进行解压。

下边是具体实现的代码:

Heap.h --->构建小堆

#pragma once
#include <vector>
#include<assert.h>

// Binary heap stored in a std::vector, plus the comparison functors it uses.
// With the default comparator (Less) the smallest element is on top.

// Strict "less than" functor; the default ordering, which yields a min-heap.
template<class T>
struct Less
{
    bool operator() (const T& l, const T& r) const
    {
        return l < r;
    }
};

// Strict "greater than" functor; plugging it into Heap yields a max-heap.
template<class T>
struct Greater
{
    bool operator() (const T& l, const T& r) const
    {
        return l > r;
    }
};

// Array-backed binary heap.
//
// Compare(a, b) returning true means "a belongs above b"; the element that
// belongs above all others is returned by Top().
//
// Note: Print() writes to std::cout, so <iostream> must have been included
// by the translation unit that includes this header.
template<class T, class Compare = Less<T>>
class Heap
{
public:
    // Creates an empty heap.
    Heap()
    {}

    // Builds a heap from the `size` elements starting at `a`.
    // `a` may be null only when `size` is 0.
    Heap(const T* a, size_t size)
    {
        assert(a || size == 0);

        _arrays.reserve(size);
        for (size_t i = 0; i < size; ++i)
        {
            _arrays.push_back(a[i]);
        }

        // Sift down every parent, last parent first. The last parent is at
        // index size/2 - 1; counting i from size/2 down to 1 keeps the
        // arithmetic unsigned-safe when the heap has 0 or 1 elements.
        for (size_t i = _arrays.size() / 2; i > 0; --i)
        {
            AdjustDown(static_cast<int>(i - 1));
        }
    }

    // Inserts a copy of `x`.
    void Push(const T& x)
    {
        _arrays.push_back(x);
        AdjustUp(static_cast<int>(_arrays.size() - 1));
    }

    // Removes the top element. Calling this on an empty heap is a
    // programming error (asserted); with asserts disabled it is a no-op.
    void Pop()
    {
        assert(!_arrays.empty());
        if (_arrays.empty())
        {
            return;
        }

        using std::swap;
        swap(_arrays.front(), _arrays.back());
        _arrays.pop_back();

        AdjustDown(0);
    }

    // Returns the top element. The heap must not be empty.
    T& Top()
    {
        assert(!_arrays.empty());
        return _arrays[0];
    }

    const T& Top() const
    {
        assert(!_arrays.empty());
        return _arrays[0];
    }

    bool Empty() const
    {
        return _arrays.empty();
    }

    // Number of stored elements.
    int Size() const
    {
        return static_cast<int>(_arrays.size());
    }

    // Sifts the element at index `root` down until neither child belongs
    // above it.
    void AdjustDown(int root)
    {
        assert(root >= 0);

        using std::swap;
        Compare com;

        size_t parent = static_cast<size_t>(root);
        size_t child = parent * 2 + 1;
        while (child < _arrays.size())
        {
            // Pick whichever child belongs higher.
            if (child + 1 < _arrays.size() &&
                com(_arrays[child + 1], _arrays[child]))
            {
                ++child;
            }

            if (!com(_arrays[child], _arrays[parent]))
            {
                break;
            }

            swap(_arrays[child], _arrays[parent]);
            parent = child;
            child = parent * 2 + 1;
        }
    }

    // Sifts the element at index `child` up while it belongs above its
    // parent. Indices <= 0 are a no-op.
    void AdjustUp(int child)
    {
        using std::swap;
        Compare com;

        while (child > 0)
        {
            const int parent = (child - 1) / 2;
            if (!com(_arrays[child], _arrays[parent]))
            {
                break;
            }

            swap(_arrays[parent], _arrays[child]);
            child = parent;
        }
    }

    // Debug helper: prints the elements in storage order, space separated,
    // followed by a newline.
    void Print() const
    {
        for (size_t i = 0; i < _arrays.size(); ++i)
        {
            std::cout << _arrays[i] << " ";
        }
        std::cout << std::endl;
    }

public:
    // Heap storage; index 0 is the top, children of i are 2i+1 and 2i+2.
    std::vector<T> _arrays;
};


哈夫曼树的实现代码 HuffmanTree.h :

#pragma once
#include "Heap.h"
#include<assert.h>

// A single node of a Huffman tree: links to both children and to the parent,
// plus the weight carried by the node.
template<class T>
struct HuffmanTreeNode
{
    HuffmanTreeNode<T>* _left;    // left child; NULL until linked
    HuffmanTreeNode<T>* _right;   // right child; NULL until linked
    HuffmanTreeNode<T>* _parent;  // parent node; NULL until linked
    T _weight;                    // payload / weight of this node

    // Creates an unlinked node carrying a copy of `x`.
    HuffmanTreeNode(const T& x)
        : _left(NULL)
        , _right(NULL)
        , _parent(NULL)
        , _weight(x)
    {}
};

template<class T>
class HuffmanTree
{
typedef HuffmanTreeNode<T> Node;

public:

HuffmanTree()
:_root(NULL)
{}

~HuffmanTree()
{
Destory(_root);
}

template <class T>
struct NodeCompare
{
bool operator()(Node *l, Node *r)
{
return l->_weight < r->_weight;
}
};

public:
void CreatTree(const T* a, size_t size, const T& invalid)
{
assert(a);
Heap<Node*, NodeCompare<T>> minHeap;
for (size_t i = 0; i < size; ++i)
{
if (a[i] != invalid)
{
Node* node = new Node(a[i]);
minHeap.Push(node);
}
}

while (minHeap.Size() > 1)
{
Node* left = minHeap.Top();
minHeap.Pop();
Node* right = minHeap.Top();
minHeap.Pop();

Node* parent = new Node(left->_weight + right->_weight);
parent->_left = left;
parent->_right = right;
left->_parent = parent;
right->_parent = parent;

minHeap.Push(parent);
}

_root = minHeap.Top();
}

Node* GetRootNode()
{
return _root;
}

void Destory(Node* root)
{
if (root)
{
Destory(root->_left);
Destory(root->_right);
delete root;
root = NULL;
}
}
private:
HuffmanTreeNode<T>* _root;
};


文件压缩的实现 ---> FileCompress.h

#pragma once
#include"HuffmanTree.h"
#include<algorithm>
#include<windows.h>
#include<string.h>
using namespace std;

// Counter type for byte occurrences: int is too narrow for large files, so
// counts are kept in a long long.
typedef long long Longtype;

// Per-byte bookkeeping for Huffman coding: the byte value, how many times it
// occurs, and its Huffman code as a string of '0'/'1' characters.
// All comparison and arithmetic operators look at _count only.
struct CharInfo
{
    unsigned char _ch;   // must be unsigned so values span 0..255, not -128..127
    Longtype _count;     // number of occurrences
    std::string _code;   // Huffman code, one '0'/'1' character per bit

    CharInfo(unsigned char ch = 0)
        :_ch(ch)
        , _count(0)
    {}

    // Returns an entry whose count is the sum of both counts; its _ch is 0
    // and its _code is empty.
    CharInfo operator+(const CharInfo& file) const
    {
        CharInfo tmp;
        tmp._count = this->_count + file._count;
        return tmp;
    }

    // Orders entries by ascending count.
    bool operator < (const CharInfo& file) const
    {
        return this->_count < file._count;
    }

    // True when the two counts differ.
    bool operator != (const CharInfo& file) const
    {
        return this->_count != file._count;
    }
};

// Huffman-coding file compressor / decompressor.
//
// Compress(name) reads `name` and writes two files next to it:
//   name.compress - the Huffman bit stream, packed most-significant-bit
//                   first; the last byte is zero-padded
//   name.config   - two decimal lines holding the high and the low 32 bits
//                   of the total byte count, then one "<byte>,<count>\n"
//                   record (raw byte, decimal count) per byte value that
//                   occurs in the input
// UnCompresss(name) reads those two files and writes name.uncompress.
//
// Both operations rebuild the frequency table from scratch, so one object
// can be reused for several files. The template parameter T is unused; it
// is kept so existing FileCompress<CharInfo> instantiations still compile.
template<class T>
class FileCompress
{
public:
    FileCompress()
    {
        for (int i = 0; i < 256; ++i)
        {
            _arr[i]._ch = static_cast<unsigned char>(i);
        }
    }

public:

    // Compresses `filename` into `filename`.compress and `filename`.config.
    // Returns false if a file cannot be opened or a write error is detected.
    bool Compress(const char* filename)
    {
        assert(filename);
        ResetTable();

        // "rb"/"wb": binary mode, so no \r\n <-> \n translation takes place.
        FileCloser in(fopen(filename, "rb"));
        if (in.fp == NULL)
        {
            return false;
        }

        // 1. Count how often every byte value occurs. fgetc returns an int
        //    so that EOF can be told apart from the data byte 0xFF.
        long long total = 0;
        int byte = 0;
        while ((byte = fgetc(in.fp)) != EOF)
        {
            ++_arr[byte]._count;
            ++total;
        }

        // 2. Derive the Huffman code of every byte that occurs.
        GenerateHuffmanCode();

        // 3. Re-read the input and emit the packed code bits.
        const std::string base = filename;
        FileCloser out(fopen((base + ".compress").c_str(), "wb"));
        if (out.fp == NULL)
        {
            return false;
        }

        fseek(in.fp, 0, SEEK_SET);
        unsigned char packed = 0;
        int used = 0;  // number of bits currently held in `packed`
        while ((byte = fgetc(in.fp)) != EOF)
        {
            const std::string& code = _arr[byte]._code;
            for (size_t i = 0; i < code.size(); ++i)
            {
                packed = static_cast<unsigned char>(
                    (packed << 1) | (code[i] == '1' ? 1 : 0));
                if (++used == 8)
                {
                    fputc(packed, out.fp);
                    packed = 0;
                    used = 0;
                }
            }
        }

        if (used != 0)
        {
            // Left-align the leftover bits; the padding bits are zero.
            packed = static_cast<unsigned char>(packed << (8 - used));
            fputc(packed, out.fp);
        }

        // 4. Write the config file that decompression rebuilds the tree from.
        FileCloser config(fopen((base + ".config").c_str(), "wb"));
        if (config.fp == NULL)
        {
            return false;
        }

        fprintf(config.fp, "%lld\n%lld\n", total >> 32, total & 0xffffffffLL);
        for (int i = 0; i < 256; ++i)
        {
            if (_arr[i]._count != 0)
            {
                fputc(_arr[i]._ch, config.fp);
                fputc(',', config.fp);
                fprintf(config.fp, "%lld\n",
                        static_cast<long long>(_arr[i]._count));
            }
        }

        return !ferror(out.fp) && !ferror(config.fp);
    }

    // Restores `filename`.uncompress from `filename`.compress and
    // `filename`.config. Returns false if a file cannot be opened, the
    // config is malformed, or the compressed data ends too early.
    bool UnCompresss(const char* filename)
    {
        assert(filename);
        ResetTable();
        const std::string base = filename;

        // 1. Load the total byte count and the per-byte counts.
        long long total = 0;
        {
            FileCloser config(fopen((base + ".config").c_str(), "rb"));
            if (config.fp == NULL)
            {
                return false;
            }

            const long long high = strtoll(ReadLine(config.fp).c_str(), NULL, 10);
            const long long low = strtoll(ReadLine(config.fp).c_str(), NULL, 10);
            if (high < 0)
            {
                return false;
            }
            // Keep only the low 32 bits of the second line, so a low half
            // that was stored as a negative int still decodes correctly.
            total = (high << 32) | (low & 0xffffffffLL);

            // Records are parsed byte-wise rather than line-wise because the
            // recorded byte may itself be '\n'.
            long long sum = 0;
            for (;;)
            {
                const int byte = fgetc(config.fp);
                if (byte == EOF)
                {
                    break;
                }
                if (fgetc(config.fp) != ',')
                {
                    return false;
                }

                const long long count =
                    strtoll(ReadLine(config.fp).c_str(), NULL, 10);
                if (count < 0)
                {
                    return false;
                }
                _arr[byte]._count = count;
                sum += count;
            }

            if (sum != total)
            {
                return false;
            }
        }

        FileCloser in(fopen((base + ".compress").c_str(), "rb"));
        if (in.fp == NULL)
        {
            return false;
        }
        FileCloser out(fopen((base + ".uncompress").c_str(), "wb"));
        if (out.fp == NULL)
        {
            return false;
        }

        if (total == 0)
        {
            return true;  // empty source: the empty output file is the result
        }

        // 2. Rebuild the same tree the compressor used.
        HuffmanTree<CharInfo> ht;
        CharInfo invalid;
        ht.CreatTree(_arr, 256, invalid);
        HuffmanTreeNode<CharInfo>* root = ht.GetRootNode();
        if (root == NULL)
        {
            return false;
        }

        // A source with a single distinct byte yields a one-node tree whose
        // code is empty, so no bits were stored: just repeat that byte.
        if (root->_left == NULL && root->_right == NULL)
        {
            for (long long i = 0; i < total; ++i)
            {
                fputc(root->_weight._ch, out.fp);
            }
            return !ferror(out.fp);
        }

        // 3. Walk the tree bit by bit, most significant bit first, and stop
        //    after `total` bytes so the padding bits are never decoded.
        long long remaining = total;
        HuffmanTreeNode<CharInfo>* cur = root;
        int byte = 0;
        while (remaining > 0 && (byte = fgetc(in.fp)) != EOF)
        {
            for (int bit = 7; bit >= 0 && remaining > 0; --bit)
            {
                cur = (byte & (1 << bit)) ? cur->_right : cur->_left;
                if (cur->_left == NULL && cur->_right == NULL)
                {
                    fputc(cur->_weight._ch, out.fp);
                    cur = root;
                    --remaining;
                }
            }
        }

        return remaining == 0 && !ferror(out.fp);
    }

protected:
    // Reads characters up to, but not including, the next '\n' or the end of
    // the file. Returns an empty string when nothing is left to read.
    std::string ReadLine(FILE* fConfig)
    {
        std::string line;
        int ch = 0;
        while ((ch = fgetc(fConfig)) != EOF && ch != '\n')
        {
            line += static_cast<char>(ch);
        }
        return line;
    }

    // Builds a Huffman tree from the current counts and stores the resulting
    // code of every occurring byte in _arr. Does nothing when all counts
    // are zero.
    void GenerateHuffmanCode()
    {
        bool anyByteSeen = false;
        for (int i = 0; i < 256 && !anyByteSeen; ++i)
        {
            anyByteSeen = (_arr[i]._count != 0);
        }
        if (!anyByteSeen)
        {
            return;
        }

        HuffmanTree<CharInfo> hft;
        CharInfo invalid;

        hft.CreatTree(_arr, 256, invalid);
        _GenerateHuffmanCode(hft.GetRootNode());
    }

    // Assigns a code to every leaf below `root`: the path from the tree's
    // root to the leaf, '0' for a left branch and '1' for a right branch.
    void _GenerateHuffmanCode(HuffmanTreeNode<CharInfo>* root)
    {
        if (root == NULL)
        {
            return;
        }

        if (root->_left == NULL && root->_right == NULL)
        {
            // Collect the branch taken at each ancestor while climbing to
            // the root, then reverse to obtain root-to-leaf order.
            std::string code;
            for (HuffmanTreeNode<CharInfo>* cur = root;
                 cur->_parent != NULL;
                 cur = cur->_parent)
            {
                code += (cur->_parent->_left == cur) ? '0' : '1';
            }
            std::reverse(code.begin(), code.end());

            _arr[root->_weight._ch]._code = code;
            return;
        }

        _GenerateHuffmanCode(root->_left);
        _GenerateHuffmanCode(root->_right);
    }

private:
    // Closes the wrapped stdio stream on scope exit, so every early return
    // releases its files.
    struct FileCloser
    {
        FILE* fp;

        explicit FileCloser(FILE* f)
            : fp(f)
        {}

        ~FileCloser()
        {
            if (fp != NULL)
            {
                fclose(fp);
            }
        }

        FileCloser(const FileCloser&) = delete;
        FileCloser& operator=(const FileCloser&) = delete;
    };

    // Clears the counts and codes left over from a previous operation.
    void ResetTable()
    {
        for (int i = 0; i < 256; ++i)
        {
            _arr[i]._count = 0;
            _arr[i]._code.clear();
        }
    }

    CharInfo _arr[256];  // indexed by byte value
};

void TestFileCompress()
{

FileCompress<CharInfo> fc;
cout << "Input文件压缩中...." << endl;
cout << "压缩用时: ";
int begin1 = GetTickCount();
fc.Compress("Input");//
int end1 = GetTickCount();//
cout << end1 - begin1 << endl << endl;

cout << "Input文件解压中...." << endl;;
cout << "解压用时: ";
int begin2 = GetTickCount();
fc.UnCompresss("Input");
int end2 = GetTickCount();//用以测试解压用时
cout << end2 - begin2 << endl << endl;

FileCompress<CharInfo> fc1;

cout << "Input.BIG文件压缩中...." << endl;
cout << "压缩用时: ";
int begin3 = GetTickCount();
fc1.Compress("Input.BIG");//
int end3 = GetTickCount();//
cout << end3 - begin3 << endl << endl;

cout << "Input.BIG文件解压中...." << endl;
cout << "解压用时: ";
int begin4 = GetTickCount();
fc1.UnCompresss("Input.BIG");
int end4 = GetTickCount();
cout << end4 - begin4 << endl;
}


main函数:

#define _CRT_SECURE_NO_WARNINGS
#include <iostream>
using namespace std;
#include "FileCompress.h"

// Program entry point: runs the compression round-trip demo.
int main()
{
    TestFileCompress();
    // Reaching the end of main is equivalent to `return 0;`.
}


下边是项目运行结果截图:







ps:图中的比较软件为 BeyondCompare4
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息