文件压缩
2016-08-01 17:49
288 查看
项目名称《文件压缩》
开发环境:windows,vs2013
所用到的技术:heap,哈夫曼树,哈夫曼编码,c++
文件压缩项目的大致思路:
1.利用小堆进行构建哈夫曼树,然后利用哈夫曼树获得要压缩的每个字符的哈夫曼编码
2.利用哈夫曼编码对源文件进行压缩:按源文件中字符出现的顺序,把每个字符对应的哈夫曼编码按位依次写入压缩文件(每8个bit凑成一个字节),同时生成配置文件,记录字符总数以及各字符的出现次数
3.利用配置文件获得各个字符及字符出现的次数,再次用小堆构建哈夫曼树
4.利用哈夫曼树对压缩文件进行解压。
下边是具体实现的代码:
Heap.h --->构建小堆
哈夫曼树的实现代码 HuffmanTree.h :
文件压缩的实现 ---> FileCompress.h
main函数:
下边是项目运行结果截图:
ps:图中的比较软件为 BeyondCompare4
开发环境:windows,vs2013
所用到的技术:heap,哈夫曼树,哈夫曼编码,c++
文件压缩项目的大致思路:
1.利用小堆进行构建哈夫曼树,然后利用哈夫曼树获得要压缩的每个字符的哈夫曼编码
2.利用哈夫曼编码对源文件进行压缩:按源文件中字符出现的顺序,把每个字符对应的哈夫曼编码按位依次写入压缩文件(每8个bit凑成一个字节),同时生成配置文件,记录字符总数以及各字符的出现次数
3.利用配置文件获得各个字符及字符出现的次数,再次用小堆构建哈夫曼树
4.利用哈夫曼树对压缩文件进行解压。
下边是具体实现的代码:
Heap.h --->构建小堆
#pragma once
#include <assert.h>
#include <stddef.h>

#include <iostream>
#include <utility>
#include <vector>

/// Comparator that returns l < r. Heap<T, Less<T>> keeps the smallest
/// element on top (min-heap).
template<class T>
struct Less
{
    bool operator()(const T& l, const T& r) const
    {
        return l < r;
    }
};

/// Comparator that returns l > r. Heap<T, Greater<T>> keeps the largest
/// element on top (max-heap).
template<class T>
struct Greater
{
    bool operator()(const T& l, const T& r) const
    {
        return l > r;
    }
};

/// Binary heap stored in a std::vector.
///
/// Compare()(a, b) must return true when `a` has to sit closer to the top
/// than `b`. A Compare object is default-constructed whenever an ordering
/// decision is needed, so it must be default-constructible.
template<class T, class Compare = Less<T>>
class Heap
{
public:
    /// Creates an empty heap.
    Heap()
    {}

    /// Builds a heap from the `size` elements starting at `a`.
    /// `a` may be null only when `size` is 0.
    Heap(const T* a, size_t size)
    {
        assert(a != nullptr || size == 0);
        _arrays.reserve(size);
        for (size_t i = 0; i < size; ++i)
        {
            _arrays.push_back(a[i]);
        }

        // Heapify from the last parent (index size/2 - 1) down to the root.
        // Counting down with "i-- > 0" on an unsigned index is safe for 0 or
        // 1 elements, where "(size - 2) / 2" would wrap around.
        for (size_t i = _arrays.size() / 2; i-- > 0;)
        {
            SiftDown(i);
        }
    }

    /// Inserts a copy of `x`.
    void Push(const T& x)
    {
        _arrays.push_back(x);
        SiftUp(_arrays.size() - 1);
    }

    /// Removes the top element. Asserts that the heap is not empty; in builds
    /// without assertions an empty heap is left untouched.
    void Pop()
    {
        assert(!_arrays.empty());
        if (_arrays.empty())
        {
            return;
        }
        if (_arrays.size() > 1)
        {
            using std::swap;
            swap(_arrays.front(), _arrays.back());
        }
        _arrays.pop_back();
        SiftDown(0);
    }

    /// Returns the top element. The heap must not be empty.
    T& Top()
    {
        assert(!_arrays.empty());
        return _arrays[0];
    }

    /// Read-only access to the top element. The heap must not be empty.
    const T& Top() const
    {
        assert(!_arrays.empty());
        return _arrays[0];
    }

    /// Returns true when the heap holds no elements.
    bool Empty() const
    {
        return _arrays.empty();
    }

    /// Returns the number of stored elements.
    int Size() const
    {
        return static_cast<int>(_arrays.size());
    }

    /// Restores the heap property for the subtree rooted at index `root` by
    /// moving that element downwards. Out-of-range indices are ignored.
    void AdjustDown(int root)
    {
        if (root < 0)
        {
            return;
        }
        SiftDown(static_cast<size_t>(root));
    }

    /// Restores the heap property by moving the element at index `child`
    /// towards the root. Out-of-range indices are ignored.
    void AdjustUp(int child)
    {
        if (child < 0)
        {
            return;
        }
        SiftUp(static_cast<size_t>(child));
    }

    /// Writes every element followed by a space, then a newline, to std::cout.
    void Print() const
    {
        for (size_t i = 0; i < _arrays.size(); ++i)
        {
            std::cout << _arrays[i] << " ";
        }
        std::cout << std::endl;
    }

private:
    // Moves the element at `root` down until neither child outranks it.
    void SiftDown(size_t root)
    {
        using std::swap;
        const size_t count = _arrays.size();
        if (root >= count)
        {
            return;
        }

        Compare com;
        size_t child = root * 2 + 1;
        while (child < count)
        {
            // Pick whichever child should be closer to the top.
            if (child + 1 < count && com(_arrays[child + 1], _arrays[child]))
            {
                ++child;
            }
            if (!com(_arrays[child], _arrays[root]))
            {
                break;
            }
            swap(_arrays[child], _arrays[root]);
            root = child;
            child = root * 2 + 1;
        }
    }

    // Moves the element at `child` up while it outranks its parent.
    void SiftUp(size_t child)
    {
        using std::swap;
        if (child >= _arrays.size())
        {
            return;
        }

        Compare com;
        while (child > 0)
        {
            const size_t parent = (child - 1) / 2;
            if (!com(_arrays[child], _arrays[parent]))
            {
                break;
            }
            swap(_arrays[parent], _arrays[child]);
            child = parent;
        }
    }

public:
    // Heap storage in array layout: the children of index i are 2i+1 and 2i+2.
    std::vector<T> _arrays;
};
哈夫曼树的实现代码 HuffmanTree.h :
#pragma once #include "Heap.h" #include<assert.h> template<class T> struct HuffmanTreeNode { HuffmanTreeNode<T>* _left; HuffmanTreeNode<T>* _right; HuffmanTreeNode<T>* _parent; T _weight; HuffmanTreeNode(const T& x) :_weight(x) , _left(NULL) , _right(NULL) , _parent(NULL) {} }; template<class T> class HuffmanTree { typedef HuffmanTreeNode<T> Node; public: HuffmanTree() :_root(NULL) {} ~HuffmanTree() { Destory(_root); } template <class T> struct NodeCompare { bool operator()(Node *l, Node *r) { return l->_weight < r->_weight; } }; public: void CreatTree(const T* a, size_t size, const T& invalid) { assert(a); Heap<Node*, NodeCompare<T>> minHeap; for (size_t i = 0; i < size; ++i) { if (a[i] != invalid) { Node* node = new Node(a[i]); minHeap.Push(node); } } while (minHeap.Size() > 1) { Node* left = minHeap.Top(); minHeap.Pop(); Node* right = minHeap.Top(); minHeap.Pop(); Node* parent = new Node(left->_weight + right->_weight); parent->_left = left; parent->_right = right; left->_parent = parent; right->_parent = parent; minHeap.Push(parent); } _root = minHeap.Top(); } Node* GetRootNode() { return _root; } void Destory(Node* root) { if (root) { Destory(root->_left); Destory(root->_right); delete root; root = NULL; } } private: HuffmanTreeNode<T>* _root; };
文件压缩的实现 ---> FileCompress.h
#pragma once #include"HuffmanTree.h" #include<algorithm> #include<windows.h> #include<string.h> using namespace std; typedef long long Longtype;//为了扩大其范围,int型能处理的范围已经不能满足,所以定义Long Long型予以表示 struct CharInfo { unsigned char _ch;//这里必须为unsigned,否则会造成截断,所以从-128~127调至0~255. Longtype _count; string _code; CharInfo(unsigned char ch = 0) :_ch(ch) , _count(0) {} CharInfo operator+(CharInfo& file) { CharInfo tmp; tmp._count = this->_count + file._count; return tmp; } bool operator < (CharInfo& file) { return this->_count < file._count; } bool operator != (const CharInfo& file)const { return this->_count != file._count; } }; template<class T> class FileCompress { public: FileCompress() { for (int i = 0; i < 256; ++i) { _arr[i]._ch = i; } } public: bool Compress(const char* filename) { //1.打开文件,统计文件字符出现的次数 long long Charcount = 0; assert(filename); FILE* fOut = fopen(filename, "rb");//"rb"为以二进制方式读取文件,这里的b就是binary。"wb"为以二进制方式写入文件 assert(fOut); //以二进制和文本打开方式区别在于:以文本打开方式会将\r\n //转换为\n,二进制这不会有这样的转换 char ch = fgetc(fOut); while (ch != EOF) { _arr[(unsigned char)ch]._count++; ch = fgetc(fOut); Charcount++; } //2.生成对应的huffman编码 GenerateHuffmanCode(); //3.文件压缩 string compressFile = filename; compressFile += ".compress"; FILE* fwCompress = fopen(compressFile.c_str(), "wb"); assert(fwCompress); fseek(fOut, 0, SEEK_SET); ch = fgetc(fOut); char inch = 0; int index = 0; while (!feof(fOut)) { string& code = _arr[(unsigned char)ch]._code; for (size_t i = 0; i < code.size(); ++i) { inch = inch << 1; if (code[i] == '1') { inch |= 1; } if (++index == 8)//对于形成的长串字符编码的切割,每8个bit为一个字节,便于读取 { fputc(inch, fwCompress); inch = 0; index = 0; } } ch = fgetc(fOut); } if (index)//考虑到可能会有切割完,剩余的字符码不够填充8个bit位的情况 { inch = inch << (8 - index); fputc(inch, fwCompress); } //4.配置文件,方便后续的解压缩; string configFile = filename; configFile += ".config"; FILE *fconfig = fopen(configFile.c_str(), "wb"); assert(fconfig); char CountStr[128]; _itoa(Charcount >> 32, CountStr, 10); fputs(CountStr, fconfig); fputc('\n', 
fconfig); _itoa(Charcount & 0xffffffff, CountStr, 10); fputs(CountStr, fconfig); fputc('\n', fconfig); CharInfo invalid; for (int i = 0; i < 256; i++) { if (_arr[i] != invalid) { fputc(_arr[i]._ch, fconfig); fputc(',', fconfig); fputc(_arr[i]._count + '0', fconfig); fputc('\n', fconfig); } } fclose(fOut); fclose(fwCompress); fclose(fconfig); return true; } //文件的解压 bool UnCompresss(const char* filename) { string configfile = filename; configfile += ".config"; FILE* outConfig = fopen(configfile.c_str(), "rb"); assert(outConfig); char ch; long long Charcount = 0; string line = ReadLine(outConfig); Charcount = atoi(line.c_str()); Charcount <<= 32; line.clear(); line = ReadLine(outConfig); Charcount += atoi(line.c_str()); line.clear(); while (feof(outConfig)) //feof()遇到文件结束,函数值为非零值,否则为0。当把数据以二进制的形式进行存放时,可能会有-1值的出现, //所以此时无法利用-1值(EOF)做为eof()函数判断二进制文件结束的标志。 { line = ReadLine(outConfig); if (!line.empty()) { ch = line[0]; _arr[(unsigned char)ch]._count += atoi(line.substr(2).c_str()); line.clear(); } else { line = '\n'; } } HuffmanTree<CharInfo> ht; CharInfo invalid; ht.CreatTree(_arr, 256, invalid);//重新建树 HuffmanTreeNode<CharInfo>* root = ht.GetRootNode(); string UnCompressFile = filename; UnCompressFile += ".uncompress"; FILE* fOut = fopen(UnCompressFile.c_str(), "wb"); string CompressFile = filename; CompressFile += ".compress"; FILE* fIn = fopen(CompressFile.c_str(), "rb"); int pos = 8; HuffmanTreeNode<CharInfo>* cur = root; ch = fgetc(fIn); while ((unsigned char)ch != EOF) { --pos; if ((unsigned char)ch &(1 << pos)) { cur = cur->_right; } else { cur = cur->_left; } if (cur->_left == NULL && cur->_right == NULL) { fputc(cur->_weight._ch, fOut); cur = root; Charcount--; } if (pos == 0) { ch = fgetc(fIn); pos = 8; } if (Charcount == 0) { break; } } fclose(outConfig); fclose(fIn); fclose(fOut); return true; } protected: string ReadLine(FILE* fConfig) { char ch = fgetc(fConfig); if (ch == EOF) { return ""; } string line; while (ch != '\n' && ch != EOF) { line += ch; ch = 
fgetc(fConfig); } return line; } void GenerateHuffmanCode() { HuffmanTree<CharInfo> hft; CharInfo invalid; hft.CreatTree(_arr, 256, invalid); _GenerateHuffmanCode(hft.GetRootNode()); } void _GenerateHuffmanCode(HuffmanTreeNode<CharInfo>* root) { if (root == NULL) { return; } _GenerateHuffmanCode(root->_left); _GenerateHuffmanCode(root->_right); if (root->_left == NULL && root->_right == NULL) { HuffmanTreeNode<CharInfo>* cur = root; HuffmanTreeNode<CharInfo>* parent = cur->_parent; string& code = _arr[cur->_weight._ch]._code; while (parent) { if (parent->_left == cur) { code += '0'; } else if (parent->_right == cur) { code += '1'; } cur = parent; parent = cur->_parent; } reverse(code.begin(), code.end()); } } private: CharInfo _arr[256]; }; void TestFileCompress() { FileCompress<CharInfo> fc; cout << "Input文件压缩中...." << endl; cout << "压缩用时: "; int begin1 = GetTickCount(); fc.Compress("Input");// int end1 = GetTickCount();// cout << end1 - begin1 << endl << endl; cout << "Input文件解压中...." << endl;; cout << "解压用时: "; int begin2 = GetTickCount(); fc.UnCompresss("Input"); int end2 = GetTickCount();//用以测试解压用时 cout << end2 - begin2 << endl << endl; FileCompress<CharInfo> fc1; cout << "Input.BIG文件压缩中...." << endl; cout << "压缩用时: "; int begin3 = GetTickCount(); fc1.Compress("Input.BIG");// int end3 = GetTickCount();// cout << end3 - begin3 << endl << endl; cout << "Input.BIG文件解压中...." << endl; cout << "解压用时: "; int begin4 = GetTickCount(); fc1.UnCompresss("Input.BIG"); int end4 = GetTickCount(); cout << end4 - begin4 << endl; }
main函数:
#define _CRT_SECURE_NO_WARNINGS #include <iostream> using namespace std; #include "FileCompress.h" int main() { TestFileCompress(); return 0; }
下边是项目运行结果截图:
ps:图中的比较软件为 BeyondCompare4
相关文章推荐
- 如何重装TCP/IP协议
- Windows 8 官方高清壁纸欣赏与下载
- 谁是桌面王者?Win PK Linux三大镇山之宝
- 对《大家都在点赞 Windows Terminal,我决定给你泼一盆冷水》一文的商榷
- Windows Clang开发环境备忘
- 从Windows系统下访问Linux分区相关软件
- 对《大家都在点赞 Windows Terminal,我决定给你泼一盆冷水》一文的商榷
- Windows下搭建本地SVN服务器
- 使用Windows原生命令一键清空剪贴板
- windows用windeployqt发布qt quick application程序
- 利用开源软件打造自己的全功能远程工具
- Windows 8虚拟机不能全屏的解决方法
- 虚拟化基础架构Windows 2008篇之1-虚拟化基础服务概述
- 虚拟化基础架构Windows 2008篇之5-安装Windows部署服务
- 虚拟化基础架构Windows 2008篇之7-添加其他操作系统的安装镜像
- 虚拟化基础架构Windows 2008篇之9-配置Windows部署服务
- 虚拟化基础架构Windows 2008篇之12-WSUS工作站端配置
- Visual Studio 2012 示例代码浏览器 - 数以千计的开发示例近在手边,唾手可得