文件压缩总结-哈夫曼树
2017-08-18 16:20
120 查看
详细源代码请移步下载:https://github.com/HsTime/file-campress
项目:文件压缩流程图
建立小堆代码:
压缩与解压缩:
(2)为什么要使用配置文件?
在项目中,将字符对应的编码转化为位,在unsigned char中填充位,填满后就写入到压缩文件中。
问题1:最后一个字节很可能没有填满,该如何判断它是否填满,以及填了几个字符的编码?
问题2:若依次压缩一些文件,压缩完后再去解压,那么编码此时已经没有了,该如何解压?
上面的两个问题可通过配置文件解决:假如要压缩的文件叫xxx,那么可生成一个名为xxx.config的配置文件,在该配置文件中写入<文件的总长度>(恢复时知道应该恢复多少个字符)和<字符以及其出现的次数>(用于解压时重建哈夫曼树),利用该配置文件即可解决这两个问题。
项目:文件压缩流程图
建立小堆代码:
#pragma once #include<iostream> using namespace std; #include<vector> #include"huffman.h" template<class T> struct Less { bool operator()(const T & l, const T& n) { return l < n; } }; template<class T> struct Greater { bool operator()(const T& l, const T& n) { return l > n; } }; template<class T, class Comper> class Heap { public: Heap() :heap(NULL) {} Heap(T* arr, size_t n) { heap.reserve(n); for (size_t i = 0; i < n; i++) { heap.push_back(arr[i]); } int parent = (heap.size() - 2) / 2; while (parent >= 0) { _AdjustDown(parent, heap.size() - 1); parent--; } } T Top() { return heap[0]; } Heap(const vector<T>& x) { heap.clear(); heap.resize(x.size(), 0); for (size_t i = 0; i < x.size(); i++) { heap[i] = x[i]; } int parent = (heap.size() - 2) / 2; while (parent >= 0) { _AdjustDown(parent, heap.size() - 1); parent--; } } void push(const T& x) { heap.push_back(x); _AdjistUp(heap.size() - 1); } void Pop() { swap(heap[0], heap[heap.size() - 1]); heap.pop_back(); _AdjustDown(0, heap.size() - 1); } bool Empty() { return heap.empty(); } size_t Size() { return heap.size(); } void Print() { vector<int>::iterator it = heap.begin(); while (it != heap.end()) { cout << *it << " "; it++; } cout << endl; } protected: void _AdjustDown(int root, int size)//Ïòϵ÷Õû { Comper comper; int parent = root; int child = root * 2 + 1; while (child <= size) { if (child < size && comper(heap[child + 1], heap[child])) { child++; } if (comper(heap[child], heap[parent])) { swap(heap[parent], heap[child]); parent = child; child = child * 2 + 1; } else { break; } } } void _AdjistUp(int pos) { Comper comper; int child = pos; int parent = (child - 1) / 2; while (child > 0) { if (comper(heap[child], heap[parent])) { swap(heap[child], heap[parent]); child = parent; parent = (parent - 1) / 2; } else break; } } private: vector<T> heap; };
压缩与解压缩:
#pragma once #include"huffman.h" #include<string> #include<assert.h> typedef unsigned long long TypeLong; struct CharInfo { unsigned char _ch; //字符 TypeLong _count;//出现次数 string _code;//Huffman编码 CharInfo(TypeLong count = 0) :_ch(0) , _count(count) , _code("") {} bool operator!=(const CharInfo& info)const { return this->_count != info._count; } bool operator<(const CharInfo& info)const { return this->_count < info._count; } CharInfo operator+(const CharInfo& info)const { return CharInfo(this->_count + info._count); } }; struct CountInfo { unsigned char _ch; //字符 TypeLong _count; //字符出现的次数 }; class FileCompress { public: FileCompress(); void CompressHuffCode(const char* filename); void UnCompressHuffCode(const char* filename); void PrintCode()const; protected: static void GenerateHuffmanCode(HuffmanNode<CharInfo>* root, FileCompress& file, string& code); protected: CharInfo _info[256]; }; FileCompress::FileCompress() { size_t size = sizeof(this->_info) / sizeof(this->_info[0]); for (size_t i = 0; i < size; ++i) { this->_info[i]._ch = i; this->_info[i]._count = 0; } } void FileCompress::CompressHuffCode(const char* filename) { assert(filename); FILE* fOut = fopen(filename, "rb"); assert(fOut); //统计字符出现的次数 char ch = fgetc(fOut); while (!feof(fOut)) { ++this->_info[(unsigned char)ch]._count; ch = fgetc(fOut); } //建立哈夫曼树 CharInfo invalid; invalid._count = 0; HuffmanTree<CharInfo> huffTree(this->_info, sizeof(this->_info) / sizeof(this->_info[0]), invalid); //生成哈夫曼编码 string code; HuffmanNode<CharInfo>* root = huffTree.GethuffmanNode(); GenerateHuffmanCode(root, *this, code); //生成压缩文件名及配置文件名 string fileInName = (string)filename; //压缩文件名 string fileConfig = fileInName;//配置文件名 size_t last_ = fileInName.find_last_of('.'); if (last_ < fileInName.size()) { fileInName.erase(last_); fileConfig.erase(last_); } fileInName += ".huff"; fileConfig += ".config"; string tmp; CountInfo info; //生成压缩配置文件 FILE* fConfig = fopen(fileConfig.c_str(), "wb"); for (size_t i = 0; i < 256; ++i) { 
if (_info[i]._count) { info._ch = _info[i]._ch; info._count = _info[i]._count; fwrite(&info, sizeof(info), 1, fConfig); } } info._count = -1; fwrite(&info, sizeof(info), 1, fConfig); //对文件进行压缩 FILE* fIn = fopen(fileInName.c_str(), "wb"); assert(fIn); fseek(fOut, 0, SEEK_SET); int pos = 0; unsigned char putch = 0; ch = fgetc(fOut); while (!feof(fOut)) { tmp = this->_info[(unsigned char)ch]._code; for (size_t i = 0; i < tmp.size(); ++i) { putch <<= 1; putch |= (tmp[i] - '0'); if (++pos == 8) { fputc(putch, fIn); pos = 0; putch = 0; } } ch = fgetc(fOut); } if (pos > 0) { putch <<= (8 - pos); fputc(putch, fIn); } fclose(fOut); fclose(fIn); fclose(fConfig); } void FileCompress::GenerateHuffmanCode(HuffmanNode<CharInfo>* root, FileCompress& file, string& code) { if (root == NULL) { return; } if (root->_left == NULL && root->_right == NULL) { file._info[root->_weight._ch]._code = code; return; } code.push_back('0'); GenerateHuffmanCode(root->_left, file, code); code.pop_back(); code.push_back('1'); GenerateHuffmanCode(root->_right, file, code); code.pop_back(); } void FileCompress::UnCompressHuffCode(const char* filename) { assert(filename); FILE* fOut = fopen(filename, "rb"); assert(fOut); //读取文件, string fileConfig = (string)filename; string fileInName = fileConfig; size_t last_ = fileInName.find_last_of('.'); if (last_ < fileInName.size()) { fileConfig.erase(last_); fileInName.erase(last_); } fileConfig += ".config"; /*fileInName += "_Com.JPG";*/ fileInName += "_Com.mp3"; /*fileInName += "_Com.txt";*/ /*fileInName += "_Com.doc";*/ FILE* fIn = fopen(fileInName.c_str(), "wb"); assert(fIn); FILE* fConfig = fopen(fileConfig.c_str(), "rb"); assert(fConfig); CountInfo info; //读配置信息 while (1) { fread(&info, sizeof(CountInfo), 1, fConfig); if (info._count == -1) { break; } _info[(unsigned char)info._ch]._ch = info._ch; _info[(unsigned char)info._ch]._count = info._count; } //重建哈夫曼树 HuffmanTree<CharInfo> tree(this->_info, sizeof(this->_info) / sizeof(this->_info[0]), 0); 
HuffmanNode<CharInfo>* root = tree.GethuffmanNode(); HuffmanNode<CharInfo>* cur = root; unsigned char ch = fgetc(fOut); int pos = 7; if (ch == 255) { if (info._ch != 0) while (_info[(unsigned char)info._ch]._count--) { { fputc(info._ch, fIn); } } } else { TypeLong countSum = root->_weight._count; while (countSum > 0) { while (pos >= 0) { if ((ch & (1 << pos)) == 0) //向左走 { cur = cur->_left; } else { cur = cur->_right; } if (cur->_left == NULL && cur->_right == NULL) { fputc(cur->_weight._ch, fIn); //cout << cur->_weight._ch; if (--countSum == 0)//将没有写的字符的次数减1 break; cur = root; } --pos; } pos = 7; ch = fgetc(fOut); } } fclose(fIn); fclose(fOut); fclose(fConfig); } void FileCompress::PrintCode()const { for (int i = 0; i < 256; ++i) { if (this->_info[i]._count != 0) { cout << this->_info[i]._ch << ":>" << this->_info[i]._code << endl; } } }
(2)为什么要使用配置文件?
在项目中,将字符对应的编码转化为位,在unsigned char中填充位,填满后就写入到压缩文件中。
问题1:最后一个字节很可能没有填满,该如何判断它是否填满,以及填了几个字符的编码?
问题2:若依次压缩一些文件,压缩完后再去解压,那么编码此时已经没有了,该如何解压?
上面的两个问题可通过配置文件解决:假如要压缩的文件叫xxx,那么可生成一个名为xxx.config的配置文件,在该配置文件中写入<文件的总长度>(恢复时知道应该恢复多少个字符)和<字符以及其出现的次数>(用于解压时重建哈夫曼树),利用该配置文件即可解决这两个问题。
相关文章推荐
- 哈夫曼树以及文件压缩的实现
- Linux文件查找及压缩常用知识总结
- Android压缩文件访问总结
- linux 压缩文件的命令总结
- 哈夫曼树的应用:压缩文件
- 哈夫曼树压缩、解压缩文件
- Linux基础知识总结(2): 文件的压缩与打包
- linux 压缩文件的命令总结
- linux下压缩文件总结
- 【数据结构与算法】 利用哈夫曼树进行文件压缩 (部分借鉴网上内容)
- Ubuntu 下压缩文件的压缩和解压方式总结
- C++ 哈夫曼树对文件压缩、加密实现代码
- Linux各种文件压缩命令总结
- linux 压缩文件的命令总结
- linux 压缩文件的命令总结
- linux 压缩文件的命令总结
- linux 压缩文件的命令总结
- linux压缩文件总结
- linux下tar gz bz2 tgz z 压缩文件的压缩与解压方法总结
- linux下各类压缩文件的解压命令,总结一下