Bloom Filter算法实现
2015-12-23 19:27
218 查看
#include<iostream> #include<fstream> #include<boost/filesystem.hpp> #include<boost/filesystem/path.hpp> #include<list> #include<vector> #include "hashFun.h.h" //#include<boost/uuid/uuid_generators.hpp> //#include<boost/uuid/uuid.hpp> //#include<boost/uuid/uuid_io.hpp> /* * bloom.h * * Created on: 2012-2-22 * Author: xiaojay */ #ifndef BLOOM_H_ #define BLOOM_H_ class Bloom { public: Bloom(int size, std::vector<HashFun*> hashfunclist); ~Bloom(); void add(const char * text); bool check(const char * text); private: const static int CHARBITSIZE = 8; int size; char * arr; std::vector<HashFun*> hashfunclist; inline void setbit(long pos); inline bool getbit(long pos); }; #endif Bloom::Bloom(int size, std::vector<HashFun*> hashfunclist) { assert(hashfunclist.size()>0); this->size = size; this->hashfunclist = hashfunclist; this->arr = new char[size]; } Bloom::~Bloom() { if (this->arr != NULL) { delete this->arr; } } void Bloom::add(const char * text) { int nfunc = hashfunclist.size(); long code = 0; for (int i = 0; i<nfunc; i++) { code = hashfunclist.at(i)->gethashval(text); if (code / CHARBITSIZE>size) return; else { setbit(code); } } } bool Bloom::check(const char * text) { int nfunc = hashfunclist.size(); long code = 0; for (int i = 0; i<nfunc; i++) { code = hashfunclist.at(i)->gethashval(text); if (code / CHARBITSIZE>size) return false; else { if (getbit(code)) continue; else return false; } } return true; } inline void Bloom::setbit(long code) { arr[code / CHARBITSIZE] |= (1 << (code%CHARBITSIZE)); } inline bool Bloom::getbit(long code) { if (!(arr[code / CHARBITSIZE] & (1 << (code%CHARBITSIZE)))) { return false; } return true; } class HashFunA : public HashFun { public: virtual long gethashval(const char * key) { unsigned int h = 0; while (*key) h ^= (h << 5) + (h >> 2) + (unsigned char)*key++; return h % 80000; } }; class HashFunB : public HashFun { public: virtual long gethashval(const char * key) { unsigned int h = 0; while (*key) h = (unsigned char)*key++ + (h << 6) + (h << 16) - h; return h % 80000; } }; using namespace std; int main() { HashFunA *funa = new HashFunA(); HashFunB *funb = new HashFunB(); vector<HashFun*> hashfunclist; hashfunclist.push_back(funa); hashfunclist.push_back(funb); /* * Create Bloom object with two parameters : * size of the store array and list of hash functions */ Bloom bloom(10000, hashfunclist); ///Add some words to bloom filter bloom.add("hello"); bloom.add("world"); bloom.add("ipad"); bloom.add("iphone4"); bloom.add("ipod"); bloom.add("apple"); bloom.add("banana"); bloom.add("hello"); /* * Test */ char word[20]; while (true) { cout << "Please input a word : " << endl; cin >> word; if (bloom.check(word)) { cout << "Word :" << word << " has been set in bloom filter." << endl; } else { cout << "Word :" << word << " not exist !" << endl; } } }
相关文章推荐
- C#正则表达式
- Xcode升级后,遇到的告警和解决方法
- 督主cocos2d学习笔记
- 一元多项式化简
- OpenCV的一些基础知识
- 适配器模式(Adapter)—山下的养牛场
- CentOS根分区扩容方法
- iOS完全自学手册——[一]Ready?No!
- 2015-12-23日记:稳步的走向稳定与互联网加教育的理念阐述
- IOS开发之MapKit学习笔记
- Xen 和 KVM 下如何关闭 virbr0
- SQL Server完全删除问题
- Objective-C:@class和#import
- VC写的读写配置文件的类
- KVC
- Java 9中新的货币API
- x86_64系统 yum安装i386包
- 关于easyUI在子页面增加显示tabs的一个问题
- python urllib 和urllib2的区别
- 模型驱动开发-GME(The Generic Modeling Environment) - 前言