您的位置:首页 > 其它

Bloom Filter算法实现

2015-12-23 19:27 218 查看
#include<cassert>
#include<fstream>
#include<iostream>
#include<list>
#include<string>
#include<vector>
#include<boost/filesystem.hpp>
#include<boost/filesystem/path.hpp>
//#include<boost/uuid/uuid_generators.hpp>
//#include<boost/uuid/uuid.hpp>
//#include<boost/uuid/uuid_io.hpp>
#include "hashFun.h.h"

/*
* bloom.h
*
*  Created on: 2012-2-22
*      Author: xiaojay
*/

#ifndef BLOOM_H_
#define BLOOM_H_

/*
 * Bloom filter backed by a heap-allocated byte array.
 *
 * The filter stores `size` bytes (size * CHARBITSIZE bits). Each registered
 * hash function maps a C string to a bit position; add() sets those bits and
 * check() reports whether all of them are set.
 *
 * The object owns `arr` and releases it in the destructor, so it must not be
 * copied: a copy would share the same buffer and free it twice.
 */
class Bloom
{
public:
    /*
     * size         - number of bytes in the bit array.
     * hashfunclist - hash functions to apply to every key; must not be empty.
     *                The pointers are stored, not owned.
     */
    Bloom(int size, std::vector<HashFun*> hashfunclist);
    ~Bloom();

    /* Marks `text` as present in the filter. */
    void add(const char * text);

    /* Returns true if `text` may have been added, false if it definitely was not. */
    bool check(const char * text);

private:
    // Copying is disabled: declared private and intentionally left undefined,
    // because the implicit member-wise copy would duplicate the owning `arr`
    // pointer and lead to a double free.
    Bloom(const Bloom&);
    Bloom& operator=(const Bloom&);

    const static int CHARBITSIZE = 8;       // bits stored per element of arr
    int size;                               // length of arr in bytes
    char * arr;                             // owned bit array
    std::vector<HashFun*> hashfunclist;     // hash functions applied to each key
    inline void setbit(long pos);           // sets bit number `pos` in arr
    inline bool getbit(long pos);           // reads bit number `pos` from arr
};
#endif

/*
 * Builds an empty filter.
 *
 * size         - number of bytes to allocate for the bit array.
 * hashfunclist - hash functions used by add() and check(); must contain at
 *                least one entry (checked with assert).
 */
Bloom::Bloom(int size, std::vector<HashFun*> hashfunclist)
{
    assert(hashfunclist.size()>0);
    assert(size >= 0);
    this->size = size;
    this->hashfunclist = hashfunclist;
    // The trailing "()" value-initializes the array so every bit starts at 0.
    // Without it the buffer holds indeterminate data and check() could report
    // words that were never added.
    this->arr = new char[size]();
}

/*
 * Releases the bit array.
 */
Bloom::~Bloom()
{
    // arr comes from new char[...], so it must be released with delete[];
    // plain delete on an array allocation is undefined behaviour.
    // delete[] on a null pointer is a no-op, so no explicit null check is needed.
    delete[] this->arr;
}

void Bloom::add(const char * text)
{
int nfunc = hashfunclist.size();
long code = 0;
for (int i = 0; i<nfunc; i++)
{
code = hashfunclist.at(i)->gethashval(text);

if (code / CHARBITSIZE>size) return;
else
{
setbit(code);
}
}
}

bool Bloom::check(const char * text)
{
int nfunc = hashfunclist.size();
long code = 0;
for (int i = 0; i<nfunc; i++)
{
code = hashfunclist.at(i)->gethashval(text);
if (code / CHARBITSIZE>size)
return false;
else
{
if (getbit(code))
continue;
else
return false;
}
}
return true;
}

/*
 * Turns on bit number `code` of the array: byte code / CHARBITSIZE,
 * bit code % CHARBITSIZE within that byte. No range checking is done here.
 */
inline void Bloom::setbit(long code)
{
    const long byteIndex = code / CHARBITSIZE;
    const int bitMask = 1 << (code % CHARBITSIZE);
    arr[byteIndex] |= bitMask;
}

/*
 * Reports whether bit number `code` of the array is on: byte
 * code / CHARBITSIZE, bit code % CHARBITSIZE within that byte.
 * No range checking is done here.
 */
inline bool Bloom::getbit(long code)
{
    const int bitMask = 1 << (code % CHARBITSIZE);
    return (arr[code / CHARBITSIZE] & bitMask) != 0;
}
/*
 * Shift/xor string hash. Each byte of the key is mixed into the running
 * value, and the result is reduced to the range 0..79999.
 */
class HashFunA : public HashFun
{
public:
    virtual long gethashval(const char * key)
    {
        unsigned int hash = 0;
        for (; *key; ++key)
        {
            const unsigned char ch = static_cast<unsigned char>(*key);
            hash ^= (hash << 5) + (hash >> 2) + ch;
        }
        return hash % 80000;
    }
};
/*
 * Shift/add string hash. For every byte of the key the running value becomes
 * byte + (h << 6) + (h << 16) - h, and the result is reduced to the range
 * 0..79999.
 */
class HashFunB : public HashFun
{
public:
    virtual long gethashval(const char * key)
    {
        unsigned int hash = 0;
        for (; *key; ++key)
        {
            const unsigned char ch = static_cast<unsigned char>(*key);
            hash = ch + (hash << 6) + (hash << 16) - hash;
        }
        return hash % 80000;
    }
};
using namespace std;

/*
 * Interactive demo: fills a Bloom filter with a few words, then repeatedly
 * reads a word from standard input and reports whether the filter contains
 * it. The loop ends when standard input is exhausted or a read fails.
 */
int main()
{
    // The hash functions live on the stack and are declared before the filter,
    // so they outlive it and are cleaned up automatically (nothing to delete).
    HashFunA funa;
    HashFunB funb;
    vector<HashFun*> hashfunclist;
    hashfunclist.push_back(&funa);
    hashfunclist.push_back(&funb);

    /*
     * Create Bloom object with two parameters :
     * size of the store array and list of hash functions.
     * 10000 bytes give 80000 bits, matching the 0..79999 range produced by
     * HashFunA and HashFunB.
     */
    Bloom bloom(10000, hashfunclist);

    // Add some words to bloom filter
    bloom.add("hello");
    bloom.add("world");
    bloom.add("ipad");
    bloom.add("iphone4");
    bloom.add("ipod");
    bloom.add("apple");
    bloom.add("banana");
    bloom.add("hello");

    /*
     * Test
     */
    // std::string grows as needed, so long input cannot overflow a fixed buffer.
    string word;
    while (true)
    {
        cout << "Please input a word : " << endl;
        // Stop on EOF or a stream error instead of looping forever on a
        // failed stream.
        if (!(cin >> word))
        {
            break;
        }
        if (bloom.check(word.c_str()))
        {
            cout << "Word :" << word << " has been set in bloom filter." << endl;
        }
        else
        {
            cout << "Word :" << word << " not exist !" << endl;
        }
    }
    return 0;
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: