您的位置:首页 > 编程语言 > C语言/C++

用c++封装一个Hash Table,并与STL map 进行操作性能上的比较

2014-05-03 22:36 691 查看

问题描述:

1.在计算机科学中,hash table 是一种常用的数据结构,它本质上是一种关联容器,它映射一个key 到value。它通过一个hash function把key映射成一个整数值,这个整数值对应存放value值的容器的下标。

2.它主要支持三种操作:插入key,value对(insert),查询(search)给定key, 删除key, value对(delete);

3.它三种操作的平均时间复杂度为O(1),最糟糕情况下的时间复杂度为O(n);

4.hash table要处理核心问题是选择好的hash function,这能使得key映射出的index足够分散均匀,尽量减少碰撞(不同的key映射到同一个index),为了消除碰撞产生,一般常用几种方法:Separate chaining, Linear probing,Quadratic probing, Rehash, double Hashing,详细细节请参看维基百科;

5.本文给出利用Rehash的策略来消除碰撞;

6.本文设计测试用例,比较了hashtable 和STL map 的操作性能。发现执行相同的操作,hashtable 所消耗时间为STL map 九分之一;

程序代码:

#ifndef _HASH_TABLE_H_
#define _HASH_TABLE_H_

#include <stdlib.h>
#include <stdio.h>
#include <map>
#include <assert.h>
#include <string>

#include "windows.h"

/*
* Compare function
*
*/
static bool StrCompare( const char* first, const char* second )
{
size_t firstLen = strlen(first);
size_t secondLen = strlen(second);

return firstLen == secondLen && !strcmp( first, second );
}

/*
* Hash function
*
*/
static unsigned int ImplHashFunc( const char* buf, int len )
{
unsigned int hash = 5381;

while(len--)
{
hash = ((hash << 5) + hash) + (*buf++); /* hash * 33 + c */
}

return hash;
}

/*
* Hash function
*
*/
static unsigned int ImplCaseHashFunc( const unsigned char* buf, int len )
{
unsigned int hash = 5381;

while(len--)
{
hash = ((hash << 5) + hash) + tolower(*buf++); /* hash * 33 + c */
}

return hash;
}

/*
* Hash function
*
*/
static unsigned int ImplHashFuncSimple( unsigned int key )
{
key += ~(key << 15);
key ^=  (key >> 10);
key +=  (key << 3);
key ^=  (key >> 6);
key += ~(key << 11);
key ^=  (key >> 16);
return key;
}

/*
* encapsulate the hash table
* advantage:
*          good performance;
*          terse interface to make more easy for outstanding and to employ
*
*/
template<class T>
class HashTable
{
public:
typedef unsigned int (*HashFunctor)( const char* key, int len);
typedef bool (*KeyCompare)( const char* keyFirst, const char* keySecond );

static const int INIT_TABLE_SIZE = 689981;

typedef struct tagEntryNode
{
char*  key;
T      value;
tagEntryNode* next;

tagEntryNode():key(0), value(), next(0)
{

}

tagEntryNode( const char* _key, const T& val ):
value(val), next(0)
{
size_t len = strlen(_key) + 1;
key = new char[len];
strncpy( key, _key, len - 1);
key[len - 1] = '\0';
}

~tagEntryNode()
{
delete [] key;
key = 0;
}

}EntryNode, *pEntryNode;

typedef struct tagHashNode
{
EntryNode** table;
size_t      used;
size_t      size;
size_t      sizeMask;

tagHashNode():table(0), used(0), size(0),sizeMask(0)
{

}

tagHashNode( size_t _size ):table(0), used(0), size(_size),
sizeMask(0)
{
Init( _size );

}

~tagHashNode()
{
}

void Init( size_t _size )
{
size = _size;
sizeMask = size - 1;
table = new EntryNode*[size];
memset( table, 0x00, sizeof(EntryNode*)*size );
}

}HashNode, *pHashNode;

/*
*
*
*/
HashTable( HashFunctor functor = ImplHashFunc, KeyCompare cmpFunctor = StrCompare ):m_hashFunctor(functor),
m_keyCmpFunctor(cmpFunctor),
m_hashTable(new HashNode), m_resizeRatio(2)
{
Init( INIT_TABLE_SIZE );
}

/*
*
*
*/
~HashTable()
{
Clear();
}

/*
* Clear all node and entity
*
*/
void Clear()
{
Clear( m_hashTable );
}

/*
* Inset the pair of key and value
*
*/
bool Insert( const char* key, const T& value )
{
Rehash();

return Insert( m_hashTable, key, value );
}

/*
* Retrieve the pointer of value for given key
*
*/
T* Find( const char* key )
{
unsigned int hash = m_hashFunctor(key, strlen(key));
unsigned int idx = hash % m_hashTable->size;

EntryNode* entry = m_hashTable->table[idx];
while( entry )
{
if( m_keyCmpFunctor( entry->key, key) )
{
return &entry->value;
}

entry = entry->next;
}

return NULL;
}

/*
* Delete hashEntry for given key
*
*/
void Delete( const char* key )
{
unsigned int hash = m_hashFunctor(key, strlen(key));
unsigned int idx = hash % m_hashTable->size;

EntryNode* entry = m_hashTable->table[idx];
EntryNode* preEntry = 0;
while( entry )
{
if( m_keyCmpFunctor( entry->key, key ) )
{
if( preEntry )
{
preEntry->next = entry->next;
}
else
{
m_hashTable->table[idx] = entry->next;
}

delete entry;
entry = 0;
m_hashTable->used--;
return;
}
else
{
preEntry = entry;
entry  = entry->next;
}

}
}

protected:

/*
* Fink the index of corresponding of key value in the table
*
*/
int FindKeyIndex( pHashNode hashNode, const char* key )
{
unsigned int hash = m_hashFunctor( key, strlen( key ) );
unsigned int idx = hash % hashNode->size;

EntryNode* entry = hashNode->table[idx];
if( 0 == entry )
hashNode->used++;

while( entry )
{
if( m_keyCmpFunctor( entry->key, key) )
{
return -1;
}

entry = entry->next;
}

return idx;
}

/*
* Implement insert operation
*
*/
bool Insert( pHashNode hashNode, const char* key, const T& value )
{
int idx = FindKeyIndex( hashNode, key );
if( idx != -1 )
{
EntryNode* newNode = new EntryNode( key, value );
newNode->next = hashNode->table[idx];
hashNode->table[idx] = newNode;

return true;
}

return false;
}

/*
* Rehash double store memory to make more root then remake insert operation
* very important
*/
void Rehash()
{
if( m_hashTable->used >= m_hashTable->size ||
(m_hashTable->used > 0 && (m_hashTable->size / m_hashTable->used) < m_resizeRatio ) )
{
size_t newSize = NextPrime( m_hashTable->size * 2 );
pHashNode newHashNode = new HashNode( newSize );

for( size_t i = 0; i < m_hashTable->size; i++ )
{
pEntryNode entryNode = m_hashTable->table[i];
while( entryNode )
{
Insert( newHashNode, entryNode->key, entryNode->value );
entryNode = entryNode->next;
}

}

Clear( m_hashTable );
m_hashTable = newHashNode;

}
}

/*
* Implement clear operation
*
*/
void Clear( pHashNode hashNode )
{
for( size_t i = 0; i < m_hashTable->size; i++ )
{
pEntryNode entryNode = m_hashTable->table[i];
while( entryNode )
{
pEntryNode next = entryNode->next;
delete entryNode;
entryNode = next;
}

}

delete [] m_hashTable->table;
}

/*
* Initialization
*
*/
void Init( size_t tableSize )
{
m_hashTable->Init( tableSize );
}

/*
* Helper function
* check prime
*/
bool IsPrime( size_t x)
{
for( std::size_t i = 3; true; i += 2 )
{
std::size_t q = x / i;
if( q < i )
return true;

if( x == q * i )
return false;
}
return true;
}

/*
* Find next prime for given x
*
*/
size_t	NextPrime( size_t x )
{
if( x <= 2 )
return 2;

if(!(x & 1))
++x;

for(; !IsPrime(x); x += 2 );

return x;
}

protected:

HashFunctor m_hashFunctor;

KeyCompare  m_keyCmpFunctor;
4000

pHashNode   m_hashTable;

size_t      m_resizeRatio;

};

/*
* Test hash table
*
*/
void TestHashTable()
{
unsigned long start = GetTickCount();

HashTable<int> hashTable;
const int Len = 500000;
for( int i = 0; i < Len; i++ )
{
char key[16] = {0};
sprintf(key, "%s_%d", "china", i );

assert(hashTable.Insert( key, i ));
}

for( int i = 0; i < Len; i++ )
{
char key[16] = {0};
sprintf(key, "%s_%d", "china", i );

if( i > 0 && !(i % 50) )
{
hashTable.Delete( key );
assert( !hashTable.Find( key ) );
}
else
{
assert(i == *hashTable.Find( key));
}

}

unsigned long interval = GetTickCount() - start;
printf(" hash table consume time is %d \n", interval );
}

/*
* Test STL map
*
*/
void TestHTSTLMap()
{
unsigned long start = GetTickCount();

std::map<std::string, int > strMap;
const int Len = 500000;
for( int i = 0; i < Len; i++ )
{
char key[16] = {0};
sprintf(key, "%s_%d", "china", i );
std::string keyStr(key);

strMap.insert( std::make_pair(keyStr, i )) ;
}

std::map<std::string, int >::iterator iter = strMap.begin();
for( int i = 0; i < Len; i++ )
{
char key[16] = {0};
sprintf(key, "%s_%d", "china", i );
std::string keyStr(key);

if( i > 0 && !(i % 50) )
{
strMap.erase( key );
assert( strMap.find( key ) == strMap.end() );
}
else
{
iter = strMap.find( keyStr );
assert( iter->second == i );
}

}

unsigned long interval = GetTickCount() - start;
printf(" STL map consume time is %d \n", interval );
}

/*
* Test suite and compare performance
*
*/
void TestSuiteHashTable()
{
TestHashTable();
TestHTSTLMap();

}

#endif


compile and run in visual studio 2005



内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
相关文章推荐