|
本帖最后由 serverdev2012 于 2012-12-7 17:29 编辑
hash算法的复杂度是O(n),作用就是计算数据在数组中的下标
而数组操作复杂度是O(1)
也就是hash表的复杂度是O(n)
其实就是胖client瘦server的设计
远程hash表利用将hash计算的任务丢给client做的思想,
将nosql服务器端的hash操作复杂度从O(n)降到O(1),大大提升效率
核心差别就是以下红色部分,设置了远程模式,则不计算hash,直接使用client传递过来的hashValue参数。
实际测试效果是redis dict效率的10倍 md5算法hash表的4倍
RHTable::ELEMENT* RHTable::Find(unsigned char* key, unsigned int size, unsigned int hashValue, bool bInsert )
{
BUCKET *pSaveBucket = NULL;
ELEMENT *pFrontE = NULL;
ELEMENT *pFindE = NULL;
unsigned char hashKey[256];
unsigned int hashSize;
if ( !m_bRemote ) HashFunction( hashKey, hashSize, key, size );
unsigned int idx = 0;
HASH_TABLE *pHashTable = m_pHashTable;
do
以下是完整代码
RHTable.h- // RHTable.h: interface for the RHTable class.
- //
- //远程哈稀表
- //hash算法可以由远端程序计算好,直接传递int型hash值到hash服务器,
- //可实现hash值的分布式计算
- //
- //不使用远程模式,则与redis内置哈稀表(dict.c/h)类似,但效率更高
- //
- //使用远程模式实现Nosql服务器,可大大提高查询效率
- // 根据选用的hash算法不同,提高效果不同
- // 选用md5算法,查询效率为普通模式4倍
- // 选用sha1算法,查询效率为普通模式9倍
- //
- //////////////////////////////////////////////////////////////////////
- #ifndef HASHTABLE_H
- #define HASHTABLE_H
- #include "FixLengthInt.h"
- #ifndef NULL
- #define NULL 0
- #endif
- #ifdef WIN32
- #include <windows.h>
- #else
- #endif
- #define MAX_HASH_64 0x7fffffffffffffff
- #define MAX_HASH_32 0x7fffffff
- typedef void (*pHashFunction)(unsigned char *hashKey, unsigned int &hashSize, unsigned char *key, unsigned int size );
- class RHTable
- {
- struct ELEMENT;
- struct HASH_TABLE;
- public:
- //操作结果
- typedef struct OP_R
- {
- ELEMENT *pInsert;
- bool bSuccess;
- }OP_R;
- //迭代器
- typedef struct Iterator
- {
- Iterator& operator++();
- Iterator& operator++(int);
- Iterator& operator--();
- Iterator& operator--(int);
- bool operator==(Iterator it);
- bool operator!=(Iterator it);
- ELEMENT *pElement;
- private:
- friend class RHTable;
- unsigned int idx;
- HASH_TABLE *pHashTable;
- HASH_TABLE *pHeadHashTable;
- }Iterator;
- private:
- //元素
- typedef struct ELEMENT
- {
- unsigned int hashValue;
- unsigned char *key;
- unsigned short keySize;
- void *value;
- private:
- friend class RHTable;
- friend struct Iterator;
- bool isDel;
- ELEMENT *next;//同一个桶内下一个元素
- }ELEMENT;
- //桶,保存发生hash碰撞的元素
- typedef struct BUCKET
- {
- ELEMENT *head;//桶中元素链表头
- }BUCKET;
- //哈稀表
- typedef struct HASH_TABLE
- {
- BUCKET *buckets;//hash数组
- unsigned long size;//hash数组大小
- unsigned long sizemask;//掩码size的全1表示
- HASH_TABLE *pre;//前一个表
- HASH_TABLE *next;//下一个表(旧hash表)
- unsigned long count;//实际元素数量
- }HASH_TABLE;
- public:
- RHTable();
- RHTable( unsigned long size );
- virtual ~RHTable();
- void SetHashFunction( pHashFunction hf );
- unsigned int RemoteHash(unsigned char *key, unsigned int size);//远程计算hash,胖client瘦Server模式
- void SetRemoteMode( bool bRemote );
-
- public:
- OP_R* Insert(unsigned char *key, unsigned int size, void *value, unsigned int hashValue = 0);
- void* Find(unsigned char *key, unsigned int size, unsigned int hashValue = 0 );
- bool Update(unsigned char *key, unsigned int size, void *value, unsigned int hashValue = 0);
- void Delete(unsigned char *key, unsigned int size, unsigned int hashValue = 0);
- unsigned long Count();
- bool IsEmpty();
- void Clear();
- Iterator Begin();
- Iterator End();
- public:
- unsigned long NextPower(unsigned long size);//比size大的最小的2的n次幂数
- unsigned int DJBHash(const unsigned char *buf, int len);//C33算法hash转换函数
- bool KeyCmp( unsigned char *key1, int size1, unsigned char *key2, int size2 );//相同返回true
- bool Expand(unsigned long size);
- ELEMENT* Find(unsigned char *key, unsigned int size, unsigned int hashValue, bool bInsert );
- void ReleaseOldHashTable();//旧hash表如果为null则释放
- void ReleaseHashTable();
- //哈稀算法函数指针(默认md5)
- void (*HashFunction)(unsigned char *hashKey, unsigned int &hashSize,
- unsigned char *key, unsigned int size );
-
- private:
- HASH_TABLE *m_pHashTable;
- bool m_onBit64;
- unsigned long m_maxHash;
- Iterator m_it;
- bool m_bRemote;
- };
- #endif // !defined(HASHTABLE_H)
复制代码 RHTable.cpp
- // RHTable.cpp: implementation of the RHTable class.
- //
- //////////////////////////////////////////////////////////////////////
- #include "RHTable.h"
- #include <cstdio>
- #include <cstring>
- #include <cstdlib>
- #include "md5.h"
- //////////////////////////////////////////////////////////////////////
- // Construction/Destruction
- //////////////////////////////////////////////////////////////////////
- RHTable::RHTable()
- {
- m_onBit64 = false;
- int *bit = NULL;
- if ( 8 == sizeof(bit) ) m_onBit64 = true;
- #if (m_onBit64)
- if ( m_onBit64 ) m_maxHash = MAX_HASH_64;
- #else
- m_maxHash = MAX_HASH_32;
- #endif
- m_bRemote = false;
- HashFunction = MD5HashFunction;
- m_pHashTable = NULL;
- Expand(0);
- }
- RHTable::RHTable(unsigned long size)
- {
- m_onBit64 = false;
- int *bit = NULL;
- if ( 8 == sizeof(bit) ) m_onBit64 = true;
- #if (m_onBit64)
- if ( m_onBit64 ) m_maxHash = MAX_HASH_64;
- #else
- m_maxHash = MAX_HASH_32;
- #endif
- m_bRemote = false;
- HashFunction = MD5HashFunction;
- m_pHashTable = NULL;
- Expand(size);
- }
- RHTable::~RHTable()
- {
- ReleaseHashTable();
- }
- void RHTable::SetHashFunction( pHashFunction hf )
- {
- HashFunction = hf;
- }
- unsigned long RHTable::NextPower(unsigned long size)
- {
- unsigned long i = 8;
- if (size >= m_maxHash) return m_maxHash;
- while(1)
- {
- if (i >= size) return i;
- i *= 2;
- }
- }
- unsigned int RHTable::DJBHash(const unsigned char *buf, int len)
- {
- unsigned int hash = 5381;
- while (len--)
- {
- hash = ((hash << 5) + hash) + (*buf++); /* hash * 33 + c */
- }
- return hash;
- }
- bool RHTable::KeyCmp( unsigned char *key1, int size1, unsigned char *key2, int size2 )
- {
- if ( size1 != size2 ) return false;
- int i = 0;
- for ( i = 0; i < size1; i++ )
- {
- if ( key1[i] != key2[i] ) return false;
- }
-
- return true;
- }
- bool RHTable::Expand(unsigned long size)
- {
- HASH_TABLE *pHashTable = new HASH_TABLE;
- if ( NULL == pHashTable ) return false;
- memset( pHashTable, 0, sizeof(HASH_TABLE) );
- size = NextPower(size);
- if ( NULL != m_pHashTable && m_pHashTable->size >= size )
- {
- if ( m_pHashTable->size * 16 > m_maxHash ) size = m_maxHash;
- else size = m_pHashTable->size * 16;
- }
-
- pHashTable->size = size;
- pHashTable->sizemask = size - 1;
- pHashTable->buckets = (BUCKET*)malloc(sizeof(BUCKET)*size);
- if ( NULL == pHashTable->buckets )
- {
- delete pHashTable;
- return false;
- }
- memset(pHashTable->buckets, 0, sizeof(BUCKET)*size);
- pHashTable->next = m_pHashTable;
- pHashTable->pre = NULL;
- if ( NULL != m_pHashTable )
- {
- m_pHashTable->pre = pHashTable;
- }
- m_pHashTable = pHashTable;
-
- return true;
- }
- void RHTable::SetRemoteMode( bool bRemote )
- {
- m_bRemote = bRemote;
- }
- unsigned int RHTable::RemoteHash( unsigned char *key, unsigned int size )
- {
- unsigned char hashKey[256];
- unsigned int hashSize;
- HashFunction( hashKey, hashSize, key, size );
- return DJBHash( hashKey, hashSize );
- }
- RHTable::ELEMENT* RHTable::Find(unsigned char* key, unsigned int size, unsigned int hashValue, bool bInsert )
- {
- BUCKET *pSaveBucket = NULL;
- ELEMENT *pFrontE = NULL;
- ELEMENT *pFindE = NULL;
- unsigned char hashKey[256];
- unsigned int hashSize;
- if ( !m_bRemote ) HashFunction( hashKey, hashSize, key, size );
- unsigned int idx = 0;
- HASH_TABLE *pHashTable = m_pHashTable;
- do
- {
- idx = hashValue&pHashTable->sizemask;
- if (NULL == pSaveBucket) pSaveBucket = &pHashTable->buckets[idx];
- pFindE = pHashTable->buckets[idx].head;
- while ( NULL != pFindE )
- {
- if ( pFindE->isDel
- || hashValue != pFindE->hashValue
- || !KeyCmp( pFindE->key, pFindE->keySize, key, size ) )
- {
- pFrontE = pFindE;
- pFindE = pFindE->next;
- continue;
- }
- break;
- }
- if ( NULL != pFindE ) break;//找到对象
- pHashTable = pHashTable->next;//旧表中查询
- }
- while (NULL != pHashTable);
- if ( NULL == pFindE ) //没有找到
- {
- if ( !bInsert ) return NULL;
- ELEMENT *pNew = pSaveBucket->head;
- while ( NULL != pNew )
- {
- if ( pNew->isDel ) break;
- pNew = pNew->next;
- }
- bool bNew = false;
- if ( NULL == pNew )
- {
- pNew = new ELEMENT;
- if ( NULL == pNew ) return NULL;
- pNew->next = pSaveBucket->head;
- bNew = true;
- }
- pNew->value = NULL;
- pNew->key = new unsigned char[size];
- if ( NULL == pNew->key )
- {
- delete pNew;
- return NULL;
- }
- pNew->hashValue = hashValue;
- memcpy( pNew->key, key, size );
- pNew->keySize = size;
- pNew->isDel = false;
- if ( bNew ) pSaveBucket->head = pNew;
- return pNew;
- }
- if ( pSaveBucket == &pHashTable->buckets[idx] ) return pFindE;
- //数据迁移
- if ( pHashTable->buckets[idx].head == pFindE ) pHashTable->buckets[idx].head = pFindE->next;
- else pFrontE->next = pFindE->next;
- pFindE->next = pSaveBucket->head;
- pSaveBucket->head = pFindE;
- pHashTable->count--;
- m_pHashTable->count++;
- if ( 0 == pHashTable->count ) ReleaseOldHashTable();
- return pFindE;
- }
- void RHTable::ReleaseOldHashTable()
- {
- HASH_TABLE *pEmpty = NULL;
- HASH_TABLE *pHashTable = m_pHashTable;
- while ( NULL != pHashTable->next )
- {
- if ( 0 < pHashTable->next->count )
- {
- pHashTable = pHashTable->next;
- continue;
- }
- pEmpty = pHashTable->next;
- if ( NULL != pHashTable->next->next )
- {
- pHashTable->next->next->pre = pHashTable->next->pre;
- }
- pHashTable->next = pHashTable->next->next;
- /*
- 不用遍历buckets中head
- 因为使count减小到 = 0的操作只有1种,就是3参数的Find(key,size,insert)
- 而Find就会将旧hash表中元素移到新表
- 所以count = 0就一定是所有head指针=NULL
- */
- delete pEmpty->buckets;
- delete pEmpty;
- }
- }
- RHTable::OP_R* RHTable::Insert(unsigned char *key, unsigned int size, void *value, unsigned int hashValue)
- {
- static OP_R res;
- res.bSuccess = false;
- res.pInsert = Find( key, size, hashValue, true );
- if ( NULL == res.pInsert ) return &res;
- if ( NULL != res.pInsert->value ) return &res;
- res.pInsert->value = value;
- res.bSuccess = true;
- m_pHashTable->count++;
- if ( 5 <= m_pHashTable->count / m_pHashTable->size )Expand(0);//平均碰撞最大容忍为5
- return &res;
- }
- void* RHTable::Find(unsigned char *key, unsigned int size, unsigned int hashValue )
- {
- ELEMENT *pFindE = Find( key, size, hashValue, false );
- if ( NULL == pFindE ) return NULL;
- return pFindE->value;
- }
- bool RHTable::Update(unsigned char *key, unsigned int size, void *value, unsigned int hashValue)
- {
- ELEMENT *pFindE = Find( key, size, hashValue, false );
- if ( NULL == pFindE ) return false;
- pFindE->value = value;
- return true;
- }
- void RHTable::Delete(unsigned char *key, unsigned int size, unsigned int hashValue)
- {
- ELEMENT *pFindE = Find( key, size, hashValue, false );
- if ( NULL == pFindE ) return;
- pFindE->isDel = true;
- pFindE->value = NULL;
- delete pFindE->key;
- pFindE->key = NULL;
- pFindE->keySize = 0;
- m_pHashTable->count--;
- return;
- }
- unsigned long RHTable::Count()
- {
- unsigned long count = 0;
- HASH_TABLE *pHashTable = m_pHashTable;
- for ( ; NULL != pHashTable; pHashTable = pHashTable->next )
- {
- count += pHashTable->count;
- }
-
- return count;
- }
- bool RHTable::IsEmpty()
- {
- return 0 == Count();
- }
- RHTable::Iterator RHTable::Begin()
- {
- m_it.pHeadHashTable = m_pHashTable;
- m_it.pHashTable = m_pHashTable;
- m_it.pElement = NULL;
- m_it.idx = 0;
-
- while ( NULL != m_it.pHashTable )
- {
- for ( ; m_it.idx < m_it.pHashTable->size; m_it.idx++ )
- {
- m_it.pElement = m_it.pHashTable->buckets[m_it.idx].head;
- while ( NULL != m_it.pElement )
- {
- if ( m_it.pElement->isDel )
- {
- m_it.pElement = m_it.pElement->next;
- continue;
- }
- return m_it;
- }
- }
- m_it.idx = 0;
- m_it.pHashTable = m_it.pHashTable->next;
- m_it.pElement = NULL;
- }
-
- return m_it;
- }
- RHTable::Iterator RHTable::End()
- {
- m_it.pHeadHashTable = m_pHashTable;
- m_it.pHashTable = NULL;
- m_it.pElement = NULL;
- m_it.idx = 0;
- return m_it;
- }
- void RHTable::Clear()
- {
- HASH_TABLE *pHashTable = m_pHashTable;
- ELEMENT *pElement = NULL;
- unsigned int idx = 0;
-
- while ( NULL != pHashTable )
- {
- for ( ; idx < pHashTable->size; idx++ )
- {
- pElement = pHashTable->buckets[idx].head;
- while ( NULL != pElement )
- {
- if ( !pElement->isDel )
- {
- pElement->isDel = true;
- pElement->value = NULL;
- delete pElement->key;
- pElement->key = NULL;
- pElement->keySize = 0;
- pHashTable->count--;
- }
- pElement = pElement->next;
- }
- }
- idx = 0;
- pHashTable = pHashTable->next;
- }
-
- return;
- }
- void RHTable::ReleaseHashTable()
- {
- HASH_TABLE *pEmpty = NULL;
- unsigned int i = 0;
- ELEMENT *pDelE = NULL;
- ELEMENT *pHead = NULL;
- while ( NULL != m_pHashTable )
- {
- for ( i = 0; i < m_pHashTable->size; i++ )
- {
- pHead = m_pHashTable->buckets[i].head;
- while ( NULL != pHead )
- {
- if ( NULL != pHead->key )
- {
- delete pHead->key;
- pHead->key = NULL;
- }
- pDelE = pHead;
- pHead = pHead->next;
- delete pDelE;
- pDelE = NULL;
- }
- }
- pEmpty = m_pHashTable;
- m_pHashTable = m_pHashTable->next;
- delete pEmpty->buckets;
- delete pEmpty;
- }
- }
- RHTable::Iterator& RHTable::Iterator::operator++(int)
- {
- ++(*this);
- return *this;
- }
- RHTable::Iterator& RHTable::Iterator::operator++()
- {
- if ( NULL == pElement ) return *this;
- pElement = pElement->next;
- while ( NULL != pHashTable )
- {
- for ( ; idx < pHashTable->size; )
- {
- while ( NULL != pElement )
- {
- if ( pElement->isDel )
- {
- pElement = pElement->next;
- continue;
- }
- return *this;
- }
- idx++;
- pElement = pHashTable->buckets[idx].head;
- }
- idx = 0;
- pElement = NULL;
- pHashTable = pHashTable->next;
- if ( NULL == pHashTable ) break;
- pElement = pHashTable->buckets[idx].head;
- }
- return *this;
- }
- RHTable::Iterator& RHTable::Iterator::operator--(int)
- {
- --(*this);
- return *this;
- }
- RHTable::Iterator& RHTable::Iterator::operator--()
- {
- unsigned int i = idx;
- HASH_TABLE *pCurTable = pHashTable;
- if ( NULL == pElement && 0 == idx && NULL == pHashTable )
- {
- pCurTable = pHeadHashTable;
- while ( NULL != pCurTable->next ) pCurTable = pCurTable->next;
- i = pCurTable->sizemask;
- }
- ELEMENT *pCur = pCurTable->buckets[i].head;
- ELEMENT *pPre = NULL;
- while ( NULL != pCurTable )
- {
- for ( ; 0 <= i && i < pCurTable->size; )
- {
- while ( NULL != pCur && pCur != pElement )
- {
- if ( !pCur->isDel ) pPre = pCur;
- pCur = pCur->next;
- }
- if ( NULL != pPre || 0 == i ) break;
- i--;
- pCur = pCurTable->buckets[i].head;
- }
- if ( NULL != pPre || NULL == pCurTable->pre ) break;
- pCurTable = pCurTable->pre;
- i = pCurTable->sizemask;
- pCur = pCurTable->buckets[i].head;
- }
- if ( NULL != pPre )
- {
- pHashTable = pCurTable;
- pElement = pPre;
- idx = i;
- }
- return *this;
- }
- bool RHTable::Iterator::operator==(RHTable::Iterator it)
- {
- if ( pElement != it.pElement ) return false;
- if ( idx != it.idx ) return false;
- if ( pHashTable != it.pHashTable ) return false;
- return true;
- }
- bool RHTable::Iterator::operator!=(Iterator it)
- {
- if ( pElement != it.pElement ) return true;
- if ( idx != it.idx ) return true;
- if ( pHashTable != it.pHashTable ) return true;
- return false;
- }
复制代码 FixLengthInt.h
- // Copyright [2012] <xiaoxie820125@sina.com>
- #ifndef MDK_FIXLENGTHINT_H
- #define MDK_FIXLENGTHINT_H
- #ifndef WIN32
- #include <sys/types.h>
- #endif
- namespace mdk
- {
-
- typedef char int8;
- typedef unsigned char uint8;
- typedef short int16;
- typedef unsigned short uint16;
- typedef int int32;
- typedef unsigned int uint32;
- #ifdef WIN32
- typedef __int64 int64;
- typedef unsigned __int64 uint64;
- #else
- #include <sys/types.h>
- typedef int64_t int64;
- typedef u_int64_t uint64;
- #endif
- }//namespace mdk
-
- #endif // MDK_FIXLENGTHINT_H
复制代码 |
本帖子中包含更多资源
您需要 登录 才可以下载或查看,没有账号?用户注册
×
|