远程hash表,打造比redis更快的nosql内核
本帖最后由 serverdev2012 于 2012-12-7 17:29 编辑hash算法的复杂度是O(n),作用就是计算数据在数组中的下标
而数组操作复杂度是O(1)
也就是hash表的复杂度是O(n)
其实就是胖client瘦server的设计
远程hash表利用将hash计算的任务丢给client做的思想,
将nosql服务器端的hash操作复杂度从O(n)降到O(1),大大提升效率
核心差别就是以下红色部分,设置了远程模式,则不计算hash,直接使用client传递过来的hashValue参数。
实际测试效果是redis dict效率的10倍 md5算法hash表的4倍
RHTable::ELEMENT* RHTable::Find(unsigned char* key, unsigned int size, unsigned int hashValue,bool bInsert )
{
BUCKET *pSaveBucket = NULL;
ELEMENT *pFrontE = NULL;
ELEMENT *pFindE = NULL;
unsigned char hashKey;
unsigned int hashSize;
if ( !m_bRemote ) HashFunction( hashKey, hashSize, key, size );
unsigned int idx = 0;
HASH_TABLE *pHashTable = m_pHashTable;
do
以下是完整代码
RHTable.h// RHTable.h: interface for the RHTable class.
//
//远程哈稀表
//hash算法可以由远端程序计算好,直接传递int型hash值到hash服务器,
//可实现hash值的分布式计算
//
//不使用远程模式,则与redis内置哈稀表(dict.c/h)类似,但效率更高
//
//使用远程模式实现Nosql服务器,可大大提高查询效率
// 根据选用的hash算法不同,提高效果不同
// 选用md5算法,查询效率为普通模式4倍
// 选用sha1算法,查询效率为普通模式9倍
//
//////////////////////////////////////////////////////////////////////
#ifndef HASHTABLE_H
#define HASHTABLE_H
#include "FixLengthInt.h"
#ifndef NULL
#define NULL 0
#endif
#ifdef WIN32
#include <windows.h>
#else
#endif
#define MAX_HASH_64 0x7fffffffffffffff
#define MAX_HASH_32 0x7fffffff
typedef void (*pHashFunction)(unsigned char *hashKey, unsigned int &hashSize, unsigned char *key, unsigned int size );
class RHTable
{
struct ELEMENT;
struct HASH_TABLE;
public:
//操作结果
typedef struct OP_R
{
ELEMENT *pInsert;
bool bSuccess;
}OP_R;
//迭代器
typedef struct Iterator
{
Iterator& operator++();
Iterator& operator++(int);
Iterator& operator--();
Iterator& operator--(int);
bool operator==(Iterator it);
bool operator!=(Iterator it);
ELEMENT *pElement;
private:
friend class RHTable;
unsigned int idx;
HASH_TABLE *pHashTable;
HASH_TABLE *pHeadHashTable;
}Iterator;
private:
//元素
typedef struct ELEMENT
{
unsigned int hashValue;
unsigned char *key;
unsigned short keySize;
void *value;
private:
friend class RHTable;
friend struct Iterator;
bool isDel;
ELEMENT *next;//同一个桶内下一个元素
}ELEMENT;
//桶,保存发生hash碰撞的元素
typedef struct BUCKET
{
ELEMENT *head;//桶中元素链表头
}BUCKET;
//哈稀表
typedef struct HASH_TABLE
{
BUCKET *buckets;//hash数组
unsigned long size;//hash数组大小
unsigned long sizemask;//掩码size的全1表示
HASH_TABLE *pre;//前一个表
HASH_TABLE *next;//下一个表(旧hash表)
unsigned long count;//实际元素数量
}HASH_TABLE;
public:
RHTable();
RHTable( unsigned long size );
virtual ~RHTable();
void SetHashFunction( pHashFunction hf );
unsigned int RemoteHash(unsigned char *key, unsigned int size);//远程计算hash,胖client瘦Server模式
void SetRemoteMode( bool bRemote );
public:
OP_R* Insert(unsigned char *key, unsigned int size, void *value, unsigned int hashValue = 0);
void* Find(unsigned char *key, unsigned int size, unsigned int hashValue = 0 );
bool Update(unsigned char *key, unsigned int size, void *value, unsigned int hashValue = 0);
void Delete(unsigned char *key, unsigned int size, unsigned int hashValue = 0);
unsigned long Count();
bool IsEmpty();
void Clear();
Iterator Begin();
Iterator End();
public:
unsigned long NextPower(unsigned long size);//比size大的最小的2的n次幂数
unsigned int DJBHash(const unsigned char *buf, int len);//C33算法hash转换函数
bool KeyCmp( unsigned char *key1, int size1, unsigned char *key2, int size2 );//相同返回true
bool Expand(unsigned long size);
ELEMENT* Find(unsigned char *key, unsigned int size, unsigned int hashValue, bool bInsert );
void ReleaseOldHashTable();//旧hash表如果为null则释放
void ReleaseHashTable();
//哈稀算法函数指针(默认md5)
void (*HashFunction)(unsigned char *hashKey, unsigned int &hashSize,
unsigned char *key, unsigned int size );
private:
HASH_TABLE *m_pHashTable;
bool m_onBit64;
unsigned long m_maxHash;
Iterator m_it;
bool m_bRemote;
};
#endif // !defined(HASHTABLE_H)
RHTable.cpp
// RHTable.cpp: implementation of the RHTable class.
//
//////////////////////////////////////////////////////////////////////
#include "RHTable.h"
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include "md5.h"
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
RHTable::RHTable()
{
m_onBit64 = false;
int *bit = NULL;
if ( 8 == sizeof(bit) ) m_onBit64 = true;
#if (m_onBit64)
if ( m_onBit64 ) m_maxHash = MAX_HASH_64;
#else
m_maxHash = MAX_HASH_32;
#endif
m_bRemote = false;
HashFunction = MD5HashFunction;
m_pHashTable = NULL;
Expand(0);
}
RHTable::RHTable(unsigned long size)
{
m_onBit64 = false;
int *bit = NULL;
if ( 8 == sizeof(bit) ) m_onBit64 = true;
#if (m_onBit64)
if ( m_onBit64 ) m_maxHash = MAX_HASH_64;
#else
m_maxHash = MAX_HASH_32;
#endif
m_bRemote = false;
HashFunction = MD5HashFunction;
m_pHashTable = NULL;
Expand(size);
}
RHTable::~RHTable()
{
ReleaseHashTable();
}
void RHTable::SetHashFunction( pHashFunction hf )
{
HashFunction = hf;
}
unsigned long RHTable::NextPower(unsigned long size)
{
unsigned long i = 8;
if (size >= m_maxHash) return m_maxHash;
while(1)
{
if (i >= size) return i;
i *= 2;
}
}
unsigned int RHTable::DJBHash(const unsigned char *buf, int len)
{
unsigned int hash = 5381;
while (len--)
{
hash = ((hash << 5) + hash) + (*buf++); /* hash * 33 + c */
}
return hash;
}
bool RHTable::KeyCmp( unsigned char *key1, int size1, unsigned char *key2, int size2 )
{
if ( size1 != size2 ) return false;
int i = 0;
for ( i = 0; i < size1; i++ )
{
if ( key1 != key2 ) return false;
}
return true;
}
bool RHTable::Expand(unsigned long size)
{
HASH_TABLE *pHashTable = new HASH_TABLE;
if ( NULL == pHashTable ) return false;
memset( pHashTable, 0, sizeof(HASH_TABLE) );
size = NextPower(size);
if ( NULL != m_pHashTable && m_pHashTable->size >= size )
{
if ( m_pHashTable->size * 16 > m_maxHash ) size = m_maxHash;
else size = m_pHashTable->size * 16;
}
pHashTable->size = size;
pHashTable->sizemask = size - 1;
pHashTable->buckets = (BUCKET*)malloc(sizeof(BUCKET)*size);
if ( NULL == pHashTable->buckets )
{
delete pHashTable;
return false;
}
memset(pHashTable->buckets, 0, sizeof(BUCKET)*size);
pHashTable->next = m_pHashTable;
pHashTable->pre = NULL;
if ( NULL != m_pHashTable )
{
m_pHashTable->pre = pHashTable;
}
m_pHashTable = pHashTable;
return true;
}
void RHTable::SetRemoteMode( bool bRemote )
{
m_bRemote = bRemote;
}
unsigned int RHTable::RemoteHash( unsigned char *key, unsigned int size )
{
unsigned char hashKey;
unsigned int hashSize;
HashFunction( hashKey, hashSize, key, size );
return DJBHash( hashKey, hashSize );
}
RHTable::ELEMENT* RHTable::Find(unsigned char* key, unsigned int size, unsigned int hashValue,bool bInsert )
{
BUCKET *pSaveBucket = NULL;
ELEMENT *pFrontE = NULL;
ELEMENT *pFindE = NULL;
unsigned char hashKey;
unsigned int hashSize;
if ( !m_bRemote ) HashFunction( hashKey, hashSize, key, size );
unsigned int idx = 0;
HASH_TABLE *pHashTable = m_pHashTable;
do
{
idx = hashValue&pHashTable->sizemask;
if (NULL == pSaveBucket) pSaveBucket = &pHashTable->buckets;
pFindE = pHashTable->buckets.head;
while ( NULL != pFindE )
{
if ( pFindE->isDel
|| hashValue != pFindE->hashValue
|| !KeyCmp( pFindE->key, pFindE->keySize, key, size ) )
{
pFrontE = pFindE;
pFindE = pFindE->next;
continue;
}
break;
}
if ( NULL != pFindE ) break;//找到对象
pHashTable = pHashTable->next;//旧表中查询
}
while (NULL != pHashTable);
if ( NULL == pFindE) //没有找到
{
if ( !bInsert ) return NULL;
ELEMENT *pNew = pSaveBucket->head;
while ( NULL != pNew )
{
if ( pNew->isDel ) break;
pNew = pNew->next;
}
bool bNew = false;
if ( NULL == pNew )
{
pNew = new ELEMENT;
if ( NULL == pNew ) return NULL;
pNew->next = pSaveBucket->head;
bNew = true;
}
pNew->value = NULL;
pNew->key = new unsigned char;
if ( NULL == pNew->key )
{
delete pNew;
return NULL;
}
pNew->hashValue = hashValue;
memcpy( pNew->key, key, size );
pNew->keySize = size;
pNew->isDel = false;
if ( bNew ) pSaveBucket->head = pNew;
return pNew;
}
if ( pSaveBucket == &pHashTable->buckets ) return pFindE;
//数据迁移
if ( pHashTable->buckets.head == pFindE ) pHashTable->buckets.head = pFindE->next;
else pFrontE->next = pFindE->next;
pFindE->next = pSaveBucket->head;
pSaveBucket->head = pFindE;
pHashTable->count--;
m_pHashTable->count++;
if ( 0 == pHashTable->count ) ReleaseOldHashTable();
return pFindE;
}
void RHTable::ReleaseOldHashTable()
{
HASH_TABLE *pEmpty = NULL;
HASH_TABLE *pHashTable = m_pHashTable;
while ( NULL != pHashTable->next )
{
if ( 0 < pHashTable->next->count )
{
pHashTable = pHashTable->next;
continue;
}
pEmpty = pHashTable->next;
if ( NULL != pHashTable->next->next )
{
pHashTable->next->next->pre = pHashTable->next->pre;
}
pHashTable->next = pHashTable->next->next;
/*
不用遍历buckets中head
因为使count减小到 = 0的操作只有1种,就是3参数的Find(key,size,insert)
而Find就会将旧hash表中元素移到新表
所以count = 0就一定是所有head指针=NULL
*/
delete pEmpty->buckets;
delete pEmpty;
}
}
RHTable::OP_R* RHTable::Insert(unsigned char *key, unsigned int size, void *value, unsigned int hashValue)
{
static OP_R res;
res.bSuccess = false;
res.pInsert = Find( key, size, hashValue, true );
if ( NULL == res.pInsert ) return &res;
if ( NULL != res.pInsert->value ) return &res;
res.pInsert->value = value;
res.bSuccess = true;
m_pHashTable->count++;
if ( 5 <= m_pHashTable->count / m_pHashTable->size )Expand(0);//平均碰撞最大容忍为5
return &res;
}
void* RHTable::Find(unsigned char *key, unsigned int size, unsigned int hashValue )
{
ELEMENT *pFindE = Find( key, size, hashValue, false );
if ( NULL == pFindE ) return NULL;
return pFindE->value;
}
bool RHTable::Update(unsigned char *key, unsigned int size, void *value, unsigned int hashValue)
{
ELEMENT *pFindE = Find( key, size, hashValue, false );
if ( NULL == pFindE ) return false;
pFindE->value = value;
return true;
}
void RHTable::Delete(unsigned char *key, unsigned int size, unsigned int hashValue)
{
ELEMENT *pFindE = Find( key, size, hashValue, false );
if ( NULL == pFindE ) return;
pFindE->isDel = true;
pFindE->value = NULL;
delete pFindE->key;
pFindE->key = NULL;
pFindE->keySize = 0;
m_pHashTable->count--;
return;
}
unsigned long RHTable::Count()
{
unsigned long count = 0;
HASH_TABLE *pHashTable = m_pHashTable;
for ( ; NULL != pHashTable; pHashTable = pHashTable->next )
{
count += pHashTable->count;
}
return count;
}
bool RHTable::IsEmpty()
{
return 0 == Count();
}
RHTable::Iterator RHTable::Begin()
{
m_it.pHeadHashTable = m_pHashTable;
m_it.pHashTable = m_pHashTable;
m_it.pElement = NULL;
m_it.idx = 0;
while ( NULL != m_it.pHashTable )
{
for ( ; m_it.idx < m_it.pHashTable->size; m_it.idx++ )
{
m_it.pElement = m_it.pHashTable->buckets.head;
while ( NULL != m_it.pElement )
{
if ( m_it.pElement->isDel )
{
m_it.pElement = m_it.pElement->next;
continue;
}
return m_it;
}
}
m_it.idx = 0;
m_it.pHashTable = m_it.pHashTable->next;
m_it.pElement = NULL;
}
return m_it;
}
RHTable::Iterator RHTable::End()
{
m_it.pHeadHashTable = m_pHashTable;
m_it.pHashTable = NULL;
m_it.pElement = NULL;
m_it.idx = 0;
return m_it;
}
void RHTable::Clear()
{
HASH_TABLE *pHashTable = m_pHashTable;
ELEMENT *pElement = NULL;
unsigned int idx = 0;
while ( NULL != pHashTable )
{
for ( ; idx < pHashTable->size; idx++ )
{
pElement = pHashTable->buckets.head;
while ( NULL != pElement )
{
if ( !pElement->isDel )
{
pElement->isDel = true;
pElement->value = NULL;
delete pElement->key;
pElement->key = NULL;
pElement->keySize = 0;
pHashTable->count--;
}
pElement = pElement->next;
}
}
idx = 0;
pHashTable = pHashTable->next;
}
return;
}
void RHTable::ReleaseHashTable()
{
HASH_TABLE *pEmpty = NULL;
unsigned int i = 0;
ELEMENT *pDelE = NULL;
ELEMENT *pHead = NULL;
while ( NULL != m_pHashTable )
{
for ( i = 0; i < m_pHashTable->size; i++ )
{
pHead = m_pHashTable->buckets.head;
while ( NULL != pHead )
{
if ( NULL != pHead->key )
{
delete pHead->key;
pHead->key = NULL;
}
pDelE = pHead;
pHead = pHead->next;
delete pDelE;
pDelE = NULL;
}
}
pEmpty = m_pHashTable;
m_pHashTable = m_pHashTable->next;
delete pEmpty->buckets;
delete pEmpty;
}
}
RHTable::Iterator& RHTable::Iterator::operator++(int)
{
++(*this);
return *this;
}
RHTable::Iterator& RHTable::Iterator::operator++()
{
if ( NULL == pElement ) return *this;
pElement = pElement->next;
while ( NULL != pHashTable )
{
for ( ; idx < pHashTable->size; )
{
while ( NULL != pElement )
{
if ( pElement->isDel )
{
pElement = pElement->next;
continue;
}
return *this;
}
idx++;
pElement = pHashTable->buckets.head;
}
idx = 0;
pElement = NULL;
pHashTable = pHashTable->next;
if ( NULL == pHashTable ) break;
pElement = pHashTable->buckets.head;
}
return *this;
}
RHTable::Iterator& RHTable::Iterator::operator--(int)
{
--(*this);
return *this;
}
RHTable::Iterator& RHTable::Iterator::operator--()
{
unsigned int i = idx;
HASH_TABLE *pCurTable = pHashTable;
if ( NULL == pElement && 0 == idx && NULL == pHashTable )
{
pCurTable = pHeadHashTable;
while ( NULL != pCurTable->next ) pCurTable = pCurTable->next;
i = pCurTable->sizemask;
}
ELEMENT *pCur = pCurTable->buckets.head;
ELEMENT *pPre = NULL;
while ( NULL != pCurTable )
{
for ( ; 0 <= i && i < pCurTable->size; )
{
while ( NULL != pCur && pCur != pElement )
{
if ( !pCur->isDel ) pPre = pCur;
pCur = pCur->next;
}
if ( NULL != pPre || 0 == i ) break;
i--;
pCur = pCurTable->buckets.head;
}
if ( NULL != pPre || NULL == pCurTable->pre ) break;
pCurTable = pCurTable->pre;
i = pCurTable->sizemask;
pCur = pCurTable->buckets.head;
}
if ( NULL != pPre )
{
pHashTable = pCurTable;
pElement = pPre;
idx = i;
}
return *this;
}
bool RHTable::Iterator::operator==(RHTable::Iterator it)
{
if ( pElement != it.pElement ) return false;
if ( idx != it.idx ) return false;
if ( pHashTable != it.pHashTable ) return false;
return true;
}
bool RHTable::Iterator::operator!=(Iterator it)
{
if ( pElement != it.pElement ) return true;
if ( idx != it.idx ) return true;
if ( pHashTable != it.pHashTable ) return true;
return false;
}
FixLengthInt.h
//Copyright <xiaoxie820125@sina.com>
#ifndef MDK_FIXLENGTHINT_H
#define MDK_FIXLENGTHINT_H
#ifndef WIN32
#include <sys/types.h>
#endif
namespace mdk
{
typedef char int8;
typedef unsigned char uint8;
typedef short int16;
typedef unsigned short uint16;
typedef int int32;
typedef unsigned int uint32;
#ifdef WIN32
typedef __int64 int64;
typedef unsigned __int64 uint64;
#else
#include <sys/types.h>
typedef int64_t int64;
typedef u_int64_t uint64;
#endif
}//namespace mdk
#endif// MDK_FIXLENGTHINT_H
不错!支持这种研究,非常有益。 winston 发表于 2012-12-5 19:30 static/image/common/back.gif
不错!支持这种研究,非常有益。
其实就一点小动作而已:lol
真正提高nosql效率,光这点还不够,io这个大头在哪里摆着
以下自吹不喜者请无视
虽然是小动作,不过很多时候决定胜负的其实就是一个简简单单的小动作:lol
serverdev2012 发表于 2012-12-5 20:08 static/image/common/back.gif
其实就一点小动作而已
真正提高nosql效率,光这点还不够,io这个大头在哪里摆着
细节是魔鬼。 winston 发表于 2012-12-5 22:10 static/image/common/back.gif
细节是魔鬼。
严重同意,魔鬼就是要让对手意想不到,而不是多高超
页:
[1]