杜鹃散列

xiaoxiao2021-02-28  107

杜鹃散列表通常通过一个拥有多个散列函数的大表来实现,这些散列函数探测整个大表。

插入时首先依次尝试所有散列函数,一旦找到空位便插入。如果试完所有散列函数仍未找到空位,便随机踢出一个,记录最后被踢出的位置(如果下次随机出来的位置与最后被踢出的位置相同,则重新随机)然后将被踢出去的项插入散列表,重复上述步骤。

如果经过一定次数的尝试,仍未成功插入,则更换一组散列函数(不改变表的大小)。若更换五组散列函数后仍未成功,则扩大表的大小(不改变散列函数),重新尝试。

生成一组散列函数的类接口

template <typename AnyType> class CuckooHashFamily { public: size_t hash(const AnyType & x,int which)const; int getNumberOfFunction(); void generateNewFunction(); };

字符串散列

template <int count> class StringHashFamily { public: StringHashFamily():MULTIPLIERS(count)//每组散列函数个数count { generateNewFunctions(); } int getNumberOfFunctions()const { return count; } void generateNewFunctions() { for(auto & mult:MULTIPLIERS) mult=r.nextInt(); } size_t hash(const std::string & x,int which)const//散列函数 { const int multiplier=MULTIPLIERS[which]; size_t hashVal=0; for(auto ch:x) hashVal=multiplier*hashVal+ch; return hashVal; } private: std::vector<int> MULTIPLIERS: UniformRandom r; };

完整代码

#include <vector> bool isPrime(int size) { for(int i=3;i*i<=size;i+=2) { if(size%i==0) return false; } return true; } int nextPrime(int size) { if(size<=2) return 2; if(size%2==0) size++; while(!isPrime(size)) { size+=2; } return size; } template <typename AnyType> class CuckooHashFamily { public: size_t hash(const AnyType & x,int which)const; int getNumberOfFunction(); void generateNewFunction(); }; template <int count> class StringHashFamily { public: StringHashFamily():MULTIPLIERS(count) { generateNewFunctions(); } int getNumberOfFunctions()const { return count; } void generateNewFunctions() { for(auto & mult:MULTIPLIERS) mult=r.nextInt(); } size_t hash(const std::string & x,int which)const { const int multiplier=MULTIPLIERS[which]; size_t hashVal=0; for(auto ch:x) hashVal=multiplier*hashVal+ch; return hashVal; } private: std::vector<int> MULTIPLIERS: UniformRandom r; }; template <typename AnyType,typename HashFamily> class CuckooHashTable { public: explicit CuckooHashTable(int size=101):array(nextPrime(size)) { numHashFunctions=hashFunctions.getNumberOfFunctions(); rehashes=0; makeEmpty(); } void makeEmpty() { currentSize=0; for(auto & entry:array) { entry.isActive=false; } } bool contains(const AnyType & x)const { return findPos(x)!=-1; } bool remove(const AnyType & x) { int currentPos=findPos(x); if(currentPos==-1) return false; array[currentPos].isActive=false; --currentPos; return true; } bool insert(const AnyType & x) { if(contains(x)) return false; if(currentSize>=array.size()*MAX_lOAD) expand(); return insertHelper1(x); } bool insert(AnyType && x); private: struct HashEntry { AnyType element; bool isActive; HashEntry(const AnyType & e=AnyType{},bool a=false):element(e),isActive(a){} HashEntry(AnyType && e,bool a=false):element(std::move(e)),isActive(a){} }; //插入例程 bool insertHelper1(const AnyType & xx) { const int COUNT_LIMIT=100;//踢出尝试次数 AnyType x=xx; while(true) { int lastPos=-1; int pos; for(int count=0;count<COUNT_LIMIT;++count) { for(int i=0;i<numHashFunctions;++i) { pos=myhash(x, i); if(!isActive(pos)) { array[pos]=std::move(HashEntry{std::move(x),true}); ++currentSize; return true; } } int i=0;//如果只有两个散列函数而且两个散列函数计算的位置相同,则可能陷入无限循环,因此需要限制循环次数。 do { pos=myhash(x,r.nextInt(numHashFunctions));//随机找出一项踢出 }while(pos==lastPos&&i++<5); lastPos=pos;//记录最后被踢出的位置 std::swap(x,array[pos].element);//踢出一项 } if(++rehashes>ALLOWED_REHASHS) { expand(); rehashes=0; } else rehash(); } } bool insertHelper1(AnyType && xx); bool isActive(int currentPos)const { return array[currentPos].isActive; } size_t myhash(const AnyType & x,int which)const { return hashFunctions.hash(x,which)%array.size(); } int findPos(const AnyType & x)const { for(int i=0;i<numHashFunctions;++i) { int pos=myhash(x,i); if(isActive(pos)&&array[pos].element==x) return pos; } return -1; } void expand() { rehash(static_cast<int>(array.size()/MAX_lOAD)); } void rehash() { hashFunctions.generateNewFunctions(); rehash(array.size()); } void rehash(int newSize) { std::vector<HashEntry> oldArray=array; array.resize(nextPrime(newSize)); for(auto & entry:array) entry.isActive=false; currentSize=0; for(auto & entry:oldArray) { if(entry.isActive) insert(std::move(entry.element)); } } constexpr static const double MAX_lOAD=0.40; static const int ALLOWED_REHASHS=5; std::vector<HashEntry> array; int currentSize; int numHashFunctions; int rehashes; UniformRandom r; HashFamily hashFunctions; };
转载请注明原文地址: https://www.6miu.com/read-21828.html

最新回复(0)