C++ 哈希表封装实战：unordered_map/set 底层原理与完整实现

前言

STL 中的 unordered_map 和 unordered_set 以高效的增删查性能（平均 O(1) 时间复杂度）成为高频使用的关联式容器，其底层核心是哈希表（哈希桶）。但很多开发者只知其然不知其所以然 —— 如何基于哈希表封装出支持 key-value 存储和 key-only 存储的两种容器？如何解决哈希冲突？如何保证 key 的唯一性？本文结合核心思路，从哈希表的泛型设计入手，一步步拆解 myunordered_map 和 myunordered_set 的封装逻辑，包括哈希函数适配、冲突解决、迭代器实现、key 约束等关键细节，附完整可运行代码，帮你吃透哈希表在容器封装中的实战应用。

一、源码及框架分析

SGI-STL30 版本源代码中没有 unordered_map 和 unordered_set，因为 SGI-STL30 是 C++11 之前的 STL 版本，而这两个容器是 C++11 之后才加入标准的。不过 SGI-STL30 已经实现了哈希表，只是容器的名字叫 hash_map 和 hash_set，它们是作为非标准容器出现的（非标准是指 C++ 标准没有规定必须实现），源代码在 hash_map、hash_set、stl_hash_map、stl_hash_set、stl_hashtable.h 这几个文件中。

hash_map 和 hash_set 的实现结构框架核心部分截取出来如下：

// stl_hash_set
// Excerpt of the SGI-STL hash_set framework: a thin wrapper that forwards to a
// hashtable whose stored value and key are the same type (key extracted with
// identity<Value>).
template<class Value, class HashFcn = hash<Value>, class EqualKey = equal_to<Value>, class Alloc = alloc>
class hash_set {
private:
    // Underlying hash table: <stored value, key, hasher, key extractor, key equality, allocator>.
    typedef hashtable<Value, Value, HashFcn, identity<Value>, EqualKey, Alloc> ht;
    ht rep;

public:
    typedef typename ht::key_type key_type;
    typedef typename ht::value_type value_type;
    typedef typename ht::hasher hasher;
    typedef typename ht::key_equal key_equal;

    // Both iterator types map to the table's const_iterator, so elements
    // cannot be modified through a hash_set iterator.
    typedef typename ht::const_iterator iterator;
    typedef typename ht::const_iterator const_iterator;

    // Accessors forwarding to the underlying table's hash and equality functors.
    hasher hash_funct() const { return rep.hash_funct(); }
    key_equal key_eq() const { return rep.key_eq(); }
};


< , , = hash<Key>, EqualKey= equal_to<Key>, Alloc= alloc>
 hash_map{
:
     hashtable<pair< Key, T>, Key, HashFcn, select1st<pair< Key, T>>, EqualKey, Alloc> ht;
    ht rep;
:
      ht::key_type key_type;
     T data_type;
     T mapped_type;
      ht::value_type value_type;
      ht::hasher hasher;
      ht::key_equal key_equal;
      ht::iterator iterator;
      ht::const_iterator const_iterator;
};


< , , , , , >
 {
:
     Key key_type;
     Value value_type;
     HashFcn hasher;
     EqualKey key_equal;
:
    hasher hash;
    key_equal equals;
    ExtractKey get_key;
     __hashtable_node<Value> node;
    vector<node*, Alloc> buckets;
    size_type num_elements;
:
     __hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey, Alloc> iterator;
    pair<iterator,>( value_type& obj);
    ;
};
// Node of a bucket chain: the stored value plus a link to the next node in
// the same bucket.
template<class Value>
struct __hashtable_node {
    __hashtable_node* next;
    Value val;
};

// HashTable.h
#pragma once
#include<iostream>
#include<vector>
#include<algorithm>
#include<string>
#include<utility>
using namespace std;

// Prime table (same values as SGI STL) used to choose bucket counts when growing.
static const int __stl_num_primes = 28;
static const unsigned long __stl_prime_list[__stl_num_primes] = {
    53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317,
    196613, 393241, 786433, 1572869, 3145739, 6291469, 12582917,
    25165843, 50331653, 100663319, 201326611, 402653189, 805306457,
    1610612741, 3221225473, 4294967291
};

// Returns the first prime in the table that is >= n, or the largest table
// entry when n exceeds every entry.
inline unsigned long __stl_next_prime(unsigned long n) {
    const unsigned long* first = __stl_prime_list;
    const unsigned long* last = __stl_prime_list + __stl_num_primes;
    const unsigned long* pos = std::lower_bound(first, last, n);
    return pos == last ? *(last - 1) : *pos;
}

// Default hash functor: converts the key directly to size_t.
template<class K>
struct HashFunc {
    // The C-style cast is kept on purpose: it accepts every key type the
    // original accepted (integers, enums, pointers, floating point).
    size_t operator()(const K& key) const {
        return (size_t)key;
    }
};

// Specialization for string keys.
template<>
struct HashFunc<string> {
    // BKDR-style string hash: add each character, then multiply by 131.
    size_t operator()(const string& key) const {
        size_t hash = 0;
        for (auto ch : key) {
            hash += ch;
            hash *= 131;
        }
        return hash;
    }
};

namespace hash_bucket {

// Node of a bucket's singly linked chain.
template<class T>
struct HashNode {
    T _data;
    HashNode<T>* _next;

    HashNode(const T& data) : _data(data), _next(nullptr) {}
};

// Separate-chaining hash table.
//   K      - key type
//   T      - stored value type (the key itself for a set, a pair for a map)
//   KeyofT - functor extracting a K from a T
//   Hash   - functor mapping a K to size_t
template<class K, class T, class KeyofT, class Hash = HashFunc<K>>
class HashTable {
    typedef HashNode<T> Node;

public:
    HashTable() : _tables(__stl_next_prime(1), nullptr), _n(0) {}

    // Deep copy. The table owns its nodes through raw pointers, so the
    // implicit member-wise copy would make two tables delete the same nodes.
    HashTable(const HashTable& other) : _tables(other._tables.size(), nullptr), _n(0) {
        try {
            for (size_t i = 0; i < other._tables.size(); ++i) {
                Node** tail = &_tables[i];  // append at the tail to keep chain order
                for (const Node* cur = other._tables[i]; cur; cur = cur->_next) {
                    *tail = new Node(cur->_data);
                    tail = &(*tail)->_next;
                    ++_n;
                }
            }
        } catch (...) {
            Clear();  // the destructor does not run for a half-built object
            throw;
        }
    }

    // Copy assignment via copy-and-swap: *this is untouched if the copy throws.
    HashTable& operator=(const HashTable& other) {
        if (this != &other) {
            HashTable tmp(other);
            _tables.swap(tmp._tables);
            std::swap(_n, tmp._n);
        }
        return *this;
    }

    ~HashTable() { Clear(); }

    // Inserts data unless an element with the same key already exists.
    // Returns true when a new node was added, false for a duplicate key.
    bool Insert(const T& data) {
        KeyofT kot;
        Hash hs;

        // Look up first so keys stay unique.
        if (Find(kot(data)))
            return false;

        // Grow once the load factor reaches 1.
        if (_n == _tables.size()) {
            std::vector<Node*> newtables(__stl_next_prime(_tables.size() + 1), nullptr);
            for (size_t i = 0; i < _tables.size(); i++) {
                // Relink the existing nodes into the new table (no reallocation).
                Node* cur = _tables[i];
                while (cur) {
                    Node* next = cur->_next;
                    size_t hashi = hs(kot(cur->_data)) % newtables.size();
                    cur->_next = newtables[hashi];  // head insertion
                    newtables[hashi] = cur;
                    cur = next;
                }
                _tables[i] = nullptr;
            }
            _tables.swap(newtables);
        }

        size_t hashi = hs(kot(data)) % _tables.size();
        Node* newnode = new Node(data);
        newnode->_next = _tables[hashi];  // head insertion
        _tables[hashi] = newnode;
        ++_n;
        return true;
    }

    // Returns the node holding key, or nullptr when the key is absent.
    Node* Find(const K& key) {
        KeyofT kot;
        Hash hs;
        size_t hashi = hs(key) % _tables.size();
        for (Node* cur = _tables[hashi]; cur; cur = cur->_next) {
            if (kot(cur->_data) == key)
                return cur;
        }
        return nullptr;
    }

private:
    // Frees every node and leaves all buckets empty.
    void Clear() {
        for (size_t i = 0; i < _tables.size(); i++) {
            Node* cur = _tables[i];
            while (cur) {
                Node* next = cur->_next;
                delete cur;
                cur = next;
            }
            _tables[i] = nullptr;
        }
        _n = 0;
    }

    std::vector<Node*> _tables;  // bucket heads
    size_t _n;                   // number of stored elements
};

}

#pragma once
#include<iostream>
#include<vector>
#include<algorithm>
#include<string>
#include<utility>
using namespace std;

// Prime table (same values as SGI STL) used to choose bucket counts when growing.
static const int __stl_num_primes = 28;
static const unsigned long __stl_prime_list[__stl_num_primes] = {
    53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317,
    196613, 393241, 786433, 1572869, 3145739, 6291469, 12582917,
    25165843, 50331653, 100663319, 201326611, 402653189, 805306457,
    1610612741, 3221225473, 4294967291
};

// Returns the first prime in the table that is >= n, or the largest table
// entry when n exceeds every entry.
inline unsigned long __stl_next_prime(unsigned long n) {
    const unsigned long* first = __stl_prime_list;
    const unsigned long* last = __stl_prime_list + __stl_num_primes;
    const unsigned long* pos = std::lower_bound(first, last, n);
    return pos == last ? *(last - 1) : *pos;
}

// Default hash functor: converts the key directly to size_t.
template<class K>
struct HashFunc {
    // The C-style cast is kept on purpose: it accepts every key type the
    // original accepted (integers, enums, pointers, floating point).
    size_t operator()(const K& key) const {
        return (size_t)key;
    }
};

// Specialization for string keys.
template<>
struct HashFunc<string> {
    // BKDR-style string hash: add each character, then multiply by 131.
    size_t operator()(const string& key) const {
        size_t hash = 0;
        for (auto ch : key) {
            hash += ch;
            hash *= 131;
        }
        return hash;
    }
};

namespace hash_bucket {

// Node of a bucket's singly linked chain.
template<class T>
struct HashNode {
    T _data;
    HashNode<T>* _next;

    HashNode(const T& data) : _data(data), _next(nullptr) {}
};

// Forward declaration: the iterator needs to name the table type.
template<class K, class T, class KeyofT, class Hash>
class HashTable;

// Forward iterator over a HashTable. Ref/Ptr select the mutable or const
// flavour (T&/T* or const T&/const T*). The end position is _node == nullptr.
template<class K, class T, class Ref, class Ptr, class KeyofT, class Hash>
struct HTIterator {
    typedef HashNode<T> Node;
    typedef HashTable<K, T, KeyofT, Hash> HT;
    typedef HTIterator<K, T, Ref, Ptr, KeyofT, Hash> Self;

    Node* _node;      // current node, nullptr at end
    const HT* _pht;   // owning table, needed to hop to the next bucket

    HTIterator(Node* node, const HT* pht) : _node(node), _pht(pht) {}

    Ref operator*() const { return _node->_data; }
    Ptr operator->() const { return &_node->_data; }

    // Advances to the next element: the next node in the current chain, else
    // the head of the next non-empty bucket, else the end position.
    Self& operator++() {
        if (_node == nullptr)  // already at end: stay there instead of dereferencing null
            return *this;

        if (_node->_next) {  // current chain not exhausted
            _node = _node->_next;
            return *this;
        }

        // Current chain exhausted: recompute this node's bucket and scan forward.
        KeyofT kot;
        Hash hs;
        size_t hashi = hs(kot(_node->_data)) % _pht->_tables.size();
        _node = nullptr;  // remains null (end) if no later bucket is occupied
        for (++hashi; hashi < _pht->_tables.size(); ++hashi) {
            if (_pht->_tables[hashi]) {
                _node = _pht->_tables[hashi];
                break;
            }
        }
        return *this;
    }

    // Post-increment: returns the position held before advancing.
    Self operator++(int) {
        Self old(*this);
        ++(*this);
        return old;
    }

    bool operator!=(const Self& s) const { return _node != s._node; }
    bool operator==(const Self& s) const { return _node == s._node; }
};

// Separate-chaining hash table.
//   K      - key type
//   T      - stored value type (the key itself for a set, a pair for a map)
//   KeyofT - functor extracting a K from a T
//   Hash   - functor mapping a K to size_t
template<class K, class T, class KeyofT, class Hash = HashFunc<K>>
class HashTable {
    // The iterator reads _tables directly. The friend template uses its own
    // parameter names: re-using K/T/KeyofT/Hash here redeclares the enclosing
    // template's parameters, which GCC and Clang reject.
    template<class K2, class T2, class Ref2, class Ptr2, class KeyofT2, class Hash2>
    friend struct HTIterator;

    typedef HashNode<T> Node;

public:
    typedef HTIterator<K, T, T&, T*, KeyofT, Hash> Iterator;
    typedef HTIterator<K, T, const T&, const T*, KeyofT, Hash> ConstIterator;

    // Iterator to the head of the first non-empty bucket, or End() when empty.
    Iterator Begin() {
        return Iterator(FirstNode(), this);
    }

    Iterator End() { return Iterator(nullptr, this); }

    ConstIterator Begin() const {
        return ConstIterator(FirstNode(), this);
    }

    ConstIterator End() const { return ConstIterator(nullptr, this); }

    HashTable() : _tables(__stl_next_prime(1), nullptr), _n(0) {}

    // Deep copy. The table owns its nodes through raw pointers, so the
    // implicit member-wise copy would make two tables delete the same nodes.
    HashTable(const HashTable& other) : _tables(other._tables.size(), nullptr), _n(0) {
        try {
            for (size_t i = 0; i < other._tables.size(); ++i) {
                Node** tail = &_tables[i];  // append at the tail to keep chain order
                for (const Node* cur = other._tables[i]; cur; cur = cur->_next) {
                    *tail = new Node(cur->_data);
                    tail = &(*tail)->_next;
                    ++_n;
                }
            }
        } catch (...) {
            Clear();  // the destructor does not run for a half-built object
            throw;
        }
    }

    // Copy assignment via copy-and-swap: *this is untouched if the copy throws.
    HashTable& operator=(const HashTable& other) {
        if (this != &other) {
            HashTable tmp(other);
            _tables.swap(tmp._tables);
            std::swap(_n, tmp._n);
        }
        return *this;
    }

    ~HashTable() { Clear(); }

    // Inserts data unless an element with the same key already exists.
    // Returns {iterator to the element, true} on insertion, or
    // {iterator to the existing element, false} for a duplicate key.
    std::pair<Iterator, bool> Insert(const T& data) {
        KeyofT kot;
        Hash hs;

        // Look up first so keys stay unique.
        if (Node* existing = FindNode(kot(data)))
            return {Iterator(existing, this), false};

        // Grow once the load factor reaches 1.
        if (_n == _tables.size())
            Rehash(__stl_next_prime(_tables.size() + 1));

        size_t hashi = hs(kot(data)) % _tables.size();
        Node* newnode = new Node(data);
        newnode->_next = _tables[hashi];  // head insertion
        _tables[hashi] = newnode;
        ++_n;
        return {Iterator(newnode, this), true};
    }

    // Returns an iterator to the element with the given key, or End().
    Iterator Find(const K& key) {
        return Iterator(FindNode(key), this);
    }

    ConstIterator Find(const K& key) const {
        return ConstIterator(FindNode(key), this);
    }

    // Removes the element with the given key. Returns true if one was removed.
    bool Erase(const K& key) {
        KeyofT kot;
        Hash hs;
        size_t hashi = hs(key) % _tables.size();

        Node* prev = nullptr;
        for (Node* cur = _tables[hashi]; cur; prev = cur, cur = cur->_next) {
            if (kot(cur->_data) == key) {
                if (prev == nullptr)
                    _tables[hashi] = cur->_next;  // removing the bucket head
                else
                    prev->_next = cur->_next;
                --_n;
                delete cur;
                return true;
            }
        }
        return false;
    }

private:
    // Head of the first non-empty bucket, or nullptr when the table is empty.
    Node* FirstNode() const {
        if (_n == 0)
            return nullptr;
        for (size_t i = 0; i < _tables.size(); i++) {
            if (_tables[i])
                return _tables[i];
        }
        return nullptr;
    }

    // Node holding key, or nullptr when the key is absent.
    Node* FindNode(const K& key) const {
        KeyofT kot;
        Hash hs;
        size_t hashi = hs(key) % _tables.size();
        for (Node* cur = _tables[hashi]; cur; cur = cur->_next) {
            if (kot(cur->_data) == key)
                return cur;
        }
        return nullptr;
    }

    // Relinks every existing node into a table of newSize buckets (nodes are
    // moved, not reallocated, so iterators to elements stay valid).
    void Rehash(size_t newSize) {
        KeyofT kot;
        Hash hs;
        std::vector<Node*> newtables(newSize, nullptr);
        for (size_t i = 0; i < _tables.size(); i++) {
            Node* cur = _tables[i];
            while (cur) {
                Node* next = cur->_next;
                size_t hashi = hs(kot(cur->_data)) % newtables.size();
                cur->_next = newtables[hashi];  // head insertion
                newtables[hashi] = cur;
                cur = next;
            }
            _tables[i] = nullptr;
        }
        _tables.swap(newtables);
    }

    // Frees every node and leaves all buckets empty.
    void Clear() {
        for (size_t i = 0; i < _tables.size(); i++) {
            Node* cur = _tables[i];
            while (cur) {
                Node* next = cur->_next;
                delete cur;
                cur = next;
            }
            _tables[i] = nullptr;
        }
        _n = 0;
    }

    std::vector<Node*> _tables;  // bucket heads
    size_t _n;                   // number of stored elements
};

}

C++ 哈希表封装实战：unordered_map/set 底层原理与完整实现