C++ 哈希表原理与实现详解

unordered 系列关联式容器简介

在使用 unordered_map 和 unordered_set 之前，我们先回顾一下它们与红黑树实现的 map 和 set 的区别。两者底层结构不同：前者基于哈希表，后者基于平衡二叉搜索树。

主要差异在于：

迭代器：unordered 系列通常只支持单向迭代器，没有反向迭代器（rbegin/rend）。
有序性：unordered 系列遍历结果是无序的，而红黑树版本是有序的。
性能：哈希表在平均情况下查找、插入、删除的时间复杂度为 O(1)，优于红黑树的 O(logN)。

虽然用法相似，但理解其底层哈希机制对于掌握 STL 至关重要。

哈希基础概念

哈希（Hash），又称散列，本质上是将存储的值与存储位置建立映射关系。常见的搜索方式包括暴力查找（慢）、二分查找（需排序，增删不便）和平衡搜索树。哈希则通过函数计算直接定位数据位置。

哈希冲突与解决策略

理想情况下，每个键值都能映射到唯一位置，但实际中不同的 Key 可能计算出相同的 Hash 值，这称为哈希冲突。解决冲突主要有两种方法：

闭散列（开放定址法）：当发生冲突时，按照某种规则寻找下一个空闲位置。常见策略包括线性探测和二次探测。
开散列（拉链法/哈希桶）：将数组中的每个元素作为一个链表的头节点，冲突的元素挂在链表上。

负载因子

哈希表不能无限填充，否则冲突概率激增，效率下降。负载因子 = 已存元素个数 / 哈希表总大小。通常控制负载因子在 0.7~1.0 之间，超过阈值即触发扩容。

闭散列实现：线性探测

在闭散列中，我们需要处理三个状态：

EXIST：该位置有有效数据。
EMPTY：该位置从未被使用过。
DELETE：该位置曾经有数据，但已被删除。

注意：删除操作不能简单置空，否则会导致后续查找中断。必须标记为 DELETE，这样查找时遇到 DELETE 继续向后探测，遇到 EMPTY 才停止。

核心逻辑

插入：计算 Hash 值，若位置为 EXIST 则线性探测下一位；若为 EMPTY 或 DELETE 则填入数据。
查找：从 Hash 位置开始，若找到 Key 则返回；若遇到 EMPTY 则说明不存在。
删除：找到 Key 后，将状态改为 DELETE。
扩容：当负载因子达到阈值（如 0.7），创建新表，将所有有效数据重新映射到新表中（Rehash）。

// HashTable.h (Open Addressing Version)
#pragma once
#include <vector>



< >
  {
    {
         ()key;
    }
};


<>
 <string> {
    {
         hash = ;
         ( ch : str) {
            hash *= ; 
            hash += ch;
        }
         hash;
    }
};

 open_address {
      {
        EXIST,
        EMPTY,
        DELETE
    };

    < ,  >
      {
        pair<K, V> _kv;
        STATE _state = EMPTY;
    };

    < ,  ,   = DefaultHashFunc<K>>
     HashTable {
    :
        () {
            _table.();
        }

        {
             ((kv.first)) {
                 ;
            }
            
             (_n *  / _table.() >= ) {
                 newSize = _table.() * ;
                HashTable<K, V, HashFunc> newHT;
                newHT._table.(newSize);
                
                 ( i = ; i < _table.(); i++) {
                     (_table[i]._state == EXIST) {
                        newHT.(_table[i]._kv);
                    }
                }
                _table.(newHT._table);
            }

            HashFunc hf;
             hashi = (kv.first) % _table.();
             (_table[hashi]._state == EXIST) {
                ++hashi;
                hashi %= _table.();
            }
            _table[hashi]._kv = kv;
            _table[hashi]._state = EXIST;
            ++_n;
             ;
        }

        {
            HashFunc hf;
             hashi = (key) % _table.();
             (_table[hashi]._state != EMPTY) {
                 (_table[hashi]._state == EXIST && _table[hashi]._kv.first == key) {
                     (HashData< K, V>*)&_table[hashi];
                }
                ++hashi;
                hashi %= _table.();
            }
             ;
        }

        {
            HashData< K, V>* ret = (key);
             (ret) {
                ret->_state = DELETE;
                --_n;
                 ;
            }
             ;
        }

    :
        vector<HashData<K, V>> _table;
         _n = ;
    };
}

// HashTable.h (Chaining Version) namespace hash_bucket { template<class T> struct HashNode { T _data; HashNode<T>* _next; HashNode(const T& data) :_data(data), _next(nullptr) {} }; template<class K, class T, class KeyOfT, class HashFunc = DefaultHashFunc<K>> class HashTable { typedef HashNode<T> Node; public: HashTable() { _table.resize(10, nullptr); } ~HashTable() { for (size_t i = 0; i < _table.size(); i++) { Node* cur = _table[i]; while (cur) { Node* next = cur->_next; delete cur; cur = next; } _table[i] = nullptr; } } pair<iterator, bool> Insert(const T& data) { KeyOfT kot; iterator it = Find(kot(data)); if (it != end()) { return make_pair(it, false); } HashFunc hf; // 负载因子为 1 时扩容 if (_n == _table.size()) { size_t newSize = _table.size() * 2; vector<Node*> newTable(newSize, nullptr); for (size_t i = 0; i < _table.size(); i++) { Node* cur = _table[i]; while (cur) { Node* next = cur->_next; size_t hashi = hf(kot(cur->_data)) % newSize; cur->_next = newTable[hashi]; newTable[hashi] = cur; cur = next; } _table[i] = nullptr; } _table.swap(newTable); } size_t hashi = hf(kot(data)) % _table.size(); Node* newnode = new Node(data); newnode->_next = _table[hashi]; _table[hashi] = newnode; ++_n; return make_pair(iterator(newnode, this), true); } iterator Find(const K& key) { HashFunc hf; KeyOfT kot; size_t hashi = hf(key) % _table.size(); Node* cur = _table[hashi]; while (cur) { if (kot(cur->_data) == key) { return iterator(cur, this); } cur = cur->_next; } return end(); } bool Erase(const K& key) { HashFunc hf; KeyOfT kot; size_t hashi = hf(key) % _table.size(); Node* prev = nullptr; Node* cur = _table[hashi]; while (cur) { if (kot(cur->_data) == key) { if (prev == nullptr) { _table[hashi] = cur->_next; } else { prev->_next = cur->_next; } delete cur; --_n; return true; } prev = cur; cur = cur->_next; } return false; } private: vector<Node*> _table; size_t _n = 0; }; }

C++ 哈希表原理与实现详解