C++ 哈希表详解：概念、哈希函数与冲突解决

QQ20250923-120240

1. 哈希表的概念

哈希表（又称散列表）是一种基于「键值对（Key-Value）」存储的数据结构，其核心目标是通过哈希函数将「键（Key）」直接映射到对应的存储位置，从而实现 O(1) 级别的平均查找、插入和删除效率，是计算机科学中效率最高的数据结构之一。

1.1 哈希函数（Hash Function）

哈希函数是哈希表的核心组件，其本质是一个数学函数，作用是将「任意类型、任意长度的关键字（Key）」转换为「固定范围、可直接作为底层数组索引的整数（哈希值 / Hash Value）」，从而实现'通过 Key 快速定位存储位置'的目标。

1.2 哈希冲突（Hash Collision）

哈希冲突是指不同的 Key 经过哈希函数计算后，得到了相同的哈希值的现象。它不是'设计失误'，而是数学上的必然结果：Key 的取值空间通常远大于哈希值（数组下标）的取值范围，根据鸽巢原理，必然存在多个不同的 Key 被映射到同一个位置。

1.3 负载因子（Load Factor）

假设哈希表中已经映射存储了 N 个值，哈希表的大小为 M，那么 $\text{Load Factor} = N/M$。负载因子有些地方也翻译为载荷因子/装载因子等。负载因子越大，哈希冲突的概率越高，空间利用率越高；负载因子越小，哈希冲突的概率越低，空间利用率越低。负载因子的关键应用是触发哈希表进行扩容。

2. 哈希函数

哈希函数是哈希表的'核心引擎'，作用是：把任意类型的'键（Key）'（比如整数、字符串、对象），转换成一个固定范围的整数（称为'哈希值'或'索引'），这个索引直接对应底层存储数组的位置。

哈希函数的设计要求：

确定性：同一个 Key 每次输入哈希函数，必须得到相同的索引（若结果随机，则无法查找）。
均匀性：尽量将不同的 Key 映射到不同的索引，减少「哈希冲突」。
高效性：哈希函数的计算过程必须快速（如简单的取模、位运算），否则会抵消哈希表的效率优势。

2.1 直接定址法（Direct Addressing）

直接定址法是最直观的哈希函数构造方式，其核心是'关键字与哈希地址直接关联'，无需复杂计算，是理解哈希函数设计的基础。

直接定址法通过关键字本身或关键字的线性变换直接作为哈希地址，它的本质是建立关键字与哈希地址的线性映射关系：每个关键字通过公式计算后，会映射到唯一的哈希地址（数组索引），且不同关键字的哈希地址一定不同，这种'一一对应'的特性决定了：直接定址法不会产生哈希冲突（这是它与其他哈希函数的核心区别）。

理解映射过程：例如我们要统计一个字符串中每个字符出现的次数（确保字符串中都是小写字母），我们可以以字符的 ASCII 码值作为关键字，因为小写字母的 ASCII 码值是从 97（'a'）到 122（'z'），所以我们可以通过简单的线性变换（下标 = 字符 - 'a'）将其关键字映射到大小为 26 的数组中：

QQ20250927-002527

优点：

计算高效：仅需一次线性运算（或直接使用关键字），几乎无额外开销，是所有哈希函数中计算最快的；
无冲突：由于映射关系是一一对应，完全避免哈希冲突，无需设计冲突解决机制；

namespace open_address
{
    // State of a single slot in the open-addressing table.
    enum State
    {
        EXIST,  // slot currently holds a live key/value pair
        EMPTY,  // slot has never been occupied; ends a probe sequence
        DELETE  // tombstone left by Erase; probing must continue past it
    };

    // One slot of the table: the stored pair plus its occupancy state.
    template<class K, class V>
    struct HashData
    {
        std::pair<K, V> _kv;
        State _state = EMPTY;
    };

    // Hash table using open addressing with linear probing.
    //
    // K    - key type; must be comparable with operator==.
    // V    - mapped value type.
    // Hash - functor type; an instance is called as hash(key) and the result
    //        is reduced modulo the table size. The default, HashFunc<K>, is
    //        declared outside this block.
    template<class K, class V, class Hash = HashFunc<K>>
    class HashTable
    {
    public:
        // Returns the smallest prime in the built-in list that is >= n, or the
        // largest prime in the list when n exceeds all of them.
        inline unsigned long __stl_next_prime(unsigned long n)
        {
            static const int __stl_num_primes = 28;
            static const unsigned long __stl_prime_list[__stl_num_primes] = {
                53, 97, 193, 389, 769,
                1543, 3079, 6151, 12289, 24593,
                49157, 98317, 196613, 393241, 786433,
                1572869, 3145739, 6291469, 12582917, 25165843,
                50331653, 100663319, 201326611, 402653189, 805306457,
                1610612741, 3221225473, 4294967291
            };
            const unsigned long* first = __stl_prime_list;
            const unsigned long* last = __stl_prime_list + __stl_num_primes;
            const unsigned long* pos = std::lower_bound(first, last, n);
            return pos == last ? *(last - 1) : *pos;
        }

        // Starts with the smallest prime-sized table (53 slots, all EMPTY).
        HashTable()
        {
            _tables.resize(__stl_next_prime(0));
        }

        // Inserts kv. Returns false (table unchanged) when the key is already
        // present, true otherwise.
        bool Insert(const std::pair<K, V>& kv)
        {
            if (Find(kv.first))
                return false;

            // Grow once the load factor (live entries / slots) reaches 0.7.
            if (_n * 10 / _tables.size() >= 7)
            {
                // Build a larger table, re-insert every live entry, then swap
                // storage. Tombstones are not carried over.
                HashTable<K, V, Hash> newHT;
                newHT._tables.resize(__stl_next_prime(_tables.size() + 1));
                for (size_t i = 0; i < _tables.size(); i++)
                {
                    if (_tables[i]._state == EXIST)
                    {
                        newHT.Insert(_tables[i]._kv);
                    }
                }
                _tables.swap(newHT._tables);
            }

            Hash hash;
            const size_t hash0 = hash(kv.first) % _tables.size();
            size_t hashi = hash0;
            size_t i = 1;
            // Linear probing: take the first slot that is not live. EMPTY and
            // DELETE slots are both reusable because the duplicate check above
            // already ruled out the key being further along the sequence.
            // (Quadratic probing would step by +/- i^2 instead.)
            while (_tables[hashi]._state == EXIST)
            {
                hashi = (hash0 + i) % _tables.size();
                ++i;
            }

            _tables[hashi]._kv = kv;
            _tables[hashi]._state = EXIST;
            ++_n;
            return true;
        }

        // Returns a pointer to the slot holding key, or nullptr when absent.
        HashData<K, V>* Find(const K& key)
        {
            Hash hash;
            const size_t slots = _tables.size();
            const size_t hash0 = hash(key) % slots;

            // Probe at most one full pass over the table. The load-factor
            // check in Insert counts only live entries, so tombstones can
            // accumulate until no EMPTY slot is left; an unbounded
            // "until EMPTY" loop would then never terminate for a missing key.
            for (size_t step = 0; step < slots; ++step)
            {
                HashData<K, V>& slot = _tables[(hash0 + step) % slots];
                if (slot._state == EMPTY)
                {
                    return nullptr;
                }
                if (slot._state == EXIST && slot._kv.first == key)
                {
                    return &slot;
                }
            }
            return nullptr;
        }

        // Removes key. Returns true when an entry was removed, false when the
        // key was not present.
        bool Erase(const K& key)
        {
            HashData<K, V>* ret = Find(key);
            if (ret == nullptr)
            {
                return false;
            }

            // Leave a tombstone rather than EMPTY so that probe sequences
            // passing through this slot still reach later entries.
            ret->_state = DELETE;
            --_n;
            return true;
        }

    private:
        std::vector<HashData<K, V>> _tables;
        size_t _n = 0; // number of live (EXIST) entries
    };
}

namespace hash_bucket { template<class K, class V> struct HashNode { pair<K, V> _kv; HashNode<K, V>* _next; HashNode(const pair<K, V>& kv) : _kv(kv), _next(nullptr) {} }; template<class K, class V, class Hash = HashFunc<K>> class HashTable { typedef HashNode<K, V> Node; inline unsigned long __stl_next_prime(unsigned long n) { static const int __stl_num_primes = 28; static const unsigned long __stl_prime_list[__stl_num_primes] = { 53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469, 12582917, 25165843, 50331653, 100663319, 201326611, 402653189, 805306457, 1610612741, 3221225473, 4294967291 }; const unsigned long* first = __stl_prime_list; const unsigned long* last = __stl_prime_list + __stl_num_primes; const unsigned long* pos = lower_bound(first, last, n); return pos == last ? *(last - 1) : *pos; } public: HashTable() { _tables.resize(__stl_next_prime(0), nullptr); } // 拷贝构造和赋值拷贝需要实现深拷贝，有兴趣的同学可以自行实现~ ~HashTable() { // 依次把每个桶释放 for (size_t i = 0; i < _tables.size(); i++) { Node* cur = _tables[i]; while (cur) { Node* next = cur->_next; delete cur; cur = next; } _tables[i] = nullptr; } } bool Insert(const pair<K, V>& kv) { Hash hs; size_t hashi = hs(kv.first) % _tables.size(); // 负载因子==1 扩容 if (_n == _tables.size()) { /*HashTable<K, V> newHT; newHT._tables.resize(__stl_next_prime(_tables.size()+1); for (size_t i = 0; i < _tables.size(); i++) { Node* cur = _tables[i]; while(cur) { newHT.Insert(cur->_kv); cur = cur->_next; } } _tables.swap(newHT._tables);*/ // 这里如果使用上面的方法，扩容时创建新的结点，后面还要使用旧结点，浪费了 // 下面的方法，直接移动旧表的结点到新表，效率更好 vector<Node*> newtables(__stl_next_prime(_tables.size() + 1), nullptr); for (size_t i = 0; i < _tables.size(); i++) { Node* cur = _tables[i]; while (cur) { Node* next = cur->_next; // 旧表中节点，挪动新表重新映射的位置 size_t hashi_new = hs(cur->_kv.first) % newtables.size(); // 头插到新表 cur->_next = newtables[hashi_new]; newtables[hashi_new] = cur; cur = next; } _tables[i] = nullptr; } 
_tables.swap(newtables); } // 头插 Node* newnode = new Node(kv); newnode->_next = _tables[hashi]; _tables[hashi] = newnode; ++_n; return true; } Node* Find(const K& key) { Hash hs; size_t hashi = hs(key) % _tables.size(); Node* cur = _tables[hashi]; while (cur) { if (cur->_kv.first == key) { return cur; } cur = cur->_next; } return nullptr; } bool Erase(const K& key) { Hash hs; size_t hashi = hs(key) % _tables.size(); Node* prev = nullptr; Node* cur = _tables[hashi]; while (cur) { if (cur->_kv.first == key) { if (prev == nullptr) { _tables[hashi] = cur->_next; } else { prev->_next = cur->_next; } delete cur; --_n; return true; } prev = cur; cur = cur->_next; } return false; } private: vector<Node*> _tables; // 指针数组 size_t _n = 0; // 表中存储数据个数 }; }

C++ 哈希表详解：概念、哈希函数与冲突解决

1. 哈希表的概念

1.1 哈希函数（Hash Function）

1.2 哈希冲突（Hash Collision）

1.3 负载因子（Load Factor）

2. 哈希函数

2.1 直接定址法（Direct Addressing）

更多推荐文章

相关免费在线工具

2.2 除留余数法（Division Method）

2.3 其他方法

全域散列法

乘法散列法

3. 哈希冲突

3.1 开放寻址法（Open Addressing）

简单的代码实现：

3.2 链地址法（Chaining）

简单的代码实现：

4. 小结

更多推荐文章

相关免费在线工具

C++ 哈希表详解：概念、哈希函数与冲突解决

1. 哈希表的概念

1.1 哈希函数（Hash Function）

1.2 哈希冲突（Hash Collision）

1.3 负载因子（Load Factor）

2. 哈希函数

2.1 直接定址法（Direct Addressing）

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具

2.2 除留余数法（Division Method）

2.3 其他方法

全域散列法

乘法散列法

3. 哈希冲突

3.1 开放寻址法（Open Addressing）

简单的代码实现：

3.2 链地址法（Chaining）

简单的代码实现：

4. 小结

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具