C++ 哈希表核心机制：从冲突到负载因子 | 极客日志

C++算法

C++ 哈希表核心机制：从冲突到负载因子

C++ 哈希表通过哈希函数建立关键字与存储位置的映射关系。文章讲解了直接定址法、除法散列法等哈希函数设计，以及哈希冲突和负载因子的概念。重点介绍了处理哈希冲突的两种方法：开放定址法（线性探测等）和链地址法（哈希桶）。包含完整的 C++ 代码实现，涵盖插入、查找、删除及扩容逻辑，帮助理解哈希表核心机制。

岁月神偷 发布于 2026/3/15 · 更新于 2026/7/30 · 34 浏览

C++ 的两个参考文档

老朋友（非官方文档）： cplusplus

准官方文档（同步更新）： C++ 官方参考文档

set 和 multiset 的参考文档： set、multiset

map 和 multimap 的参考文档： map、multimap

unordered_set 和 unordered_multiset 的参考文档： unordered_set、unordered_multiset

前情提示

1 ~> 初识哈希

哈希 (hash) 又称散列，故哈希表又称散列表，是一种组织数据的方式。哈希是音译名，从译名来看，有散乱排列（散列）的意思。哈希的本质就是通过哈希函数把关键字 Key 跟存储位置建立一个映射关系，查找时通过这个哈希函数计算出 Key 存储的位置，进行快速查找。

2 ~> 直接定址法

2.1 概念

当关键字的范围比较集中时，直接定址法是非常简单高效的方法：比如一组关键字都在 [0, 99] 之间，那么我们开一个 100 个数的数组，每个关键字的值直接就是存储位置的下标。再比如一组关键字都是 ['a', 'z'] 范围内的小写字母，那么我们开一个 26 个数的数组，每个关键字的 ASCII 码减去 'a' 的 ASCII 码就是存储位置的下标。也就是说，直接定址法的本质就是用关键字计算出一个绝对位置或者相对位置。这个方法我们不仅在计数排序部分用过，在 string 部分的 OJ 题目那里也用过了。

2.2 示例：字符串中的第一个唯一字符

力扣链接： 387. 字符串中的第一个唯一字符

题目描述：

class Solution {
public:
    // LeetCode 387: returns the index of the first character of `s` that
    // occurs exactly once, or -1 when every character repeats (or `s` is
    // empty). Assumes `s` contains only lowercase letters 'a'..'z', as the
    // surrounding article states for this direct-addressing example.
    int firstUniqChar(string s) {
        // Direct addressing: the slot for a letter is its offset from 'a',
        // so 26 counters cover the whole key range.
        int count[26] = { 0 };

        // First pass: tally how often each letter appears.
        for (auto ch : s) {
            count[ch - 'a']++;
        }

        // Second pass: scan in original order so the first hit is the answer.
        for (size_t i = 0; i < s.size(); ++i) {
            if (count[s[i] - 'a'] == 1) return static_cast<int>(i);
        }
        return -1;
    }
};

相关免费在线工具

加密/解密文本
使用加密算法（如AES、TripleDES、Rabbit或RC4）加密和解密文本明文。在线工具，加密/解密文本在线工具，online
Gemini 图片去水印
基于开源反向 Alpha 混合算法去除 Gemini/Nano Banana 图片水印，支持批量处理与下载。在线工具，Gemini 图片去水印在线工具，online
Base64 字符串编码/解码
将字符串编码和解码为其 Base64 格式表示形式即可。在线工具，Base64 字符串编码/解码在线工具，online
Base64 文件转换器
将字符串、文件或图像转换为其 Base64 表示形式。在线工具，Base64 文件转换器在线工具，online
Markdown转HTML
将 Markdown（GFM）转为 HTML 片段，浏览器内 marked 解析；与 HTML转Markdown 互为补充。在线工具，Markdown转HTML在线工具，online
HTML转Markdown
将 HTML 片段转为 GitHub Flavored Markdown，支持标题、列表、链接、代码块与表格等；浏览器内处理，可链接预填。在线工具，HTML转Markdown在线工具，online

// Status tag stored alongside every slot of the open-addressing table.
enum State {
    EXIST,
    EMPTY,
    DELETE
};

// One slot of the table: the key/value payload plus its status tag.
// A freshly constructed slot is marked EMPTY.
template<typename K, typename V>
struct HashData {
    pair<K, V> _kv;
    State _state = EMPTY;
};

// Skeleton of the open-addressing hash table (storage only; the public
// operations are not part of this excerpt).
template<typename K, typename V>
class HashTable {
    // Class members are private by default.
    vector<HashData<K, V>> _tables;
    size_t _n = 0; // number of entries stored in the table
};

// Default hasher: converts the key to size_t with a plain cast, which is
// enough for integral keys.
template<typename K>
struct HashFunc {
    size_t operator()(const K& key) {
        return (size_t)key;
    }
};

// Specialization for string keys.
// Simply summing the character codes would give "abcd" and "bcad" the same
// value, so this follows the BKDR idea instead: multiply the running result
// by a constant (31 and 131 are common choices) before adding the next
// character, which makes the result depend on character order.
template<>
struct HashFunc<string> {
    size_t operator()(const string& key) {
        size_t acc = 0;
        for (size_t idx = 0; idx < key.size(); ++idx) {
            acc = acc * 131 + key[idx];
        }
        return acc;
    }
};
// Skeleton of the hash table once the hasher is a template parameter:
// `Hash` defaults to HashFunc<K>, so callers only supply it for key types
// that need a custom conversion to size_t. The public interface is omitted
// in this excerpt.
template<typename K, typename V, typename Hash = HashFunc<K>>
class HashTable {
    // Class members are private by default.
    vector<HashData<K, V>> _tables;
    size_t _n = 0; // number of entries stored in the table
};

#pragma once 
#include<algorithm> 
#include<string> 
#include<utility> 
#include<vector> 
// Number of entries in __stl_prime_list.
static const int __stl_num_primes = 28;

// Ascending table of candidate bucket counts; most entries are roughly
// double the previous one, so growing to the next entry about doubles the
// table.
static const unsigned long __stl_prime_list[__stl_num_primes] = {
    53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593,
    49157, 98317, 196613, 393241, 786433, 1572869, 3145739, 6291469,
    12582917, 25165843, 50331653, 100663319, 201326611, 402653189,
    805306457, 1610612741, 3221225473, 4294967291
};

// Returns the smallest table entry that is >= n. When n is larger than every
// entry, the largest entry is returned, so the result never exceeds the table.
inline unsigned long __stl_next_prime(unsigned long n) {
    const unsigned long* first = __stl_prime_list;
    const unsigned long* last = __stl_prime_list + __stl_num_primes;
    // Binary search for the first entry >= n. Qualified with std:: so the
    // header does not depend on a `using namespace std` in the includer.
    const unsigned long* pos = std::lower_bound(first, last, n);
    return pos == last ? *(last - 1) : *pos;
}
// Default hasher: converts the key to size_t with a plain cast, which is
// enough for integral keys. operator() is const so the hasher can be used
// through const objects and from const member functions.
template<class K> struct HashFunc {
    size_t operator()(const K& key) const { return (size_t)key; }
};

// Specialization for string keys: folds every character into the running
// value and multiplies by 131 after each one, so the result depends on
// character order (BKDR-style). An empty string hashes to 0.
template<> struct HashFunc<std::string> {
    size_t operator()(const std::string& key) const {
        size_t hash = 0;
        for (auto ch : key) {
            hash += ch;
            hash *= 131;
        }
        return hash;
    }
};
namespace open_address {
    // Status of one slot in the open-addressing table.
    //   EMPTY  - never held an entry; a probe sequence may stop here.
    //   EXIST  - currently holds a live entry.
    //   DELETE - tombstone left by Erase; probing must continue past it.
    enum State { EMPTY, EXIST, DELETE };

    // One slot: the key/value payload plus its status. New slots are EMPTY.
    template<class K, class V> struct HashData {
        std::pair<K, V> _kv;
        State _state = EMPTY;
    };

    // Hash table using open addressing with linear probing.
    // `Hash` converts a key to size_t; the slot index is that value modulo
    // the table size.
    template<class K, class V, class Hash = HashFunc<K>> class HashTable {
    public:
        // Starts with the smallest size offered by __stl_next_prime.
        HashTable() :_tables(__stl_next_prime(1)) { }

        // Inserts kv. Returns false (and changes nothing) when the key is
        // already present, true otherwise.
        bool Insert(const std::pair<K, V>& kv) {
            if (Find(kv.first)) return false;

            // Grow once the load factor reaches 0.7. A plain resize() is not
            // enough: slot indices depend on the table size, so every live
            // entry has to be re-inserted into a table of the new size.
            if ((double)_n / (double)_tables.size() >= 0.7) {
                HashTable<K, V, Hash> newht;
                newht._tables.resize(__stl_next_prime(_tables.size() + 1));
                for (size_t i = 0; i < _tables.size(); i++) {
                    // Only live entries move over; tombstones are dropped.
                    if (_tables[i]._state == EXIST) {
                        newht.Insert(_tables[i]._kv);
                    }
                }
                // The live count is unchanged, so only the storage is swapped.
                _tables.swap(newht._tables);
            }

            Hash hs;
            // Modulo size(), not capacity(): operator[] is only valid
            // inside [0, size()).
            size_t hash0 = hs(kv.first) % _tables.size();

            // Linear probing: step forward (wrapping around) until a slot
            // that is not live is found. EMPTY and DELETE slots are both
            // reusable. The load-factor check above keeps free slots
            // available.
            size_t i = 1;
            size_t hashi = hash0;
            while (_tables[hashi]._state == EXIST) {
                hashi = (hash0 + i) % _tables.size();
                ++i;
            }
            _tables[hashi]._kv = kv;
            _tables[hashi]._state = EXIST;
            ++_n;
            return true;
        }

        // Returns a pointer to the slot holding `key`, or nullptr when the
        // key is not in the table.
        HashData<K, V>* Find(const K& key) {
            Hash hs;
            const size_t size = _tables.size();
            const size_t hash0 = hs(key) % size;

            // Probe at most `size` slots. Erase leaves DELETE tombstones that
            // are not counted in the load factor, so after enough
            // insert/erase cycles the table may contain no EMPTY slot at all;
            // without this bound a lookup of a missing key would loop forever.
            for (size_t i = 0; i < size; ++i) {
                const size_t hashi = (hash0 + i) % size;
                if (_tables[hashi]._state == EMPTY) {
                    break; // end of this key's probe sequence
                }
                if (_tables[hashi]._state == EXIST && _tables[hashi]._kv.first == key) {
                    return &_tables[hashi];
                }
            }
            return nullptr;
        }

        // Removes `key` if present. The slot is only marked DELETE so that
        // probe sequences passing through it stay intact.
        // Returns true when an entry was removed.
        bool Erase(const K& key) {
            HashData<K, V>* ret = Find(key);
            if (ret) {
                ret->_state = DELETE;
                --_n;
                return true;
            } else {
                return false;
            }
        }
    private:
        std::vector<HashData<K, V>> _tables; // slot array (entries stored inline)
        size_t _n = 0; // number of live (EXIST) entries
    };
}
namespace Hash_bucket {
    // Node of a bucket's singly linked list.
    template<class K,class V> struct HashNode {
        std::pair<K, V> _kv;
        HashNode<K, V>* _next;
        HashNode(const std::pair<K,V>& kv) :_kv(kv) ,_next(nullptr) {}
    };

    // Hash table using separate chaining: each bucket is the head pointer of
    // a singly linked list of nodes owned by the table.
    // `Hash` converts a key to size_t; the bucket index is that value modulo
    // the bucket count.
    template<class K, class V,class Hash = HashFunc<K>> class HashTable {
        typedef HashNode<K, V> Node;
    public:
        // Starts with the smallest bucket count offered by __stl_next_prime.
        HashTable() :_tables(__stl_next_prime(1),nullptr) ,_n(0) { }

        // Deep copy. The table owns raw nodes and frees them in its
        // destructor, so the implicit member-wise copy would leave two
        // tables deleting the same nodes.
        HashTable(const HashTable& other) :_tables(other._tables.size(), nullptr) ,_n(0) {
            try {
                for (size_t i = 0; i < other._tables.size(); i++) {
                    // Same bucket count, so every node keeps its bucket
                    // index; append at the tail to keep the chain order.
                    Node** tail = &_tables[i];
                    for (Node* cur = other._tables[i]; cur; cur = cur->_next) {
                        *tail = new Node(cur->_kv);
                        tail = &(*tail)->_next;
                        ++_n;
                    }
                }
            } catch (...) {
                // The destructor does not run for a half-built object, so
                // release what was copied before propagating.
                Clear();
                throw;
            }
        }

        // Copy-and-swap assignment: `other` is already a deep copy; after the
        // swap its destructor releases this table's previous nodes.
        HashTable& operator=(HashTable other) {
            _tables.swap(other._tables);
            std::swap(_n, other._n);
            return *this;
        }

        // The nodes are raw allocations, so they must be freed explicitly.
        ~HashTable() {
            Clear();
        }

        // Inserts kv. Returns false (and changes nothing) when the key is
        // already present, true otherwise.
        bool Insert(const std::pair<K, V>& kv) {
            if (Find(kv.first)) return false;
            Hash hs; // every modulo goes through the hasher first

            // Grow when the load factor reaches 1.
            if (_n == _tables.size()) {
                std::vector<Node*> newtables(__stl_next_prime(_tables.size() + 1), nullptr);
                for (size_t i = 0; i < _tables.size(); i++) {
                    // Re-link the existing nodes into the new bucket array
                    // instead of copying them: no allocation, no deletion.
                    Node* cur = _tables[i];
                    while (cur) {
                        Node* next = cur->_next;
                        // Head-insert into the bucket for the new size.
                        size_t hashi = hs(cur->_kv.first) % newtables.size();
                        cur->_next = newtables[hashi];
                        newtables[hashi] = cur;
                        cur = next;
                    }
                    _tables[i] = nullptr;
                }
                _tables.swap(newtables);
            }

            size_t hashi = hs(kv.first) % _tables.size();
            // Head insertion into the bucket's list.
            Node* newnode = new Node(kv);
            newnode->_next = _tables[hashi];
            _tables[hashi] = newnode;
            ++_n;
            return true;
        }

        // Returns the node holding `key`, or nullptr when it is absent.
        Node* Find(const K& key) {
            Hash hs;
            size_t hashi = hs(key) % _tables.size();
            Node* cur = _tables[hashi];
            while (cur) {
                if (cur->_kv.first == key) {
                    return cur;
                }
                cur = cur->_next;
            }
            return nullptr;
        }

        // Removes `key` if present and frees its node.
        // Returns true when an entry was removed.
        bool Erase(const K& key) {
            Hash hs;
            size_t hashi = hs(key) % _tables.size();
            Node* prev = nullptr;
            Node* cur = _tables[hashi];
            while (cur) {
                if (cur->_kv.first == key) {
                    if (prev == nullptr) {
                        // Removing the first node: the bucket head moves on.
                        _tables[hashi] = cur->_next;
                    } else {
                        prev->_next = cur->_next;
                    }
                    --_n;
                    delete cur;
                    return true;
                }
                prev = cur;
                cur = cur->_next;
            }
            return false;
        }
    private:
        // Frees every node and leaves all buckets empty.
        void Clear() {
            for (size_t i = 0; i < _tables.size(); i++) {
                Node* cur = _tables[i];
                while (cur) {
                    Node* next = cur->_next;
                    delete cur;
                    cur = next;
                }
                _tables[i] = nullptr;
            }
            _n = 0;
        }

        std::vector<Node*> _tables; // array of bucket head pointers
        size_t _n; // number of stored entries
        // A std::list per bucket would also work as the bucket type; the
        // hand-rolled node list was chosen here as the simpler thing to
        // follow.
    };
}

#define _CRT_SECURE_NO_WARNINGS 1 
#include<iostream> 
#include<unordered_map> 
using namespace std; 
#include"HashTable.h" 
namespace open_address { 
    void TestHT1() { 
        int a[] = { 19,30,5,36,13,20,21,12,58 }; 
        HashTable<int, int> ht; 
        for (auto e : a) { 
            ht.Insert({ e,e }); 
        } 
        ht.Insert({ 2,2 }); 
        ht.Insert({ 22,22 }); 
        cout << ht.Find(5) << endl; 
        cout << ht.Find(58) << endl; 
        ht.Erase(5); 
        cout << ht.Find(5) << endl; 
        cout << ht.Find(58) << endl; 
        //for (size_t i = 0; i < 100; i++) 
        //{ 
        //     ht.Insert({ rand(),i }); 
        //} 
    } 
    struct HashFuncString { 
        // BKDR 
        size_t operator()(const string& key) { 
            size_t hash = 0; 
            for (auto ch : key) { 
                hash += ch; 
                hash *= 131; 
            } 
            return hash; 
        } 
    }; 
    void TestHT2() { 
        //HashTable<string, string, HashFuncString> dict; 
        HashTable<string, string> dict; 
        dict.Insert({ "string","字符串" }); // string 无法取模 
        dict.Insert({ "string","字符串 1" }); 
        dict.Insert({ "left","左边" }); 
        dict.Insert({ "right","右边" }); 
        cout << dict.Find("string") << endl; 
        cout << dict.Find("left") << endl; 
        cout << dict.Find("left ") << endl; 
        HashFuncString hfs; 
        cout << hfs("abcd") << endl; 
        cout << hfs("acbd") << endl; 
        cout << hfs("aadd") << endl; 
        unordered_map<string, string> dictmap; 
        dictmap.insert({ "string","字符串" }); // 编译报错，需要自己实现 Hash 的仿函数把 key 转成整形 
        //unordered_map<pair<string, int>, string> um; 
        //um.insert({ {"string", 1}, "字符串" }); 
    } 
} 
namespace Hash_bucket { 
    void TestHT1() { 
        int a[] = { 19,30,5,36,13,20,21,12,58 }; 
        HashTable<int, int> ht; 
        for (auto e : a) { 
            ht.Insert({ e,e }); 
        } 
        ht.Insert({ 2,2 }); 
        ht.Insert({ 22,22 }); 
        ht.Insert({ 44,44 }); 
        // 这两个过了就说明代码没问题了：先删 58 再删 36 
        ht.Erase(58); 
        ht.Erase(36); 
    } 
    void TestHT2() { 
        HashTable<string, string> dict; 
        dict.Insert({ "string","字符串" }); // string 无法取模 
        dict.Insert({ "string","字符串 1" }); 
        dict.Insert({ "left","左边" }); 
        dict.Insert({ "right","右边" }); 
        cout << dict.Find("string") << endl; 
        cout << dict.Find("left") << endl; 
        cout << dict.Find("left ") << endl; 
    } 
} 
int main() {
    // Exactly one driver is enabled at a time; the others stay available:
    //   open_address::TestHT1();
    //   open_address::TestHT2();
    //   Hash_bucket::TestHT2();
    Hash_bucket::TestHT1();

    return 0;
}

C++ 哈希表核心机制：从冲突到负载因子

C++ 的两个参考文档

前情提示

1 ~> 初识哈希

2 ~> 直接定址法

2.1 概念

2.2 示例：字符串中的第一个唯一字符

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具

3 ~> 哈希的一些概念

3.1 哈希冲突

3.2 负载因子

3.3 将关键字转为整数

4 ~> 哈希函数

4.1 除法散列法 / 除留余数法

4.2 乘法散列法（了解即可）

4.3 全域散列法（了解即可）

4.4 其他方法（了解即可）

5 ~> 处理哈希冲突

5.1 开放定址法

5.1.1 线性探测（含堆积问题）

5.1.2 二次探测

5.1.3 双重探测

5.2 详解开放定址法代码

5.2.1 哈希表结构

5.2.2 扩容问题

5.2.3 key 不能取模的问题

5.3 链地址法

5.3.1 对比开放定址法和链地址法

5.3.2 哈希桶概念及其示例

5.3.3 扩容

5.3.4 极端场景

5.4 哈希桶代码实现

5.4.0 在私有定义成员变量

5.4.1 插入

5.4.2 查找

5.4.3 删除

完整代码示例与实践演示

HashTable.h：

Test.cpp：

运行结果

open_address::TestHT1()：

open_address::TestHT2()：

Hash_bucket::TestHT1()：

Hash_bucket::TestHT2()：

结尾

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具