C++ 哈希表底层实现：unordered_map/set、位图与布隆过滤器 | 极客日志

C++算法

C++ 哈希表底层实现：unordered_map/set、位图与布隆过滤器

C++ 哈希表底层原理详解。涵盖闭散列与开散列冲突解决策略，模拟实现 unordered_set 与 unordered_map。深入讲解位图数据结构及其在海量数据处理中的应用，如交集查找与唯一性判断。介绍布隆过滤器的多哈希函数机制及空间优化方案。最后通过哈希切割解决大文件内存限制下的数据匹配问题，并提供相关算法习题解析。

DevStack发布于 2026/3/21更新于 2026/6/219 浏览

unordered 系列关联式容器

包括 unordered_map, unordered_set, unordered_multimap, unordered_multiset。这些容器基于哈希表实现，用法与 set 类似，接口基本相同，支持范围 for 遍历。

与 set 的区别：

unordered 系列容器的迭代器是单向迭代器。
unordered 系列中序遍历结果无序。
性能方面，unordered 系列通常优于 set；但在有序数据插入场景下，set 表现更好。

注意：比较性能应在 Release 模式下进行。

哈希

哈希（散列）是建立存储值与存储位置对应关系的方法，类似于计数排序。模拟实现时不要存入相同的 key。哈希以牺牲空间为代价提高查询效率。

常用方法：

直接定址法：适用于值分布集中的情况，如统计字符出现次数。
除留余数法：适用于值分布分散的情况，如 value % n。

哈希冲突

不同值映射到相同位置导致冲突。解决方式：

闭散列（开放定址法）：当前位置被占用则按规则找下一个位置（如线性探测、二次探测）。
开散列（链地址法/哈希桶）：每个位置挂一个链表。

闭散列的模拟实现

采用除留余数法加线性探测。

线性探测：下标加 i。
二次探测：下标加 i^2。

enum State { EXIST, EMPTY, DELETE };

template<class K, class V>
struct HashData {
    pair<K, V> _kv;
    State _state = EMPTY;
};

template<class K>
struct DefaultHashFunc {
    size_t operator()(const K& key) {
        return (size_t)key;
    }
};

template<class K, class ,   = DefaultHashFunc<K>>
 HashTable {
:
    () { _table.(); }

    {
         (_n *  / _table.() >= ) {
             newSize = _table.() * ;
            HashTable<K, V, HashFunc> newHT;
            newHT._table.(newSize);
             ( i = ; i < _table.(); ++i) {
                 (_table[i]._state == EXIST) {
                    newHT.(_table[i]._kv);
                }
            }
            _table.(newHT._table);
        }

        HashFunc hf;
         hashi = (kv.first) % _table.();
         (_table[hashi]._state == EXIST) {
            ++hashi;
            hashi %= _table.();
        }
        _table[hashi]._kv = kv;
        _table[hashi]._state = EXIST;
        ++_n;
         ;
    }

    {
        HashFunc hf;
         hashi = (key) % _table.();
         (_table[hashi]._state != EMPTY) {
             (_table[hashi]._state == EXIST && _table[hashi]._kv.first == key) {
                 (HashData< K, V>*)&_table[hashi];
            }
            ++hashi;
            hashi %= _table.();
        }
         ;
    }

    {
        HashData< K, V>* ret = (key);
         (ret) {
            ret->_state = DELETE;
            --_n;
             ;
        }
         ;
    }

:
    vector<HashData<K, V>> _table;
     _n = ;
};

相关免费在线工具

加密/解密文本
使用加密算法（如AES、TripleDES、Rabbit或RC4）加密和解密文本明文。在线工具，加密/解密文本在线工具，online
Gemini 图片去水印
基于开源反向 Alpha 混合算法去除 Gemini/Nano Banana 图片水印，支持批量处理与下载。在线工具，Gemini 图片去水印在线工具，online
Base64 字符串编码/解码
将字符串编码和解码为其 Base64 格式表示形式即可。在线工具，Base64 字符串编码/解码在线工具，online
Base64 文件转换器
将字符串、文件或图像转换为其 Base64 表示形式。在线工具，Base64 文件转换器在线工具，online
Markdown转HTML
将 Markdown（GFM）转为 HTML 片段，浏览器内 marked 解析；与 HTML转Markdown 互为补充。在线工具，Markdown转HTML在线工具，online
HTML转Markdown
将 HTML 片段转为 GitHub Flavored Markdown，支持标题、列表、链接、代码块与表格等；浏览器内处理，可链接预填。在线工具，HTML转Markdown在线工具，online

template<class K, class T, class KeyOfT, class HashFunc = DefaultHashFunc<K>>
class HashTable {
    typedef HashNode<T> Node;
    template<class K, class T, class Ptr, class Ref, class KeyOfT, class HashFunc>
    friend struct HTIterator;

public:
    typedef HTIterator<K, T, T*, T&, KeyOfT, HashFunc> iterator;
    typedef HTIterator<K, T, const T*, const T&, KeyOfT, HashFunc> const_iterator;

    iterator begin() {
        for (size_t i = 0; i < _table.size(); ++i) {
            Node* cur = _table[i];
            if (cur) return iterator(cur, this);
        }
        return iterator(nullptr, this);
    }

    iterator end() { return iterator(nullptr, this); }

    const_iterator begin() const {
        for (size_t i = 0; i < _table.size(); ++i) {
            Node* cur = _table[i];
            if (cur) return const_iterator(cur, this);
        }
        return const_iterator(nullptr, this);
    }

    const_iterator end() const { return const_iterator(nullptr, this); }

    HashTable() { _table.resize(10, nullptr); }

    ~HashTable() {
        for (size_t i = 0; i < _table.size(); ++i) {
            Node* cur = _table[i];
            while (cur) {
                Node* next = cur->_next;
                delete cur;
                cur = next;
            }
            _table[i] = nullptr;
        }
    }

    bool Insert(const T& data) {
        KeyOfT kot;
        iterator it = Find(kot(data));
        if (it != end()) return make_pair(it, false);

        HashFunc hf;
        if (_n == _table.size()) {
            size_t newSize = _table.size() * 2;
            vector<Node*> newTable;
            newTable.resize(newSize, nullptr);
            for (size_t i = 0; i < _table.size(); ++i) {
                Node* cur = _table[i];
                while (cur) {
                    Node* next = cur->_next;
                    size_t hashi = hf(kot(cur->_data)) % newSize;
                    cur->_next = newTable[hashi];
                    newTable[hashi] = cur;
                    cur = next;
                }
                _table[i] = nullptr;
            }
            _table.swap(newTable);
        }

        size_t hashi = hf(kot(data)) % _table.size();
        Node* newnode = new Node(data);
        newnode->_next = _table[hashi];
        _table[hashi] = newnode;
        ++_n;
        return make_pair(iterator(newnode, this), true);
    }

    Node* Find(const K& key) {
        HashFunc hf;
        KeyOfT kot;
        size_t hashi = hf(key) % _table.size();
        Node* cur = _table[hashi];
        while (cur) {
            if (kot(cur->_data) == key) return cur;
            cur = cur->_next;
        }
        return nullptr;
    }

    bool Erase(const K& key) {
        HashFunc hf;
        KeyOfT kot;
        size_t hashi = hf(key) % _table.size();
        Node* prev = nullptr;
        Node* cur = _table[hashi];
        while (cur) {
            if (kot(cur->_data) == key) {
                if (prev == nullptr) _table[hashi] = cur->_next;
                else prev->_next = cur->_next;
                --_n;
                delete cur;
                return true;
            }
            prev = cur;
            cur = cur->_next;
        }
        return false;
    }

private:
    vector<Node*> _table;
    size_t _n = 0;
};

template<class K, class T, class Ptr, class Ref, class KeyOfT, class HashFunc>
struct HTIterator {
    typedef HashNode<T> Node;
    typedef HTIterator<K, T, Ptr, Ref, KeyOfT, HashFunc> Self;
    typedef HTIterator<K, T, T*, T&, KeyOfT, HashFunc> Iterator;

    Node* _node;
    HashTable<K, T, KeyOfT, HashFunc>* _pht;

    HTIterator(Node* node, const HashTable<K, T, KeyOfT, HashFunc>* pht)
        : _node(node), _pht((HashTable<K, T, KeyOfT, HashFunc>*)pht) {}

    HTIterator(const Iterator& it) : _node(it._node), _pht(it._pht) {}

    Ref operator*() { return _node->_data; }
    Ptr operator->() { return &_node->_data; }

    Self& operator++() {
        if (_node->_next) {
            _node = _node->_next;
        } else {
            KeyOfT kot;
            HashFunc hf;
            size_t hashi = hf(kot(_node->_data)) % _pht->_table.size();
            ++hashi;
            while (hashi < _pht->_table.size()) {
                if (_pht->_table[hashi]) {
                    _node = _pht->_table[hashi];
                    return *this;
                } else {
                    ++hashi;
                }
            }
            _node = nullptr;
        }
        return *this;
    }

    bool operator!=(const Self& s) { return _node != s._node; }
    bool operator==(const Self& s) { return _node == s._node; }
};

namespace renshen {
template<class K>
class unordered_set {
    struct SetKeyOfT {
        const K& operator()(const K& key) { return key; }
    };
public:
    typedef typename hash_bucket::HashTable<K, K, SetKeyOfT>::const_iterator iterator;
    typedef typename hash_bucket::HashTable<K, K, SetKeyOfT>::const_iterator const_iterator;

    iterator begin() { return _ht.begin(); }
    iterator end() { return _ht.end(); }

    pair<const_iterator, bool> insert(const K& key) {
        pair<typename hash_bucket::HashTable<K, K, SetKeyOfT>::iterator, bool> ret = _ht.Insert(key);
        return pair<const_iterator, bool>(ret.first, ret.second);
    }

private:
    hash_bucket::HashTable<K, K, SetKeyOfT> _ht;
};
}

namespace renshen {
template<class K, class V>
class unordered_map {
    struct MapKeyOfT {
        const K& operator()(const pair<K, V>& kv) { return kv.first; }
    };
public:
    typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT>::iterator iterator;
    typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT>::const_iterator const_iterator;

    iterator begin() { return _ht.begin(); }
    iterator end() { return _ht.end(); }
    const_iterator begin() const { return _ht.begin(); }
    const_iterator end() const { return _ht.end(); }

    pair<iterator, bool> insert(const pair<K, V>& kv) { return _ht.Insert(kv); }

    V& operator[](const K& key) {
        pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));
        return ret.first->second;
    }

private:
    hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT> _ht;
};
}

template<size_t N>
class bitset {
public:
    bitset() { _a.resize(N / 32 + 1); }

    void set(size_t x) {
        size_t i = x / 32;
        size_t j = x % 32;
        _a[i] |= (1 << j);
    }

    void reset(size_t x) {
        size_t i = x / 32;
        size_t j = x % 32;
        _a[i] &= (~(1 << j));
    }

    bool test(size_t x) {
        size_t i = x / 32;
        size_t j = x % 32;
        return _a[i] & (1 << j);
    }

private:
    vector<int> _a;
};

template<size_t N, class K, class Hash1, class Hash2, class Hash3>
class BloomFilter {
public:
    void Set(const K& key) {
        size_t hash1 = Hash1()(key) % N;
        _bs.set(hash1);
        size_t hash2 = Hash2()(key) % N;
        _bs.set(hash2);
        size_t hash3 = Hash3()(key) % N;
        _bs.set(hash3);
    }

    bool Test(const K& key) {
        size_t hash1 = Hash1()(key) % N;
        if (!_bs.test(hash1)) return false;
        size_t hash2 = Hash2()(key) % N;
        if (!_bs.test(hash2)) return false;
        size_t hash3 = Hash3()(key) % N;
        if (!_bs.test(hash3)) return false;
        return true;
    }

private:
    bitset<N> _bs;
};

struct BKDRHash {
    size_t operator()(const string& str) {
        size_t hash = 0;
        for (auto ch : str) {
            hash = hash * 131 + ch;
        }
        return hash;
    }
};

class Solution {
public:
    vector<int> intersect(vector<int>& nums1, vector<int>& nums2) {
        unordered_map<int, int> hash1;
        unordered_map<int, int> hash2;
        vector<int> v;
        for (auto e : nums1) hash1[e]++;
        for (auto e : nums2) hash2[e]++;
        for (auto e : nums1) {
            if (hash1.count(e) && hash2.count(e)) {
                for (int i = 0; i < min(hash1[e], hash2[e]); ++i) {
                    v.push_back(e);
                }
                hash1[e] = 0;
                hash2[e] = 0;
            }
        }
        return v;
    }
};

C++ 哈希表底层实现：unordered_map/set、位图与布隆过滤器

unordered 系列关联式容器

哈希

哈希冲突

闭散列的模拟实现

更多推荐文章

相关免费在线工具

开散列的模拟实现

哈希桶里面迭代器的模拟实现

unordered_set 的封装

unordered_map 的封装

位图

应用

布隆过滤器

布隆过滤器的模拟实现

哈希切割

哈希切割的应用

作业部分

更多推荐文章

相关免费在线工具

C++ 哈希表底层实现：unordered_map/set、位图与布隆过滤器

unordered 系列关联式容器

哈希

哈希冲突

闭散列的模拟实现

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具

开散列的模拟实现

哈希桶里面迭代器的模拟实现

unordered_set 的封装

unordered_map 的封装

位图

应用

布隆过滤器

布隆过滤器的模拟实现

哈希切割

哈希切割的应用

作业部分

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具