C++ 哈希表核心机制：unordered 系列容器、位图与布隆过滤器实战 | 极客日志

C++算法

C++ 哈希表核心机制：unordered 系列容器、位图与布隆过滤器实战

深入解析 C++ 中 unordered_map 和 unordered_set 的底层哈希表实现。涵盖闭散列（线性探测）与开散列（链地址法）的冲突解决策略，以及迭代器模拟细节。进一步探讨位图在海量数据处理中的应用，如去重与交集计算，并介绍布隆过滤器的原理及其在空间效率上的优势。最后通过哈希切割解决大数据量下的文件交集问题，辅以经典面试题巩固理解。

清酒独酌 发布于 2026/3/27 更新于 2026/7/20 25 浏览

unordered 系列关联式容器

unordered_map, unordered_set, unordered_multimap, unordered_multiset 均基于哈希表实现。它们的接口与 set/map 类似，支持范围 for 遍历。

主要区别：

迭代器为单向迭代器。
遍历时不保证有序。
性能通常优于红黑树实现的容器（如 std::set），但在数据已排序插入场景下，树结构可能更优。

注意：性能对比建议在 Release 模式下进行。

哈希基础

哈希（散列）通过建立值与存储位置的映射关系来提高查询效率，本质是以空间换时间。常用方法包括：

直接定址法：适用于值分布集中的场景（如统计字符出现次数）。
除留余数法：适用于值分布分散的场景，公式通常为 key % n。

哈希冲突处理

当不同键映射到同一位置时发生冲突，常见解决方案有：

闭散列（开放定址法）：当前位置被占用时，按规则寻找下一个空位。包括线性探测和二次探测。
开散列（链地址法）：即哈希桶，每个位置挂一个链表。

闭散列模拟实现

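采用除留余数法配合线性探测。每个位置用 EXIST、EMPTY、DELETE 三种状态标记。其中 DELETE 是删除标记：如果删除元素后直接把位置置为 EMPTY，线性探测会在该位置提前终止，导致排在它后面的冲突元素再也查找不到；标记为 DELETE 后，查找会继续向后探测，而插入可以复用该位置。扩容重新映射时只搬迁 EXIST 状态的元素，DELETE 状态的位置直接跳过。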

// State of one slot in the open-addressing (closed hashing) table.
enum STATE {
    EXIST = 0,   // slot currently holds a live element
    EMPTY = 1,   // default state of a freshly constructed slot
    DELETE = 2   // slot held an element that was erased (tombstone)
};

// One slot of the open-addressing hash table: the stored key/value pair
// together with the slot's occupancy state.
template<class K, class V>
struct HashData {
    pair<K, V> _kv;       // payload; only meaningful while _state == EXIST
    STATE _state{EMPTY};  // every slot starts out EMPTY
};

// Default hash functor: converts the key itself to size_t.
//
// Suitable for key types that are explicitly convertible to size_t
// (integers, enums, pointers). Other key types need their own functor.
template<class K>
struct DefaultHashFunc {
    // const-qualified so the functor can also be invoked through a const
    // object or a const reference.
    size_t operator()(const K& key) const {
        // A C-style cast is kept deliberately: unlike static_cast it also
        // accepts pointer keys.
        return (size_t)key;
    }
};

template<class K,  ,   = DefaultHashFunc<K>>
 HashTable {
:
    () { _table.(); }

    {
        
         (_n *  / _table.() >= ) {
             newSize = _table.() * ;
            HashTable<K, V, HashFunc> newHT;
            newHT._table.(newSize);
             ( i = ; i < _table.(); ++i) {
                 (_table[i]._state == EXIST) {
                    newHT.(_table[i]._kv);
                }
            }
            _table.(newHT._table);
        }

        HashFunc hf;
         hashi = (kv.first) % _table.();
         (_table[hashi]._state == EXIST) {
            ++hashi;
            hashi %= _table.();
        }
        _table[hashi]._kv = kv;
        _table[hashi]._state = EXIST;
        ++_n;
         ;
    }

    {
        HashFunc hf;
         hashi = (key) % _table.();
         (_table[hashi]._state != EMPTY) {
             (_table[hashi]._state == EXIST && _table[hashi]._kv.first == key) {
                 (HashData< K, V>*)&_table[hashi];
            }
            ++hashi;
            hashi %= _table.();
        }
         ;
    }

    {
        HashData< K, V>* ret = (key);
         (ret) {
            ret->_state = DELETE;
            --_n;
             ;
        }
         ;
    }

:
    vector<HashData<K, V>> _table;
     _n = ;
};

相关免费在线工具

加密/解密文本
使用加密算法（如AES、TripleDES、Rabbit或RC4）加密和解密文本明文。在线工具，加密/解密文本在线工具，online
Gemini 图片去水印
基于开源反向 Alpha 混合算法去除 Gemini/Nano Banana 图片水印，支持批量处理与下载。在线工具，Gemini 图片去水印在线工具，online
Base64 字符串编码/解码
将字符串编码和解码为其 Base64 格式表示形式即可。在线工具，Base64 字符串编码/解码在线工具，online
Base64 文件转换器
将字符串、文件或图像转换为其 Base64 表示形式。在线工具，Base64 文件转换器在线工具，online
Markdown转HTML
将 Markdown（GFM）转为 HTML 片段，浏览器内 marked 解析；与 HTML转Markdown 互为补充。在线工具，Markdown转HTML在线工具，online
HTML转Markdown
将 HTML 片段转为 GitHub Flavored Markdown，支持标题、列表、链接、代码块与表格等；浏览器内处理，可链接预填。在线工具，HTML转Markdown在线工具，online

template<class K, class T, class KeyOfT, class HashFunc = DefaultHashFunc<K>>
class HashTable {
    typedef HashNode<T> Node;
    template<class K, class T, class Ptr, class Ref, class KeyOfT, class HashFunc>
    friend struct HTIterator;

public:
    typedef HTIterator<K, T, T*, T&, KeyOfT, HashFunc> iterator;
    typedef HTIterator<K, T, const T*, const T&, KeyOfT, HashFunc> const_iterator;

    iterator begin() {
        for (size_t i = 0; i < _table.size(); ++i) {
            Node* cur = _table[i];
            if (cur) return iterator(cur, this);
        }
        return iterator(nullptr, this);
    }

    iterator end() { return iterator(nullptr, this); }

    ~HashTable() {
        for (size_t i = 0; i < _table.size(); ++i) {
            Node* cur = _table[i];
            while (cur) {
                Node* next = cur->_next;
                delete cur;
                cur = next;
            }
            _table[i] = nullptr;
        }
    }

    pair<iterator, bool> Insert(const T& data) {
        KeyOfT kot;
        iterator it = Find(kot(data));
        if (it != end()) return make_pair(it, false);

        HashFunc hf;
        if (_n == _table.size()) {
            size_t newSize = _table.size() * 2;
            vector<Node*> newTable(newSize, nullptr);
            for (size_t i = 0; i < _table.size(); ++i) {
                Node* cur = _table[i];
                while (cur) {
                    Node* next = cur->_next;
                    size_t hashi = hf(kot(cur->_data)) % newSize;
                    cur->_next = newTable[hashi];
                    newTable[hashi] = cur;
                    cur = next;
                }
                _table[i] = nullptr;
            }
            _table.swap(newTable);
        }

        size_t hashi = hf(kot(data)) % _table.size();
        Node* newnode = new Node(data);
        newnode->_next = _table[hashi];
        _table[hashi] = newnode;
        ++_n;
        return make_pair(iterator(newnode, this), true);
    }

    iterator Find(const K& key) {
        HashFunc hf;
        KeyOfT kot;
        size_t hashi = hf(key) % _table.size();
        Node* cur = _table[hashi];
        while (cur) {
            if (kot(cur->_data) == key) return iterator(cur, this);
            cur = cur->_next;
        }
        return end();
    }

    bool Erase(const K& key) {
        HashFunc hf;
        KeyOfT kot;
        size_t hashi = hf(key) % _table.size();
        Node* prev = nullptr;
        Node* cur = _table[hashi];
        while (cur) {
            if (kot(cur->_data) == key) {
                if (!prev) _table[hashi] = cur->_next;
                else prev->_next = cur->_next;
                --_n;
                delete cur;
                return true;
            }
            prev = cur;
            cur = cur->_next;
        }
        return false;
    }

private:
    vector<Node*> _table;
    size_t _n = 0;
};

template<class K, class T, class Ptr, class Ref, class KeyOfT, class HashFunc>
struct HTIterator {
    typedef HashNode<T> Node;
    typedef HTIterator Self;
    typedef HTIterator<K, T, T*, T&, KeyOfT, HashFunc> Iterator;

    Node* _node;
    HashTable<K, T, KeyOfT, HashFunc>* _pht;

    HTIterator(Node* node, const HashTable<K, T, KeyOfT, HashFunc>* pht)
        : _node(node), _pht(const_cast<HashTable*>(pht)) {}

    HTIterator(const Iterator& it) : _node(it._node), _pht(it._pht) {}

    Ref operator*() { return _node->_data; }
    Ptr operator->() { return &_node->_data; }

    Self& operator++() {
        if (_node->_next) {
            _node = _node->_next;
        } else {
            KeyOfT kot;
            HashFunc hf;
            size_t hashi = hf(kot(_node->_data)) % _pht->_table.size();
            ++hashi;
            while (hashi < _pht->_table.size()) {
                if (_pht->_table[hashi]) {
                    _node = _pht->_table[hashi];
                    return *this;
                }
                ++hashi;
            }
            _node = nullptr;
        }
        return *this;
    }

    bool operator!=(const Self& s) { return _node != s._node; }
    bool operator==(const Self& s) { return _node == s._node; }
};

namespace my_hash {
template<class K>
class unordered_set {
    struct SetKeyOfT {
        const K& operator()(const K& key) { return key; }
    };
public:
    typedef typename hash_bucket::HashTable<K, K, SetKeyOfT>::const_iterator iterator;
    typedef typename hash_bucket::HashTable<K, K, SetKeyOfT>::const_iterator const_iterator;

    iterator begin() { return _ht.begin(); }
    iterator end() { return _ht.end(); }

    pair<const_iterator, bool> insert(const K& key) {
        pair<typename hash_bucket::HashTable<K, K, SetKeyOfT>::iterator, bool> ret = _ht.Insert(key);
        return pair<const_iterator, bool>(ret.first, ret.second);
    }

private:
    hash_bucket::HashTable<K, K, SetKeyOfT> _ht;
};

template<class K, class V>
class unordered_map {
    struct MapKeyOfT {
        const K& operator()(const pair<K, V>& kv) { return kv.first; }
    };
public:
    typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT>::iterator iterator;
    typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT>::const_iterator const_iterator;

    iterator begin() { return _ht.begin(); }
    iterator end() { return _ht.end(); }

    pair<iterator, bool> insert(const pair<K, V>& kv) { return _ht.Insert(kv); }

    V& operator[](const K& key) {
        pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));
        return ret.first->second;
    }

private:
    hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT> _ht;
};
}

// Fixed-size bitmap that stores one bit per value, packed 32 bits per word.
//
// Precondition for set/reset/test: x must address a bit inside the
// allocated storage (x < (N / 32 + 1) * 32); no bounds check is performed.
template<size_t N>
class bitset {
public:
    // One extra word covers the bits beyond the last full group of 32.
    bitset() { _a.resize(N / 32 + 1); }

    // Sets bit x to 1.
    void set(size_t x) {
        // 1u keeps the shift unsigned: with a signed 1, bit 31 would shift
        // into the sign bit.
        _a[x / 32] |= (1u << (x % 32));
    }

    // Clears bit x to 0.
    void reset(size_t x) {
        _a[x / 32] &= ~(1u << (x % 32));
    }

    // Returns true when bit x is set.
    bool test(size_t x) const {
        return (_a[x / 32] & (1u << (x % 32))) != 0;
    }

private:
    // Unsigned words so that every one of the 32 bit positions is an
    // ordinary value bit.
    std::vector<unsigned int> _a;
};

// Bloom filter over an N-bit bitmap using three independent hash functors.
//
// Set marks the three bit positions derived from a key; Test reports false
// as soon as any of those positions is unset, so a false answer is definite
// while a true answer only means all three bits happen to be set.
template<size_t N, class K, class Hash1, class Hash2, class Hash3>
class BloomFilter {
public:
    // Marks the three bit positions (hash % N) that correspond to key.
    void Set(const K& key) {
        _bs.set(Hash1()(key) % N);
        _bs.set(Hash2()(key) % N);
        _bs.set(Hash3()(key) % N);
    }

    // Returns false if any of the key's three bits is clear, true otherwise.
    // Short-circuit evaluation keeps the later hashes from being computed
    // once an earlier bit is found unset.
    bool Test(const K& key) {
        return _bs.test(Hash1()(key) % N)
            && _bs.test(Hash2()(key) % N)
            && _bs.test(Hash3()(key) % N);
    }

private:
    bitset<N> _bs;  // backing bitmap
};

C++ 哈希表核心机制：unordered 系列容器、位图与布隆过滤器实战

unordered 系列关联式容器

哈希基础

哈希冲突处理

闭散列模拟实现

更多推荐文章

相关免费在线工具

开散列模拟实现

迭代器模拟实现

unordered_set 与 unordered_map 封装

位图（Bitmap）

应用场景

布隆过滤器（Bloom Filter）

模拟实现

哈希切割

应用案例

练习题

更多推荐文章

相关免费在线工具

C++ 哈希表核心机制：unordered 系列容器、位图与布隆过滤器实战

unordered 系列关联式容器

哈希基础

哈希冲突处理

闭散列模拟实现

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具

开散列模拟实现

迭代器模拟实现

unordered_set 与 unordered_map 封装

位图（Bitmap）

应用场景

布隆过滤器（Bloom Filter）

模拟实现

哈希切割

应用案例

练习题

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具