C++ 哈希结构进阶：位图与布隆过滤器详解

位图利用比特位映射整型数据，空间效率高但仅支持整型。布隆过滤器通过多哈希函数映射非整型数据，存在误判但查询高效。文章深入讲解两者原理，推导布隆过滤器误判率公式，并提供 C++ 自定义位图与布隆过滤器的完整实现代码，涵盖 BKDR、AP、DJB 等哈希算法应用。

晚风告白 · 发布于 2026/3/16 · 更新于 2026/5/20 · 10 浏览

位图

核心概念与实现

在面试或实际工程中，常遇到海量数据查询场景。例如：给定 40 亿个不重复的无符号整数，如何快速判断一个数是否存在？

暴力遍历每次查询都是 O(N)，太慢；先排序 O(N log N) 再二分查找 O(log N) 虽然查询快，但 40 亿个 4 字节整数约占 16 GB，内存放不下。既然结果只有“存在”或“不存在”两种状态，我们可以用二进制的一位来代表这个信息：1 表示存在，0 表示不存在。这就是位图（BitMap）。

位图本质上是一种直接定址法的哈希表，每个整型值映射到一个比特位。它主要提供 set、reset、test 三个接口。

namespace lydly {
/// Fixed-capacity bitmap: one bit per non-negative integer value.
///
/// Storage is N / 32 + 1 words, so every value in [0, N] has a slot.
/// No bounds checking is performed: callers must keep x inside that range,
/// because indexing past the word vector is undefined behaviour.
///
/// NOTE(review): the word size is taken to be 32 bits, as in the original
/// comments; this assumes `unsigned int` is at least 32 bits wide.
template<size_t N>
class BitMap {
public:
    /// Allocates the backing words with every bit cleared.
    BitMap() : _bits(N / kBitsPerWord + 1, 0u) {}

    /// Marks x as present: ORs its bit in, leaving all other bits untouched.
    void Set(size_t x) {
        _bits[x / kBitsPerWord] |= Mask(x);
    }

    /// Marks x as absent: ANDs with the inverted mask, leaving other bits untouched.
    void Reset(size_t x) {
        _bits[x / kBitsPerWord] &= ~Mask(x);
    }

    /// Returns true when the bit for x is currently set.
    bool Test(size_t x) const {
        return (_bits[x / kBitsPerWord] & Mask(x)) != 0;
    }

private:
    static const size_t kBitsPerWord = 32;

    /// Single-bit mask for x inside its word. The literal is unsigned so that
    /// selecting bit 31 never shifts into the sign bit of a signed int.
    static unsigned int Mask(size_t x) {
        return 1u << (x % kBitsPerWord);
    }

    std::vector<unsigned int> _bits; // word i holds bits [32 * i, 32 * i + 31]
};
}

相关免费在线工具

加密/解密文本
使用加密算法（如AES、TripleDES、Rabbit或RC4）加密和解密文本明文。在线工具，加密/解密文本在线工具，online
Gemini 图片去水印
基于开源反向 Alpha 混合算法去除 Gemini/Nano Banana 图片水印，支持批量处理与下载。在线工具，Gemini 图片去水印在线工具，online
Base64 字符串编码/解码
将字符串编码和解码为其 Base64 格式表示形式即可。在线工具，Base64 字符串编码/解码在线工具，online
Base64 文件转换器
将字符串、文件或图像转换为其 Base64 表示形式。在线工具，Base64 文件转换器在线工具，online
Markdown转HTML
将 Markdown（GFM）转为 HTML 片段，浏览器内 marked 解析；与 HTML转Markdown 互为补充。在线工具，Markdown转HTML在线工具，online
HTML转Markdown
将 HTML 片段转为 GitHub Flavored Markdown，支持标题、列表、链接、代码块与表格等；浏览器内处理，可链接预填。在线工具，HTML转Markdown在线工具，online

#include "BitMap.h"
int main() {
    // With N = 0xFFFFFFFF the bitmap allocates N / 32 + 1 words.
    lydly::BitMap<0xFFFFFFFF> bm;

    // Mark every value in [0, 5000) as present.
    size_t value = 0;
    while (value < 5000) {
        bm.Set(value);
        ++value;
    }

    // Probe 100 pseudo-random values from [0, 10000) and print each verdict.
    for (int round = 0; round != 100; ++round) {
        const int n = rand() % 10000;
        const char* verdict = bm.Test(n) ? " 存在" : " 不存在";
        std::cout << n << verdict << std::endl;
    }
    return 0;
}

struct HashFuncBKDR {
    /// BKDR string hash, the algorithm shown in Kernighan and Ritchie's
    /// "The C Programming Language".
    ///
    /// Starts from 0 and, for each character, multiplies the running hash by
    /// 31 and adds the character. Arithmetic wraps modulo the range of size_t.
    /// Declared const so the functor can be invoked through const objects.
    ///
    /// @param s  string to hash; the empty string hashes to 0
    /// @return   the accumulated hash value
    size_t operator()(const std::string& s) const {
        size_t hash = 0;
        for (const char ch : s) {
            hash = hash * 31 + ch;
        }
        return hash;
    }
};

struct HashFuncAP {
    /// AP string hash (algorithm by Arash Partow).
    ///
    /// Characters at even and odd positions are mixed with different
    /// shift/XOR formulas; the odd-position term is additionally bit-inverted.
    /// Declared const so the functor can be invoked through const objects.
    ///
    /// @param s  string to hash; the empty string hashes to 0
    /// @return   the accumulated hash value
    size_t operator()(const std::string& s) const {
        size_t hash = 0;
        for (size_t i = 0; i < s.size(); i++) {
            if ((i & 1) == 0) {
                // even index
                hash ^= ((hash << 7) ^ (s[i]) ^ (hash >> 3));
            } else {
                // odd index: same shape with other shifts, then inverted
                hash ^= (~((hash << 11) ^ (s[i]) ^ (hash >> 5)));
            }
        }
        return hash;
    }
};

struct HashFuncDJB {
    /// DJB string hash (algorithm by Daniel J. Bernstein).
    ///
    /// Starts from 5381 and, for each character, multiplies the running hash
    /// by 33 and XORs the character in. Arithmetic wraps modulo the range of
    /// size_t. Declared const so the functor can be invoked through const objects.
    ///
    /// @param s  string to hash; the empty string hashes to 5381
    /// @return   the accumulated hash value
    size_t operator()(const std::string& s) const {
        size_t hash = 5381;
        for (const char ch : s) {
            // `*` binds tighter than `^`; the parentheses only make that explicit.
            hash = (hash * 33) ^ ch;
        }
        return hash;
    }
};

#include "BitMap.h"
#include <string>

/// Bloom filter over a bitmap of M = N * X bits, using three hash functors.
///
/// Set() turns on one bit per hash; Test() reports a key as present only when
/// all three of its bits are on. A key that was Set() is therefore always
/// reported present, while a key that was never Set() can still be reported
/// present if other keys happened to turn on the same bits (false positive).
template<size_t N,      // number of keys the filter is sized for
           size_t X = 5, // average number of bits budgeted per key
           class K = std::string,
           class Hash1 = HashFuncBKDR,
           class Hash2 = HashFuncAP,
           class Hash3 = HashFuncDJB>
class BloomFilter {
    // Every bit index is computed as hash % M, so M == 0 would divide by zero.
    static_assert(N * X > 0, "BloomFilter requires N * X > 0");

public:
    /// Records key by setting the bit selected by each of the three hashes.
    void Set(const K& key) {
        _bs.Set(Hash1()(key) % M);
        _bs.Set(Hash2()(key) % M);
        _bs.Set(Hash3()(key) % M);
    }

    /// Returns false as soon as one of the key's bits is found clear (the
    /// remaining hashes are then not computed); returns true when all three
    /// bits are set, which may be a false positive.
    bool Test(const K& key) {
        return _bs.Test(Hash1()(key) % M)
            && _bs.Test(Hash2()(key) % M)
            && _bs.Test(Hash3()(key) % M);
    }

private:
    static const size_t M = N * X; // total number of bits hashes are reduced to
    lydly::BitMap<M> _bs;
};

#include "BloomFilter.h"
int main() {
    BloomFilter<100> bf;
    const std::string prefix("test");

    // Insert the keys test0 ... test99.
    for (int i = 0; i != 100; ++i) {
        bf.Set(prefix + std::to_string(i));
    }

    // Query the keys that were just inserted.
    for (int i = 0; i != 100; ++i) {
        const std::string key = prefix + std::to_string(i);
        if (bf.Test(key)) {
            std::cout << key << " 存在" << std::endl;
        }
    }

    // Query test1000 ... test1499, which were never inserted: any hit printed
    // here is a false positive.
    for (int i = 1000; i != 1500; ++i) {
        const std::string key = prefix + std::to_string(i);
        if (bf.Test(key)) {
            std::cout << key << " 存在 (误判)" << std::endl;
        }
    }
    return 0;
}

C++ 哈希结构进阶：位图与布隆过滤器详解

位图

核心概念与实现

更多推荐文章

相关免费在线工具

C++ 标准库 bitset

布隆过滤器

概念

误判率数学推导

实现

更多推荐文章

相关免费在线工具

C++ 哈希结构进阶：位图与布隆过滤器详解

位图

核心概念与实现

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具

C++ 标准库 bitset

布隆过滤器

概念

误判率数学推导

实现

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具