哈希（Hash）核心概念与 C++ 应用

哈希（Hash）核心概念与 C++ 应用 | 极客日志

哈希表数组（桶数组）
索引 0 → 桶 0 → 链表节点 (key:A, val:1) → 链表节点 (key:J, val:3) → null
索引 1 → 桶 1 → 链表节点 (key:B, val:2) → null
索引 2 → 桶 2 → null （空桶）
索引 3 → 桶 3 → 链表节点 (key:D, val:5) → 链表节点 (key:M, val:7) → 链表节点 (key:X, val:9) → null

特性	拉链法	开放寻址法
冲突处理难度	简单（挂链表即可）	复杂（需找空位，处理删除）
负载因子容忍度	高（>1 也能工作）	低（>0.7 性能急剧下降）
内存利用率	非连续内存，灵活分配	连续数组，易浪费空间
删除操作	直接删链表节点，无残留	需'标记删除'，易产生空洞
扩容成本	需重新哈希所有元素，但只需把节点重新挂到新桶，无需探测	需重新探测所有元素位置

#include <iostream>
#include <string>
using namespace std;

// Node of a singly linked bucket chain; stores one key/value entry.
template <typename K, typename V>
struct HashNode {
    K key;                // Key of the entry
    V value;              // Value mapped to the key
    HashNode<K, V>* next; // Next node in the same bucket (nullptr at the tail)

    // Creates a detached node (next == nullptr) holding copies of k and v.
    HashNode(const K& k, const V& v) : key(k), value(v), next(nullptr) {}
};

// Simple separate-chaining hash table: an array of buckets, each bucket being
// the head of a singly linked list of HashNode entries.
//
// Only std::string and int keys are supported, because those are the only
// types hashFunc is overloaded for. The table owns every node it allocates,
// so copying is disabled (a member-wise copy would free each node twice).
template <typename K, typename V>
class HashTable {
private:
    using Node = HashNode<K, V>;
    Node** buckets;      // Bucket array: each slot is the head of a chain (or nullptr)
    size_t bucketCount;  // Number of buckets; always >= 1
    size_t elementCount; // Number of stored key/value entries
    static constexpr float loadFactorThreshold = 0.75f; // Grow once size/buckets exceeds this

    // Polynomial string hash with multiplier 31.
    size_t hashFunc(const std::string& key) const {
        size_t hash = 0;
        for (char c : key) {
            hash = hash * 31 + c;
        }
        return hash;
    }

    // Hash for int keys: the value itself, converted to size_t.
    size_t hashFunc(int key) const {
        return static_cast<size_t>(key);
    }

    // Maps a key to the index of the bucket it belongs to.
    size_t getBucketIndex(const K& key) const {
        return hashFunc(key) % bucketCount;
    }

    // Doubles the bucket count and relinks every existing node into the new
    // bucket array. Nodes are moved, not copied, so nothing is freed here
    // except the old bucket array itself.
    void resize() {
        const size_t newBucketCount = bucketCount * 2;
        Node** newBuckets = new Node*[newBucketCount](); // value-initialized: all nullptr

        for (size_t i = 0; i < bucketCount; ++i) {
            Node* curr = buckets[i];
            while (curr != nullptr) {
                Node* next = curr->next; // remember the rest of the old chain before relinking
                const size_t newIndex = hashFunc(curr->key) % newBucketCount;
                // Push the node onto the front of its new chain.
                curr->next = newBuckets[newIndex];
                newBuckets[newIndex] = curr;
                curr = next;
            }
            // The nodes now live in newBuckets and must NOT be deleted here.
        }

        delete[] buckets; // only the old pointer array is released
        buckets = newBuckets;
        bucketCount = newBucketCount;
    }

public:
    // Creates an empty table with initBucketCount buckets. A request for zero
    // buckets is raised to one so that index computation never divides by zero
    // and doubling in resize() actually grows the table.
    HashTable(size_t initBucketCount = 16)
        : buckets(nullptr),
          bucketCount(initBucketCount == 0 ? 1 : initBucketCount),
          elementCount(0) {
        buckets = new Node*[bucketCount](); // all buckets start empty (nullptr)
    }

    // The table owns raw node pointers; a shallow copy would double-free them.
    HashTable(const HashTable&) = delete;
    HashTable& operator=(const HashTable&) = delete;

    // Frees every node in every chain, then the bucket array.
    ~HashTable() {
        for (size_t i = 0; i < bucketCount; ++i) {
            Node* curr = buckets[i];
            while (curr != nullptr) {
                Node* next = curr->next;
                delete curr;
                curr = next;
            }
        }
        delete[] buckets;
    }

    // Inserts key -> value, or overwrites the value if key is already present.
    // The table grows (and rehashes) only when a new entry is about to be
    // added while the load factor is above the threshold.
    void put(const K& key, const V& value) {
        size_t index = getBucketIndex(key);

        // Update in place when the key already exists.
        for (Node* curr = buckets[index]; curr != nullptr; curr = curr->next) {
            if (curr->key == key) {
                curr->value = value;
                return;
            }
        }

        if (static_cast<float>(elementCount) / bucketCount > loadFactorThreshold) {
            resize();
            index = getBucketIndex(key); // bucket count changed, so the index must be recomputed
        }

        // New key: push a fresh node onto the front of the chain.
        Node* newNode = new Node(key, value);
        newNode->next = buckets[index];
        buckets[index] = newNode;
        elementCount++;
    }

    // Looks up key. On success copies the stored value into `value` and
    // returns true; otherwise leaves `value` untouched and returns false.
    bool get(const K& key, V& value) const {
        const size_t index = getBucketIndex(key);
        for (const Node* curr = buckets[index]; curr != nullptr; curr = curr->next) {
            if (curr->key == key) {
                value = curr->value;
                return true;
            }
        }
        return false;
    }

    // Removes the entry for key. Returns true if an entry was removed,
    // false if the key was not present.
    bool remove(const K& key) {
        const size_t index = getBucketIndex(key);
        Node* curr = buckets[index];
        Node* prev = nullptr; // predecessor of curr within the chain

        while (curr != nullptr) {
            if (curr->key == key) {
                if (prev == nullptr) {
                    buckets[index] = curr->next; // removing the chain head
                } else {
                    prev->next = curr->next;     // removing a middle or tail node
                }
                delete curr;
                elementCount--;
                return true;
            }
            prev = curr;
            curr = curr->next;
        }
        return false;
    }

    // Returns the number of entries currently stored.
    size_t size() const {
        return elementCount;
    }
};

// 测试代码
int main() {
    HashTable<string, int> ht;

    // 插入元素
    ht.put("apple", 5);
    ht.put("banana", 3);
    ht.put("apple", 10); // 更新 apple 的值

    // 查找元素
    int value;
    if (ht.get("apple", value)) {
        cout << "apple: " << value << endl; // 输出 10
    }

    // 删除元素
    ht.remove("banana");
    cout << "size: " << ht.size() << endl; // 输出 1

    return 0;
}

unordered_map<string, int> mp;
mp.reserve(10000); // Reserve enough buckets up front so that inserting 10000 elements does not trigger repeated rehashing

特性	unordered_set/unordered_map（哈希）	set/map（红黑树）
底层实现	哈希表 + 拉链法	红黑树（平衡二叉树）
元素顺序	无序	按键升序排列
平均时间复杂度	O(1) 极致高效	O(log n) 稳定高效
最坏时间复杂度	O(n)	O(log n) 永远稳定
内存占用	较大（空间换时间）	较小
适用场景	只需要查找/插入/删除，不需要有序	需要有序遍历、范围查询

#include <iostream>
#include <unordered_set>
using namespace std;

int main() {
    std::unordered_set<int> numbers;

    // 1. Insert (average O(1)); the second 1 is a duplicate and is silently ignored.
    for (int candidate : {1, 2, 1}) {
        numbers.insert(candidate);
    }

    // 2. Lookup (average O(1)).
    const bool hasOne = numbers.find(1) != numbers.end();
    if (hasOne) {
        std::cout << "找到元素 1" << std::endl;
    }

    // 3. Erase by value (average O(1)).
    numbers.erase(2);

    // 4. Size and emptiness queries.
    std::cout << "容器大小：" << numbers.size() << std::endl;
    std::cout << "是否为空：" << numbers.empty() << std::endl;

    // 5. Iterate (no ordering guarantee); only 1 is left at this point.
    for (auto it = numbers.begin(); it != numbers.end(); ++it) {
        std::cout << *it << " ";
    }
    return 0;
}

#include <iostream>
#include <unordered_map>
#include <string>
using namespace std;

int main() {
    std::unordered_map<std::string, int> ages;

    // 1. Insert key/value pairs (average O(1)).
    ages["张三"] = 20;
    ages.insert({"李四", 22});
    ages.insert({"张三", 25}); // key already present: insert leaves the stored 20 untouched

    // 2. Look the key up once and read the mapped value through the iterator.
    const auto zhang = ages.find("张三");
    if (zhang != ages.end()) {
        std::cout << "张三的年龄：" << zhang->second << std::endl; // prints 20
    }

    // 3. Erase by key (average O(1)).
    ages.erase("李四");

    // 4. Iterate (no ordering guarantee).
    for (const auto& entry : ages) {
        std::cout << entry.first << " : " << entry.second << std::endl;
    }
    return 0;
}

#include <iostream>
#include <unordered_set>
#include <string>
using namespace std;

// Custom key type: a student record.
struct Student {
    int id;           // Student number
    std::string name; // Student name

    // Requirement 1: a hash-container key must provide operator==.
    // Two students compare equal when their ids match; the name is ignored.
    bool operator==(const Student& rhs) const {
        return id == rhs.id;
    }
};

// Option 1: specialize std::hash for Student so that unordered containers
// can use Student as a key without an explicit hasher argument.
namespace std {
    template<>
    struct hash<Student> {
        // Hashes a student by id only, delegating to the standard int hash.
        size_t operator()(const Student& student) const {
            return std::hash<int>{}(student.id);
        }
    };
}

// 方案 2：自定义哈希函数，传入容器（灵活，适合临时使用）
struct StudentHash {
    size_t operator()(const Student& s) const {
        return hash<int>()(s.id);
    }
};

int main() {
    // 方案 1 使用：直接用 unordered_set
    unordered_set<Student> s1;
    s1.insert({1001, "张三"});
    s1.insert({1002, "李四"});

    // 方案 2 使用：传入自定义哈希函数
    unordered_set<Student, StudentHash> s2;
    s2.insert({1001, "张三"});

    return 0;
}

size_t operator()(const Student& s) const {
    // Combine the per-member hashes: shift the name hash left by one bit and
    // XOR it with the id hash, so that both members contribute to the result.
    // NOTE(review): this is only a valid hasher if operator== compares name
    // as well; with an id-only operator==, equal keys could hash differently.
    const size_t idHash = hash<int>()(s.id);
    const size_t nameHash = hash<string>()(s.name);
    return idHash ^ (nameHash << 1);
}

哈希（Hash）核心概念与 C++ 应用

一、哈希的核心概念

1. 哈希到底是什么？

2. 哈希的核心基础概念

二、哈希冲突的核心解决方法

方法 1：拉链法 (Separate Chaining)

更多推荐文章

相关免费在线工具

方法 2：开放寻址法 (Open Addressing)

三、哈希的核心性能指标 + 特性

1. 哈希的核心性能指标：负载因子 (Load Factor)

2. 哈希的核心时间复杂度

3. 哈希的核心特性

四、哈希容器的使用

1. C++ 哈希容器全家福：unordered_set & unordered_map

2. 哈希容器的基础用法

3. 哈希容器的高频坑点

五、自定义类型作为哈希容器 key

1. 自定义类型作为哈希容器 key 的完整实现方案

2. 多成员结构体的哈希值计算

3. 哈希容器的性能调优技巧

六、哈希的经典应用场景

更多推荐文章

相关免费在线工具

哈希（Hash）核心概念与 C++ 应用

一、哈希的核心概念

1. 哈希到底是什么？

2. 哈希的核心基础概念

二、哈希冲突的核心解决方法

方法 1：拉链法 (Separate Chaining)

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具

方法 2：开放寻址法 (Open Addressing)

三、哈希的核心性能指标 + 特性

1. 哈希的核心性能指标：负载因子 (Load Factor)

2. 哈希的核心时间复杂度

3. 哈希的核心特性

四、哈希容器的使用

1. C++ 哈希容器全家福：unordered_set & unordered_map

2. 哈希容器的基础用法

3. 哈希容器的高频坑点

五、自定义类型作为哈希容器 key

1. 自定义类型作为哈希容器 key 的完整实现方案

2. 多成员结构体的哈希值计算

3. 哈希容器的性能调优技巧

六、哈希的经典应用场景

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具