C++ STL 哈希表原理与模拟实现

C++ STL 哈希表原理与模拟实现 | 极客日志

// State tag carried by every slot of the open-addressing hash table.
enum STATE {
    EXIST = 0,  // the slot holds a live entry
    EMPTY = 1,  // the slot has never held an entry
    DELETE = 2  // the slot's entry was removed (logical deletion / tombstone)
};

// One slot of the hash table: a key/value pair together with its state tag.
template <typename K, typename V>
struct HashData {
    pair<K, V> _kv;       // the stored key/value pair
    STATE _state{EMPTY};  // state of this slot; a fresh slot starts out EMPTY
};

// Default hash functor for key types that convert directly to size_t
// (integers and similar).
template<class K>
struct DefaultHashFunc {
    // Returns the key converted to size_t.
    // The call operator is const so the functor also works through const
    // objects and references.
    size_t operator()(const K& key) const {
        // A C-style cast is kept on purpose: it accepts every key type the
        // original accepted, not only those a static_cast would allow.
        return (size_t)key;
    }
};

// Specialization for std::string using the BKDR scheme: for every character,
// multiply the running hash by 131 and add the character.
template<>
struct DefaultHashFunc<std::string> {
    // Returns the BKDR hash of `str`; the empty string hashes to 0.
    size_t operator()(const std::string& str) const {
        size_t hash = 0;
        for (const char ch : str) {
            // Go through unsigned char so bytes >= 0x80 contribute 128..255
            // regardless of whether plain char is signed on this platform.
            hash = hash * 131 + static_cast<unsigned char>(ch);
        }
        return hash;
    }
};

// 哈希表类，使用开放地址法（线性探测）解决冲突
template<class K, class V, class HashFunc = DefaultHashFunc<K>>
class HashTable {
public:
    // 构造函数：初始化哈希表大小（默认 10）
    HashTable() {
        _table.resize(10);
    }
private:
    vector<HashData<K, V>> _table;
    size_t _n = 0; // 当前哈希表中有效数据的个数
};

bool Insert(const pair<K, V>& kv) {
    // 扩容判断：负载因子 >= 0.7 时扩容
    if (_n * 10 / _table.size() >= 7) {
        size_t newSize = _table.size() * 2;
        HashTable<K, V, HashFunc> newHT;
        newHT._table.resize(newSize);
        // 遍历旧表，将所有状态为 EXIST 的节点重新插入到新表
        for (size_t i = 0; i < _table.size(); i++) {
            if (_table[i]._state == EXIST) {
                newHT.Insert(_table[i]._kv);
            }
        }
        _table.swap(newHT._table);
    }

    // 线性探测法寻找插入位置
    HashFunc hf;
    size_t hashi = hf(kv.first) % _table.size();
    while (_table[hashi]._state == EXIST) {
        ++hashi;
        hashi %= _table.size();
    }
    _table[hashi]._kv = kv;
    _table[hashi]._state = EXIST;
    ++_n;
    return true;
}

// Looks up `key` with linear probing.
//
// Returns a pointer to the slot holding the key, or nullptr when the key is
// not stored. The slot is exposed with a const key type so callers cannot
// modify the key through the returned pointer.
HashData<const K, V>* Find(const K& key) {
    const size_t capacity = _table.size();
    if (capacity == 0) {
        return nullptr;  // nothing to probe; also avoids a modulo by zero
    }

    HashFunc hf;
    size_t hashi = hf(key) % capacity;
    // An EMPTY slot ends the probe sequence. DELETE slots must be stepped over
    // because the key may have been placed behind one. The loop is capped at
    // `capacity` steps: tombstones are not counted by the load factor, so a
    // table can end up with no EMPTY slot, and an uncapped search for an
    // absent key would then never terminate.
    for (size_t probed = 0; probed < capacity && _table[hashi]._state != EMPTY; ++probed) {
        if (_table[hashi]._state == EXIST && _table[hashi]._kv.first == key) {
            // NOTE(review): HashData<K, V> and HashData<const K, V> are distinct
            // types; this cast mirrors the original and relies on them sharing
            // the same layout.
            return reinterpret_cast<HashData<const K, V>*>(&_table[hashi]);
        }
        hashi = (hashi + 1) % capacity;
    }
    return nullptr;
}

bool Erase(const K& key) {
    HashData<const K, V>* ret = Find(key);
    if (ret) {
        ret->_state = DELETE;
        --_n;
        return true;
    }
    return false;
}

template <class K, class V, class HashFunc = DefaultHashFunc<K>>
class HashTable {
    typedef HashNode<K, V> Node;
public:
    HashTable() {
        _table.resize(10, nullptr);
    }
    ~HashTable() {
        for (size_t i = 0; i < _table.size(); i++) {
            Node *cur = _table[i];
            while (cur) {
                Node *next = cur->_next;
                delete cur;
                cur = next;
            }
            _table[i] = nullptr;
        }
    }
    // ... 其他成员函数
private:
    vector<Node *> _table;
    size_t _n = 0;
};

bool Insert(const pair<K, V> &kv) {
    if (Find(kv.first)) {
        return false;
    }
    HashFunc hf;
    // 扩容：当负载因子达到 1 时，将桶数组大小扩大为原来的 2 倍
    if (_n == _table.size()) {
        size_t newSize = _table.size() * 2;
        vector<Node *> newTable;
        newTable.resize(newSize, nullptr);
        // 遍历旧表，将每个节点重新哈希并头插到新表
        for (size_t i = 0; i < _table.size(); i++) {
            Node *cur = _table[i];
            while (cur) {
                Node *next = cur->_next;
                size_t hashi = hf(cur->_kv.first) % newSize;
                cur->_next = newTable[hashi];
                newTable[hashi] = cur;
                cur = next;
            }
            _table[i] = nullptr;
        }
        _table.swap(newTable);
    }
    size_t hashi = hf(kv.first) % _table.size();
    Node *newnode = new Node(kv);
    newnode->_next = _table[hashi];
    _table[hashi] = newnode;
    ++_n;
    return true;
}

// Searches the chain of the bucket that `key` hashes to.
// Returns the node holding the key, or nullptr when no node in that chain matches.
Node *Find(const K &key) {
    HashFunc hasher;
    const size_t bucket = hasher(key) % _table.size();

    for (Node *node = _table[bucket]; node != nullptr; node = node->_next) {
        if (node->_kv.first == key) {
            return node;
        }
    }
    return nullptr;
}

bool Erase(const K &key) {
    HashFunc hf;
    size_t hashi = hf(key) % _table.size();
    Node *prev = nullptr;
    Node *cur = _table[hashi];
    while (cur) {
        if (cur->_kv.first == key) {
            if (prev == nullptr) {
                _table[hashi] = cur->_next;
            } else {
                prev->_next = cur->_next;
            }
            delete cur;
            --_n;
            return true;
        }
        prev = cur;
        cur = cur->_next;
    }
    return false;
}

// Iterator type for the bucket hash table. It pairs a node pointer with a
// pointer to the table the iterator belongs to.
template <class K, class T, class Ptr, class Ref, class KeyOfT, class HashFunc>
struct HTIterator {
    using Node = HashNode<T>;
    using Self = HTIterator<K, T, Ptr, Ref, KeyOfT, HashFunc>;

    Node* _node;                                    // node this iterator points at
    const HashTable<K, T, KeyOfT, HashFunc>* _pht;  // table being iterated (read-only access)
    // ... constructors and operator overloads
};

namespace mySTL {
template<class K, class V>
class unordered_map {
    struct MapKeyOfT {
        const K& operator()(const pair<const K, V>& kv) {
            return kv.first;
        }
    };
public:
    typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT>::iterator iterator;
    iterator begin() { return _ht.begin(); }
    iterator end() { return _ht.end(); }
    pair<iterator, bool> insert(const pair<K, V>& kv) {
        return _ht.Insert(kv);
    }
    V& operator[](const K& key) {
        pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));
        return ret.first->second;
    }
private:
    hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT> _ht;
};
}

namespace mySTL {
template<class K>
class unordered_set {
    struct SetKeyOfT {
        const K& operator()(const K& key) {
            return key;
        }
    };
public:
    typedef typename hash_bucket::HashTable<K, K, SetKeyOfT>::const_iterator iterator;
    pair<iterator, bool> insert(const K& key) {
        pair<typename hash_bucket::HashTable<K, K, SetKeyOfT>::iterator, bool> ret = _ht.Insert(key);
        return pair<iterator, bool>(ret.first, ret.second);
    }
private:
    hash_bucket::HashTable<K, K, SetKeyOfT> _ht;
};
}

C++ STL 哈希表原理与模拟实现

一、哈希表介绍

1.1 基本介绍

1.2 负载因子

1.3 哈希冲突

常见的解决哈希冲突的方法

更多推荐文章

相关免费在线工具

哈希冲突的影响

1.4 哈希函数

两种常见的哈希函数构造方法

1.5 解决哈希冲突的常用方法

1.5.1 线性探测法

1.5.2 哈希桶

二、使用线性探测法模拟实现哈希表

2.1 哈希函数

2.2 哈希表类

2.2.1 插入操作

2.2.2 查询操作

2.2.3 删除操作

2.3 完整代码 + 测试

三、使用哈希桶模拟实现哈希表

3.1 哈希函数

3.2 哈希表类

3.2.1 插入操作

3.2.2 查询操作

3.2.3 删除操作

3.3 完整代码 + 测试

四、模拟实现 unordered_map 和 unordered_set

4.1 引入迭代器

4.2 修改哈希表代码

4.3 模拟实现 unordered_map

4.4 模拟实现 unordered_set

更多推荐文章

相关免费在线工具

C++ STL 哈希表原理与模拟实现

一、哈希表介绍

1.1 基本介绍

1.2 负载因子

1.3 哈希冲突

常见的解决哈希冲突的方法

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具

哈希冲突的影响

1.4 哈希函数

两种常见的哈希函数构造方法

1.5 解决哈希冲突的常用方法

1.5.1 线性探测法

1.5.2 哈希桶

二、使用线性探测法模拟实现哈希表

2.1 哈希函数

2.2 哈希表类

2.2.1 插入操作

2.2.2 查询操作

2.2.3 删除操作

2.3 完整代码 + 测试

三、使用哈希桶模拟实现哈希表

3.1 哈希函数

3.2 哈希表类

3.2.1 插入操作

3.2.2 查询操作

3.2.3 删除操作

3.3 完整代码 + 测试

四、模拟实现 unordered_map 和 unordered_set

4.1 引入迭代器

4.2 修改哈希表代码

4.3 模拟实现 unordered_map

4.4 模拟实现 unordered_set

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具