C++ 哈希表使用与底层实现原理

C++ 哈希表使用与底层实现原理 | 极客日志

/// Default hash functor: converts the key itself to size_t.
///
/// Suitable for keys that can be cast to size_t (integers, enums, pointers).
/// The call operator is const so the functor can be used through const
/// objects and const references.
template<class K> struct DefaultHashFunc {
    // A C-style cast is kept on purpose: it also accepts pointer keys, which
    // static_cast<size_t> would reject.
    size_t operator()(const K& key) const { return (size_t)key; }
};

/// String specialization: multiplicative hash, hash = hash * 131 + byte.
template<> struct DefaultHashFunc<std::string> {
    /// @param str string to hash
    /// @return the accumulated hash; 0 for the empty string
    size_t operator()(const std::string& str) const {
        size_t hash = 0;
        for (const char ch : str) {
            // Go through unsigned char so bytes >= 0x80 add 128..255 instead of
            // being sign-extended on platforms where plain char is signed.
            hash = hash * 131 + static_cast<unsigned char>(ch);
        }
        return hash;
    }
};

namespace open_address {
    /// State of one slot in the open-addressing table.
    /// DELETE marks a slot whose element was erased; lookups keep probing past
    /// it, while Insert may reuse it.
    enum STATE { EXIST, EMPTY, DELETE };

    /// One slot: the stored key/value pair plus its state.
    template<class K, class V> struct HashData {
        std::pair<K, V> _kv;
        STATE _state = EMPTY;
    };

    /// Hash table using open addressing with linear probing.
    ///
    /// @tparam K        key type; must be comparable with ==
    /// @tparam V        mapped type
    /// @tparam HashFunc functor mapping a key to size_t
    template<class K, class V, class HashFunc = DefaultHashFunc<K>> class HashTable {
    public:
        /// Starts with 10 empty slots.
        HashTable() { _table.resize(10); }

        /// Inserts kv if its key is not present yet.
        /// @return true if inserted, false if the key already exists.
        bool Insert(const std::pair<K, V>& kv) {
            if (FindSlot(kv.first)) { return false; }

            // Grow once the load factor reaches 0.7. Integer arithmetic is used
            // to avoid floating point.
            if (_n * 10 / _table.size() >= 7) {
                HashTable<K, V, HashFunc> bigger;
                bigger._table.resize(_table.size() * 2);
                for (size_t i = 0; i < _table.size(); ++i) {
                    // Only live entries are carried over, so tombstones are dropped.
                    if (_table[i]._state == EXIST) { bigger.Insert(_table[i]._kv); }
                }
                _table.swap(bigger._table);
            }

            HashFunc hf;
            size_t hashi = hf(kv.first) % _table.size();
            // The load-factor check above guarantees at least one non-EXIST slot,
            // so this probe terminates.
            while (_table[hashi]._state == EXIST) { hashi = (hashi + 1) % _table.size(); }
            _table[hashi]._kv = kv;
            _table[hashi]._state = EXIST;
            ++_n;
            return true;
        }

        /// Looks up key.
        /// @return pointer to the slot holding key, or nullptr if absent.
        HashData<const K, V>* Find(const K& key) {
            // NOTE(review): the slot is stored as HashData<K, V>; exposing it as
            // HashData<const K, V> keeps the original interface and assumes both
            // instantiations share the same layout.
            return reinterpret_cast<HashData<const K, V>*>(FindSlot(key));
        }

        /// Removes key by turning its slot into a tombstone.
        /// @return true if the key was present.
        bool Erase(const K& key) {
            HashData<K, V>* slot = FindSlot(key);
            if (slot == nullptr) { return false; }
            slot->_state = DELETE;
            --_n;
            return true;
        }

    private:
        /// Linear probe for key. The probe count is capped at the table size:
        /// tombstones are not counted in _n, so a table can hold no EMPTY slot
        /// at all, and an uncapped probe would never terminate.
        HashData<K, V>* FindSlot(const K& key) {
            HashFunc hf;
            const size_t size = _table.size();
            size_t hashi = hf(key) % size;
            for (size_t probes = 0; probes < size; ++probes) {
                HashData<K, V>& slot = _table[hashi];
                if (slot._state == EMPTY) { break; }
                if (slot._state == EXIST && slot._kv.first == key) { return &slot; }
                hashi = (hashi + 1) % size;
            }
            return nullptr;
        }

        std::vector<HashData<K, V>> _table;
        size_t _n = 0;  // number of EXIST slots
    };
}

namespace hash_bucket {
    /// Singly linked list node stored in one bucket.
    template<class T> struct HashNode {
        T _data;
        HashNode<T>* _next;
        HashNode(const T& data) :_data(data), _next(nullptr) {}
    };

    // Forward declaration: the iterator keeps a pointer back to its table.
    template<class K, class T, class KeyOfT, class HashFunc> class HashTable;

    /// Forward iterator over every node of a bucket hash table.
    ///
    /// Ptr/Ref select the mutable (T*, T&) or read-only (const T*, const T&)
    /// flavour. Incrementing or dereferencing an end iterator (null _node) is
    /// not supported.
    template<class K, class T, class Ptr, class Ref, class KeyOfT, class HashFunc> struct HTIterator {
        typedef HashNode<T> Node;
        typedef HTIterator<K, T, Ptr, Ref, KeyOfT, HashFunc> Self;
        typedef HTIterator<K, T, T*, T&, KeyOfT, HashFunc> Iterator;

        Node* _node;
        const HashTable<K, T, KeyOfT, HashFunc>* _pht;

        HTIterator(Node* node, const HashTable<K, T, KeyOfT, HashFunc>* pht) :_node(node), _pht(pht) {}

        // For the mutable flavour this is the copy constructor. For the
        // read-only flavour it is the iterator -> const_iterator conversion.
        HTIterator(const Iterator& it) :_node(it._node), _pht(it._pht) {}

        Ref operator*() const { return _node->_data; }
        Ptr operator->() const { return &_node->_data; }

        /// Advances to the next node: first along the current chain, then to the
        /// head of the next non-empty bucket. Becomes the end iterator (null
        /// _node) when no bucket is left.
        Self& operator++() {
            if (_node->_next) {
                _node = _node->_next;
                return *this;
            }
            KeyOfT kot; HashFunc hf;
            // Recompute the current bucket from the element's key, then scan forward.
            size_t hashi = hf(kot(_node->_data)) % _pht->_table.size();
            for (++hashi; hashi < _pht->_table.size(); ++hashi) {
                if (_pht->_table[hashi]) {
                    _node = _pht->_table[hashi];
                    return *this;
                }
            }
            _node = nullptr;
            return *this;
        }

        // Two iterators are equal when they point at the same node.
        bool operator!=(const Self& s) const { return _node != s._node; }
        bool operator==(const Self& s) const { return _node == s._node; }
    };
}

/// Returns the smallest prime in the built-in table that is strictly greater
/// than `prime`. Each entry is roughly double the previous one.
///
/// @param prime current bucket count (any value is accepted)
/// @return the next table size; when `prime` is at or beyond the largest entry,
///         the largest entry (4294967291) is returned.
inline size_t GetNextPrime(size_t prime) {
    static const int kPrimeCount = 28;
    static const unsigned long kPrimeList[kPrimeCount] = {
        53, 97, 193, 389, 769, 1543, 3079, 6151, 12289, 24593, 49157, 98317,
        196613, 393241, 786433, 1572869, 3145739, 6291469, 12582917, 25165843,
        50331653, 100663319, 201326611, 402653189, 805306457, 1610612741,
        3221225473, 4294967291
    };
    for (int i = 0; i < kPrimeCount; ++i) {
        if (kPrimeList[i] > prime) { return kPrimeList[i]; }
    }
    // Nothing larger exists: clamp to the last entry instead of indexing one
    // past the end of the array.
    return kPrimeList[kPrimeCount - 1];
}

/// Skeleton of the bucket hash table: shows only the node alias and the
/// friendship that lets HTIterator read the table's private members.
template<class K, class T, class KeyOfT, class HashFunc = DefaultHashFunc<K>>
class HashTable {
    typedef HashNode<T> Node;
    // The friend template needs its own parameter names: reusing K/T/KeyOfT/
    // HashFunc would redeclare the enclosing template's parameters, which
    // standard-conforming compilers reject.
    template<class K2, class T2, class Ptr, class Ref, class KeyOfT2, class HashFunc2>
    friend struct HTIterator;
};

// Forward declaration of the HashTable class template.
template<class K, class T, class KeyOfT, class HashFunc> class HashTable; 
// Iterator skeleton; its body is elided in this excerpt.
template<class K, class T, class Ptr, class Ref, class KeyOfT, class HashFunc> 
struct HTIterator { 
    // ... same HTIterator implementation as shown above 
};

template<class K, class T, class KeyOfT, class HashFunc = DefaultHashFunc<K>> 
class HashTable { 
    typedef HashNode<T> Node; 
    template<class K, class T, class Ptr, class Ref, class KeyOfT, class HashFunc> 
    friend struct HTIterator; 
public: 
    typedef HTIterator<K, T, T*, T&, KeyOfT, HashFunc> iterator; 
    typedef HTIterator<K, T, const T*, const T&, KeyOfT, HashFunc> const_iterator; 
    iterator begin() { 
        for (size_t i = 0; i < _table.size(); i++) { 
            Node* cur = _table[i]; 
            if (cur) { return iterator(cur, this); } 
        } 
        return iterator(nullptr, this); 
    } 
    iterator end() { return iterator(nullptr, this); } 
    const_iterator begin() const { 
        for (size_t i = 0; i < _table.size(); i++) { 
            Node* cur = _table[i]; 
            if (cur) { return const_iterator(cur, this); } 
        } 
        return const_iterator(nullptr, this); 
    } 
    const_iterator end() const { return const_iterator(nullptr, this); } 
    size_t GetNextPrime(size_t prime) { /* 同上 */ } 
    HashTable() { _table.resize(GetNextPrime(1), nullptr); } 
    ~HashTable() { 
        for (size_t i = 0; i < _table.size(); i++) { 
            Node* cur = _table[i]; 
            while (cur) { Node* next = cur->_next; delete cur; cur = next; } 
            _table[i] = nullptr; 
        } 
    } 
    pair<iterator, bool> Insert(const T& data) { 
        KeyOfT kot; iterator it = Find(kot(data)); 
        if (it != end()) { return make_pair(it, false); } 
        HashFunc hf; 
        if (_n == _table.size()) { 
            size_t newSize = GetNextPrime(_table.size()); 
            vector<Node*> newTable; 
            newTable.resize(newSize, nullptr); 
            for (size_t i = 0; i < _table.size(); i++) { 
                Node* cur = _table[i]; 
                while (cur) { 
                    Node* next = cur->_next; 
                    size_t hashi = hf(kot(cur->_data)) % newSize; 
                    cur->_next = newTable[hashi]; 
                    newTable[hashi] = cur; 
                    cur = next; 
                } 
                _table[i] = nullptr; 
            } 
            _table.swap(newTable); 
        } 
        size_t hashi = hf(kot(data)) % _table.size(); 
        Node* newnode = new Node(data); 
        newnode->_next = _table[hashi]; 
        _table[hashi] = newnode; 
        ++_n; 
        return make_pair(iterator(newnode, this), true); 
    } 
    iterator Find(const K& key) { 
        HashFunc hf; KeyOfT kot; 
        size_t hashi = hf(key) % _table.size(); 
        Node* cur = _table[hashi]; 
        while (cur) { 
            if (kot(cur->_data) == key) { return iterator(cur, this); } 
            cur = cur->_next; 
        } 
        return end(); 
    } 
    bool Erase(const K& key) { 
        HashFunc hf; KeyOfT kot; 
        size_t hashi = hf(key) % _table.size(); 
        Node* prev = nullptr; Node* cur = _table[hashi]; 
        while (cur) { 
            if (kot(cur->_data) == key) { 
                if (prev == nullptr) { _table[hashi] = cur->_next; } 
                else { prev->_next = cur->_next; } 
                --_n; delete cur; return true; 
            } 
            prev = cur; cur = cur->_next; 
        } 
        return false; 
    } 
private: 
    vector<Node*> _table; 
    size_t _n = 0; 
};

#include"HashsTable.h" 
namespace lxp { 
    template<class K, class V> class unordered_map { 
        struct MapKeyOfT { 
            const K& operator()(const pair<const K, V>& kv) { return kv.first; } 
        }; 
    public: 
        typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT>::iterator iterator; 
        typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT>::const_iterator const_iterator; 
        iterator begin() { return _ht.begin(); } 
        iterator end() { return _ht.end(); } 
        const_iterator begin() const { return _ht.begin(); } 
        const_iterator end() const { return _ht.end(); } 
        pair<iterator, bool> insert(const pair<K, V>& kv) { return _ht.Insert(kv); } 
        V& operator[](const K& key) { 
            pair<iterator, bool> ret = _ht.Insert(make_pair(key, V())); 
            return ret.first->second; 
        } 
    private: 
        hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT> _ht; 
    }; 
}

#include"HashsTable.h" 
namespace lxp { 
    template<class K> class unordered_set { 
        struct SetKeyOfT { 
            const K& operator()(const K& key) { return key; } 
        }; 
    public: 
        typedef typename hash_bucket::HashTable<K, K, SetKeyOfT>::const_iterator iterator; 
        typedef typename hash_bucket::HashTable<K, K, SetKeyOfT>::const_iterator const_iterator; 
        const_iterator begin() const { return _ht.begin(); } 
        const_iterator end() const { return _ht.end(); } 
        pair<const_iterator, bool> insert(const K& key) { 
            pair<typename hash_bucket::HashTable<K, K, SetKeyOfT>::iterator, bool> ret = _ht.Insert(key); 
            return pair<const_iterator, bool>(ret.first, ret.second); 
        } 
    private: 
        hash_bucket::HashTable<K, K, SetKeyOfT> _ht; 
    }; 
}

#include <iostream>
#include <vector>
using namespace std;
namespace lxp {
    /// Fixed-capacity bit map: bit x records whether value x is present.
    /// Bits are packed 32 per word; N / 32 + 1 words are allocated, so indices
    /// 0..N are always in range. Indices are not bounds-checked.
    template<size_t N> class bitset {
    public:
        bitset() { _a.resize(N / 32 + 1); }

        /// Sets bit x to 1.
        void set(size_t x) { _a[x / 32] |= (1u << (x % 32)); }

        /// Clears bit x to 0.
        void reset(size_t x) { _a[x / 32] &= ~(1u << (x % 32)); }

        /// @return true if bit x is 1.
        bool test(size_t x) const { return (_a[x / 32] & (1u << (x % 32))) != 0; }

    private:
        // Unsigned words with an unsigned mask: shifting a signed 1 into bit 31
        // is not portable.
        std::vector<unsigned int> _a;
    };

    /// Two-bit saturating counter per value, built from two bit maps.
    /// (_bs1, _bs2) encodes the number of set() calls: 00 = never, 01 = once,
    /// 10 = twice or more.
    template<size_t N> class twobitset {
    public:
        /// Records one more occurrence of x; the count saturates at "twice or more".
        void set(size_t x) {
            const bool high = _bs1.test(x);
            const bool low = _bs2.test(x);
            if (!high && !low) {
                _bs2.set(x);            // 00 -> 01
            } else if (!high && low) {
                _bs1.set(x);            // 01 -> 10
                _bs2.reset(x);
            }
            // 10 stays 10.
        }

        /// @return true if x was recorded exactly once.
        bool is_once(size_t x) const { return !_bs1.test(x) && _bs2.test(x); }

    private:
        bitset<N> _bs1; bitset<N> _bs2;
    };
}

/// BKDR string hash: value = value * 31 + byte for each character.
struct BKDRHash {
    /// @param s string to hash
    /// @return the accumulated hash; 0 for the empty string
    size_t operator()(const std::string& s) const {
        size_t value = 0;
        for (const char ch : s) {
            // unsigned char keeps bytes >= 0x80 from being sign-extended where
            // plain char is signed, so the result is the same on every platform.
            value = value * 31 + static_cast<unsigned char>(ch);
        }
        return value;
    }
};
/// AP string hash: mixes each character with shifted copies of the running
/// hash, using a different formula for even and odd positions.
struct APHash {
    /// @param s string to hash
    /// @return the accumulated hash; 0 for the empty string
    size_t operator()(const std::string& s) const {
        size_t hash = 0;
        // size_t index: avoids comparing a signed counter with size().
        for (size_t i = 0; i < s.size(); ++i) {
            // unsigned char keeps bytes >= 0x80 from being sign-extended where
            // plain char is signed.
            const size_t ch = static_cast<unsigned char>(s[i]);
            if ((i & 1) == 0) {
                hash ^= ((hash << 7) ^ ch ^ (hash >> 3));
            } else {
                hash ^= (~((hash << 11) ^ ch ^ (hash >> 5)));
            }
        }
        return hash;
    }
};
/// DJB string hash: starts at 5381 and applies hash = hash * 33 + byte.
struct DJBHash {
    /// @param s string to hash
    /// @return the accumulated hash; 5381 for the empty string
    size_t operator()(const std::string& s) const {
        size_t hash = 5381;
        for (const char ch : s) {
            // hash += (hash << 5) + byte is hash * 33 + byte. unsigned char keeps
            // bytes >= 0x80 from being sign-extended where plain char is signed.
            hash += (hash << 5) + static_cast<unsigned char>(ch);
        }
        return hash;
    }
};
/// Bloom filter over X * N bits probed by three independent hash functions.
///
/// @tparam N         sizing parameter; the bit array holds X * N bits
/// @tparam X         bits allotted per unit of N
/// @tparam K         key type
/// @tparam HashFunc1 first hash functor (key -> size_t)
/// @tparam HashFunc2 second hash functor
/// @tparam HashFunc3 third hash functor
///
/// Test() can report false positives (all three bits set by other keys) but
/// never a false negative for a key that was Set().
template<size_t N, size_t X = 5, class K = std::string, class HashFunc1 = BKDRHash, class HashFunc2 = APHash, class HashFunc3 = DJBHash>
class BloomFilter {
    // Every index is reduced modulo X * N, so an empty bit array would mean a
    // division by zero.
    static_assert(N > 0 && X > 0, "BloomFilter needs a non-empty bit array");
public:
    /// Marks key as present by setting its three bits.
    void Set(const K& key) {
        _bs.set(Index<HashFunc1>(key));
        _bs.set(Index<HashFunc2>(key));
        _bs.set(Index<HashFunc3>(key));
    }

    /// @return false if key was definitely never Set(); true if it may have been.
    bool Test(const K& key) {
        // Hashes are evaluated lazily: stop at the first bit that is clear.
        if (_bs.test(Index<HashFunc1>(key)) == false) return false;
        if (_bs.test(Index<HashFunc2>(key)) == false) return false;
        if (_bs.test(Index<HashFunc3>(key)) == false) return false;
        return true;
    }

    // NOTE(review): declared only; no definition is visible in this file.
    // Clearing a key's bits could also erase bits shared with other keys.
    void Reset(const K& key);

private:
    /// Bit position selected for key by the hash functor Hash.
    template<class Hash> static size_t Index(const K& key) {
        return Hash()(key) % (X * N);
    }

    bitset<X * N> _bs;
};

C++ 哈希表使用与底层实现原理

一、unordered 系列关联式容器

1.1 unordered_map

1.1.1 unordered_map 的文档介绍

1.1.2 unordered_map 的接口说明

更多推荐文章

相关免费在线工具

1.2 标准库中的 unordered_map

1.2.1 unordered_map 的介绍

二、底层结构

2.1 哈希概念

2.2 哈希冲突

2.3 哈希函数

2.4 哈希冲突解决

2.4.1 闭散列

2.4.2 开散列

2.5 开散列与闭散列比较

三、模拟实现

3.1 哈希表的改造

3.1.1 模板参数列表的改造

3.1.2 增加迭代器操作

3.1.3 增加通过 key 获取 value 操作

3.2 unordered_map

3.3 unordered_set

四、哈希的应用

4.1 位图

4.1.1 位图概念

4.1.2 位图的实现

4.1.3 位图的应用

4.2 布隆过滤器

4.2.1 布隆过滤器提出

4.2.2 布隆过滤器概念

4.2.3 布隆过滤器的插入

4.2.4 布隆过滤器的查找

4.2.5 布隆过滤器删除

4.2.6 布隆过滤器优点

4.2.7 布隆过滤器缺陷

更多推荐文章

相关免费在线工具

C++ 哈希表使用与底层实现原理

一、unordered 系列关联式容器

1.1 unordered_map

1.1.1 unordered_map 的文档介绍

1.1.2 unordered_map 的接口说明

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具

1.2 标准库中的 unordered_map

1.2.1 unordered_map 的介绍

二、底层结构

2.1 哈希概念

2.2 哈希冲突

2.3 哈希函数

2.4 哈希冲突解决

2.4.1 闭散列

2.4.2 开散列

2.5 开散列与闭散列比较

三、模拟实现

3.1 哈希表的改造

3.1.1 模板参数列表的改造

3.1.2 增加迭代器操作

3.1.3 增加通过 key 获取 value 操作

3.2 unordered_map

3.3 unordered_set

四、哈希的应用

4.1 位图

4.1.1 位图概念

4.1.2 位图的实现

4.1.3 位图的应用

4.2 布隆过滤器

4.2.1 布隆过滤器提出

4.2.2 布隆过滤器概念

4.2.3 布隆过滤器的插入

4.2.4 布隆过滤器的查找

4.2.5 布隆过滤器删除

4.2.6 布隆过滤器优点

4.2.7 布隆过滤器缺陷

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具