『 C++ – Hash 』闭散列与开散列哈希表详解及其实现 ( 万字 )

本文介绍: 哈希表是一种常用的数据结构,该数据结构往往能存储大量的数据,在C++当中,底层为哈希表的容器最常见的为unordered_xxx系列,例如unordered_map与unordered_set,这两个容器是在C++当中以哈希表为底层的关联式容器,具体的关联式容器的特点参照上篇;哈希表通过一个叫做哈希函数(Hash Function)的算法,将存储的每个数据项与一个唯一的键值(key)进行绑定;这个函数会将每个键值映射到哈希表中的一个位置,以便对数据进行快速访问;

请添加图片描述

哈希表是一种常用的数据结构,该数据结构往往能存储大量的数据,在C++当中,底层为哈希表的容器最常见的为unordered_xxx系列,例如unordered_map与unordered_set,这两个容器是在C++当中以哈希表为底层的关联式容器,具体的关联式容器的特点参照上篇;

哈希表通过一个叫做哈希函数(Hash Function)的算法,将存储的每个数据项与一个唯一的键值(key)进行绑定;这个函数会将每个键值映射到哈希表中的一个位置,以便对数据进行快速访问;

哈希表的高效决定了这个数据结构在计算机中的地位:

由于哈希表在元素的存储位置与其关键码(key)之间建立了映射关系,因此在查找数据时,只需通过关键码计算出存储位置,即可直接找到该元素;

{13,10,7,4,8,9};

#pragma once

//***************************************
//**********闭散列哈希表的实现***********
//***************************************

#include<iostream>
#include<vector>

// Occupancy state of one slot in the closed-hashing table.
enum State {
    EMPTY = 0,   // slot holds no element (the default state of a slot)
    EXIST = 1,   // slot holds a live element
    DELETE = 2   // slot held an element that has since been erased
};

// One slot of the closed-hashing table: the stored key/value pair together
// with the slot's occupancy state.
template <typename K, typename V>
struct HashiData {
    std::pair<K, V> _kv;   // key/value payload
    State _state{EMPTY};   // a freshly constructed slot is empty
};

template<class K, class V>
class HashiTable{
    //整体模型
    public:
     typedef HashiData<K, V> Data;//使用typedef进行重命名方便后序的调用

    HashiTable():_tables(0),_n(0){}//构造函数使得初始的Vector容器的大小为0，_n表示当前存在的有效数据

     bool Insert(const std::pair<K, const V> &kv) {

        Data *to_find = Find(kv.first);
        if(to_find) return false; 

        if(_tables.size() == 0 || (_n*100) / _tables.size() >=75){

          size_t newsize = _tables.size() == 0 ? 10 : _tables.size() * 2;
          HashiTable<K, V> newtables;
          newtables._tables.resize(newsize);
          for(auto &it : _tables){
            newtables.Insert(it._kv);
          }
        _tables.swap(newtables._tables);
   
       size_t hashi = kv.first % _tables.size();
       size_t index = hashi;
       size_t i = 0;
    //    if (_tables[index]._kv.first == kv.first) return false;
       while (_tables[index]._state == EXIST) {
        
         index = (hashi + i)%_tables.size();
         ++i;
       }

       _tables[index]._kv = kv;
       _tables[index]._state = EXIST;
       ++_n;

       return true;
        }

        Data* Find(const K& key){
          //与插入函数的逻辑相同
          
          if(_tables.size() == 0){
            //如果是空表则返不进行查找
            return nullptr;
          }

          size_t hashi = key % _tables.size();
          size_t index = hashi;
          size_t i = 0;
          while (_tables[index]._state != EMPTY) {  //如果不为空则循环继续找数据

            if (_tables[index]._state == EXIST &&//条件为数据存在在表中且状态为存在
             _tables[index]._kv.first == key)
              return &_tables[index];

            index = (hashi + i) % _tables.size();
            ++i;
            if(index == hashi) break;
            //在查找过程中如果没找到数据的前提下index又回到了hashi的位置则代表已经找了一圈了 说明不存在数据 可以跳出循环(极端情况)
          }
          return nullptr;
        }

        bool Erase(const K& key){
          //采用伪删除法
          /*
            伪删除法的思路只要改变节点中的状态即可
          */
          Data *to_del = Find(key);
           if (to_del) {
            to_del->_state = DELETE;
            --_n;
            return true;
           }
          return false;
        }
         
    protected:
       

    private:
        std::vector<Data> _tables;//利用vector容器实现闭散列的哈希表
        size_t _n;//存储数据个数
};

#include <iostream>
#include <string>
#include <vector>

// Singly linked chain node used by the open-hashing (separate-chaining) table.
template <class K, class V>
struct HashNode {
  typedef HashNode<K, V> Node;
  Node* _next = nullptr;  // next node in the same bucket chain, or nullptr
  std::pair<K, V> _kv;    // stored key/value pair

  // Copies kv into the node. The pair is taken by const reference so it is
  // copied once (into _kv) rather than once per parameter and once per member.
  HashNode(const std::pair<K, V>& kv) : _kv(kv) {}
};

// Default hash functor: converts the key directly to size_t.
// operator() is const-qualified so the functor can be invoked through const
// objects and const references.
template <class K>
struct HashFunc {
  size_t operator()(const K& key) const {
    // A C-style cast is kept on purpose: it accepts every key type the
    // original accepted (integers, enums, pointers, ...).
    return (size_t)key;
  }
};

// Hash functor for std::string keys: for every character, add it to the
// running hash and then multiply the hash by 31. An empty string hashes to 0.
template <>
struct HashFunc<std::string> {
  size_t operator()(const std::string& key) const {
    size_t hash = 0;
    for (char ch : key) {
      hash += ch;
      hash *= 31;
    }
    return hash;
  }
};

template <class K, class V, class Hash = HashFunc<K>>
class HashTable {
 public:
  typedef HashNode<K, V> Node;

  ~HashTable() {
    Node* cur = nullptr;
    for (size_t i = 0; i < _hashtable.size(); ++i) {
      if (_hashtable[i]) {
        cur = _hashtable[i];
        Node* next = cur->_next;
        while (cur) {
          delete cur;
          cur = next;
        }
      }
    }
  }

  bool Insert(const std::pair<K, V> kv) {
    Hash to_int;

    // 使用Find函数进行判断是否需要进行插入(需要预防除零错误)
    if (Find(kv.first)) {
      // 找到该数据说明该数据存在不予继续插入
      return false;
    }

    if (_n == _hashtable.size()) {
      // 判断负载因子是否为1 负载因子若是为1则进行扩容
    //   size_t newsize = _hashtable.size() == 0 ? 10 : _hashtable.size() * 2;
      size_t newsize = GetNextPrime(_hashtable.size());
      std::vector<Node*> newTable;
      newTable.resize(newsize);

      // for(Node *&cur : _hashtable) 遍历Node*指针数组
      for (auto& cur : _hashtable) {
        while (cur) {
          Node* next = cur->_next;
          size_t hashi = to_int(cur->_kv.first) % newTable.size();
          cur->_next = newTable[hashi];
          newTable[hashi] = cur;
          cur = next;
        }
      }
      _hashtable.swap(newTable);
    }

    // 正常插入
    Node* newnode = new Node(kv);
    size_t hashi = to_int(kv.first) % _hashtable.size();
    // std::cout << kv.first <<std::endl;
    newnode->_next = _hashtable[hashi];
    _hashtable[hashi] = newnode;
    ++_n;
    return true;
  }

  Node* Find(const K& key) {
    Hash to_int;

    if (_hashtable.size() == 0) return nullptr;  // 防止除零错误

    size_t hashi = to_int(key) % _hashtable.size();
    Node* cur = _hashtable[hashi];

    while (cur) {
      if (cur->_kv.first == key) {
        return cur;
      }
      cur = cur->_next;
    }
    return nullptr;
  }

  bool Erase(const K& key) {
    Hash to_int;
    if (_hashtable.size() == 0) return false;  // 防止空的情况继续删除

    size_t hashi = to_int(key) % _hashtable.size();

    Node* cur = _hashtable[hashi];
    Node* prev = nullptr;
    while (cur) {
      if (cur->_kv.first == key) {
        if (prev) {
          prev->_next = cur->_next;
        } else {
          _hashtable[hashi] = cur->_next;
        }
        delete cur;
        return true;
      } else {
        prev = cur;
        cur = cur->_next;
      }
    }
    return false;
  }

  void Check() {
    // 检查函数 没有太重要的意义
    int i = 0;
    for (auto cur : _hashtable) {
      std::cout << "(" << i << ")"
                << " == ";
      if (cur) {
        while (cur) {
          std::cout << cur->_kv.first << " : " << cur->_kv.second << " || ";
          cur = cur->_next;
        }
        std::cout << std::endl;
      } else {
        std::cout << "nullptr" << std::endl;
      }
      ++i;
    }
  }

 protected:
  size_t GetNextPrime(size_t prime) {
    static const int __stl_num_primes = 28;
    static const unsigned long __stl_prime_list[__stl_num_primes] = {
        53,        97,         193,        389,       769,       1543,
        3079,      6151,       12289,      24593,     49157,     98317,
        196613,    393241,     786433,     1572869,   3145739,   6291469,
        12582917,  25165843,   50331653,   100663319, 201326611, 402653189,
        805306457, 1610612741, 3221225473, 4294967291};
    size_t i = 0;
    for (; i < __stl_num_primes; ++i) {
      if (__stl_prime_list[i] > prime) return __stl_prime_list[i];
    }
    return __stl_prime_list[i];
  }

 private:
  std::vector<Node*> _hashtable;  // 哈希表整体构造
  size_t _n = 0;                  // 负载因子
};

显示所有内容

声明：本站所有文章，如无特殊说明或标注，均为本站原创发布。任何个人或组织，在未征得本站同意时，禁止复制、盗用、采集、发布本站内容到任何网站、书籍等各类媒体平台。如若本站内容侵犯了原著者的合法权益，可联系我们进行处理。

c 哈希闭散列

文章目录

👾 哈希表概念

👾 常见哈希函数

🎏 直接定址法

🎏 除留余数法

👾 哈希冲突的解决方案

🎏 闭散列与闭散列哈希表的实现

🎐 闭散列哈希表的节点设置与基本架构

🎐 闭散列哈希表的插入逻辑及实现

🎐 闭散列哈希表的扩容

🎐 闭散列哈希表的查找

🎐 闭散列哈希表的数据删除

🎐 闭散列哈希表整体代码(供参考)

🎏 开散列与开散列哈希表的实现

🎐 开散列哈希表的节点设置与基本架构

🎐 开散列哈希表的插入逻辑及实现

🎐 开散列哈希表的扩容

🦠 哈希表扩容的优化

🎐 确保哈希表的泛型特性

🎐 开散列哈希表的查找

🎐 开散列哈希表的数据删除

🎐 开散列哈希表整体代码(供参考)

发表回复取消回复