【C++】哈希表的模拟实现及 unordered

07-16 1443阅读

1.2.1 哈希表中仿函数的实现
1.2.2 哈希表中节点类的实现
1.2.3 哈希表中迭代器类的实现
1.2.4 哈希表中构造函数、析构函数和 Clear() 函数的实现
1.2.5 哈希表中Swap 和 Size 函数的实现
1.2.6 哈希表中 begin 和 end 函数的实现
1.2.7 哈希表中 Find 函数的实现
1.2.8 哈希表中 Insert 和 Erase 函数的实现
1.2.9 哈希表的整体实现
二、unordered_set的实现及测试
三、unordered_map的实现及测试

结尾

前言

上一篇文章中讲到了哈希的概念和STL中关于哈希容器的使用，并且在后面对开散列进行了模拟实现，那么在这一篇文章中在开散列的基础上进行改造成哈希表，并且在哈希表的基础上封装 unordered_set 和 unordered_map。

一、哈希表的模拟实现

1.1 哈希表的改造

1.1.1 模板参数列表的改造

K：表示关键码类型

T：不同容器V的类型不同，如果是unordered_map，K代表一个键值对，如果是unordered_set，T 为 K。

KOfT：因为V的类型不同，通过value取key的方式就不同，详细见

unordered_map/set的实现

HF: 哈希函数仿函数对象类型，哈希函数使用除留余数法，需要将Key转换为整形数字才能取模

template
class hash_table;

1.1.2 增加迭代器操作

// 前置声明，否则后面的__HTIterator找不到hash_table
	template
	class hash_table;
	template
	struct __HTIterator
	{
		typedef HashNode Node;
		typedef __HTIterator self;
		// 成员变量
		Node* _node;
		// 由于const迭代器中const修饰this
		// 所以这里需要加const，否则会导致权限放大
		// 编译无法通过的情况
		const hash_table* _pht;
		// 
		size_t hashi;
		// 构造函数
		__HTIterator(Node* node, hash_table* pht , size_t i)
			:_node(node)
			,_pht(pht)
			,hashi(i)
		{}
		// 构造函数
		__HTIterator(Node* node, const hash_table* pht, size_t i)
			:_node(node)
			, _pht(pht)
			, hashi(i)
		{}
		self& operator++()
		{
			if (_node->_next)
			{
				// 若当前桶其他节点没有走完
				// 那么继续走下一个节点
				_node = _node->_next;
			}
			else
			{
				// 若当前桶的所有节点都已经走完
				// 那么继续向下一个桶不为空的桶走
				hashi++;
				while (hashi _table.size())
				{
					if (_pht->_table[hashi])
					{
						_node = _pht->_table[hashi];
						break;
					}
					hashi++;
				}
				if (hashi == _pht->_table.size())
					_node = nullptr;
			}
			return *this;
		}
		Ref operator*()
		{
			return _node->_data;
		}
		Ptr operator->()
		{
			return &_node->_data;
		}
		bool operator!=(const self& s)
		{
			return _node != s._node;
		}
	};

1.2 哈希表的模拟实现

1.2.1 哈希表中仿函数的实现

若存储key的类型不是整形时，需要将其转化为整数，整形不需要处理，日常中我们最常用到的类型是string，那么这里就写一个string转化为整形的版本。

// 整数类型
template
struct HashFunc
{
	size_t operator()(const K& key)
	{
		return (size_t)key;
	}
};
// 特化
template
struct HashFunc
{
	size_t operator()(const string& s)
	{
		size_t sum = 0;
		for (size_t i = 0; i  
1.2.2 哈希表中节点类的实现 
namespace hash_bucket
{
	template
	struct HashNode
	{
		HashNode* _next;
		T _data;
		HashNode(const T& data)
			:_data(data)
			, _next(nullptr)
		{}
	};
}
 
1.2.3 哈希表中迭代器类的实现 
namespace hash_bucket
{
	// 前置声明，否则后面的__HTIterator找不到hash_table
	template
	class hash_table;
	template
	struct __HTIterator
	{
		typedef HashNode Node;
		typedef __HTIterator self;
		// 成员变量
		Node* _node;
		// 由于const迭代器中const修饰this
		// 所以这里需要加const，否则会导致权限放大
		// 编译无法通过的情况
		const hash_table* _pht;
		// 
		size_t hashi;
		// 构造函数
		__HTIterator(Node* node, hash_table* pht, size_t i)
			:_node(node)
			, _pht(pht)
			, hashi(i)
		{}
		// 构造函数
		__HTIterator(Node* node, const hash_table* pht, size_t i)
			:_node(node)
			, _pht(pht)
			, hashi(i)
		{}
		self& operator++()
		{
			if (_node->_next)
			{
				// 若当前桶其他节点没有走完
				// 那么继续走下一个节点
				_node = _node->_next;
			}
			else
			{
				// 若当前桶的所有节点都已经走完
				// 那么继续向下一个桶不为空的桶走
				hashi++;
				while (hashi _table.size())
				{
					if (_pht->_table[hashi])
					{
						_node = _pht->_table[hashi];
						break;
					}
					hashi++;
				}
				if (hashi == _pht->_table.size())
					_node = nullptr;
			}
			return *this;
		}
		Ref operator*()
		{
			return _node->_data;
		}
		Ptr operator->()
		{
			return &_node->_data;
		}
		bool operator!=(const self& s)
		{
			return _node != s._node;
		}
	};
}
 
1.2.4 哈希表中构造函数、析构函数和 Clear() 函数的实现 
namespace hash_bucket
{
	template
	class hash_table
	{
	public:
		typedef HashNode Node;
	public:
		// 构造函数
		hash_table(int n = 10)
			:_table(vector(n))
			, _n(0)
		{}
		// 析构函数
		~hash_table()
		{
			Clear();
		}
		void Clear()
		{
			for (size_t i = 0; i _next;
					delete cur;
					cur = next;
				}
				_table[i] = nullptr;
			}
		}
	private:
		vector _table;
		int _n;
	};
}
 
1.2.5 哈希表中Swap 和 Size 函数的实现 
namespace hash_bucket
{
	template
	class hash_table
	{
	public:
		size_t Size()
		{
			return _n;
		}
		void Swap(hash_table& ht)
		{
			_table.swap(ht._table);
			swap(_n, ht._n);
		}
	private:
		vector _table;
		int _n;
	};
}
 
1.2.6 哈希表中 begin 和 end 函数的实现 
namespace hash_bucket
{
	template
	class hash_table
	{
	public:
		typedef HashNode Node;
		typedef __HTIterator iterator;
		typedef __HTIterator const_iterator;
		// 将__HTIterator设置为hash_table的友元
		// 使__HTIterator可以访问hash_table的私有
		template
		friend struct __HTIterator;
	public:
		// 迭代器
		iterator begin()
		{
			for (size_t i = 0; i  
1.2.7 哈希表中 Find 函数的实现 
namespace hash_bucket
{
	template
	class hash_table
	{
	public:
		typedef HashNode Node;
		typedef __HTIterator iterator;
		typedef __HTIterator const_iterator;
		// 将__HTIterator设置为hash_table的友元
		// 使__HTIterator可以访问hash_table的私有
		template
		friend struct __HTIterator;
	public:
		// 查找
		iterator Find(const K& key)
		{
			HF hf;
			KOfT koft;
			int hashi = hf(key) % _table.size();
			Node* cur = _table[hashi];
			while (cur)
			{
				if (koft(cur->_data) == key)
					return iterator(cur, this, hashi);
				cur = cur->_next;
			}
			// 找不到返回空
			return end();
		}
	private:
		vector _table;
		int _n;
	};
}
 
1.2.8 哈希表中 Insert 和 Erase 函数的实现 
namespace hash_bucket
{
	template
	class hash_table
	{
	public:
		typedef HashNode Node;
		typedef __HTIterator iterator;
		typedef __HTIterator const_iterator;
		// 将__HTIterator设置为hash_table的友元
		// 使__HTIterator可以访问hash_table的私有
		template
		friend struct __HTIterator;
	public:
		// 插入
		pair Insert(const T& data)
		{
			HF hf;
			KOfT koft;
			iterator tmp = Find(koft(data));
			// 不允许有相同的值
			if (tmp != end())
				return make_pair(tmp, false);
			// 扩容
			if (_n == _table.size())
			{
				int newcapacity = 2 * _table.size();
				hash_table newtable(newcapacity);
				for (size_t i = 0; i _next;
						int hashi = hf(koft(cur->_data)) % newcapacity;
						cur->_next = newtable._table[hashi];
						newtable._table[hashi] = cur;
						cur = next;
					}
					_table[i] = nullptr;
				}
				_table.swap(newtable._table);
			}
			// 头插 
			int hashi = hf(koft(data)) % _table.size();
			Node* newnode = new Node(data);
			newnode->_next = _table[hashi];
			_table[hashi] = newnode;
			_n++;
			return make_pair(iterator(newnode, this, hashi), true);
		}
		// 删除
		bool Erase(const K& key)
		{
			Node* tmp = Find(key);
			// 找不到则删除失败
			if (tmp == nullptr)
				return false;
			HF hf;
			KOfT koft;
			int hashi = hf(key) % _table.size();
			Node* prev = nullptr;
			Node* cur = _table[hashi];
			while (cur)
			{
				Node* next = cur->_next;
				if (koft(cur->_data) == key)
				{
					if (prev == nullptr)
					{
						_table[hashi] = next;
					}
					else
					{
						prev->_next = next;
					}
					_n--;
					delete cur;
					return true;
				}
				else
				{
					prev = cur;
					cur = next;
				}
			}
			return false;
		}
	private:
		vector _table;
		int _n;
	};
}
 
1.2.9 哈希表的整体实现 
#pragma once
#include
#include
#include
#include
#include
using namespace std;
// 整数类型
template
struct HashFunc
{
	size_t operator()(const K& key)
	{
		return (size_t)key;
	}
};
// 特化
template
struct HashFunc
{
	size_t operator()(const string& s)
	{
		size_t sum = 0;
		for (size_t i = 0; i _next)
			{
				// 若当前桶其他节点没有走完
				// 那么继续走下一个节点
				_node = _node->_next;
			}
			else
			{
				// 若当前桶的所有节点都已经走完
				// 那么继续向下一个桶不为空的桶走
				hashi++;
				while (hashi _table.size())
				{
					if (_pht->_table[hashi])
					{
						_node = _pht->_table[hashi];
						break;
					}
					hashi++;
				}
				if (hashi == _pht->_table.size())
					_node = nullptr;
			}
			return *this;
		}
		Ref operator*()
		{
			return _node->_data;
		}
		Ptr operator->()
		{
			return &_node->_data;
		}
		bool operator!=(const self& s)
		{
			return _node != s._node;
		}
	};
	
	template
	class hash_table
	{
	public:
		typedef HashNode Node;
		typedef __HTIterator iterator;
		typedef __HTIterator const_iterator;
		// 将__HTIterator设置为hash_table的友元
		// 使__HTIterator可以访问hash_table的私有
		template
		friend struct __HTIterator;
	public:
		// 构造函数
		hash_table(int n = 10)
			:_table(vector(n))
			,_n(0)
		{}
		// 析构函数
		~hash_table()
		{
			Clear();
		}
		void Clear()
		{
			for (size_t i = 0; i _next;
					delete cur;
					cur = next;
				}
				_table[i] = nullptr;
			}
		}
		size_t Size()
		{
			return _n;
		}
		void Swap(hash_table& ht)
		{
			_table.swap(ht._table);
			swap(_n, ht._n);
		}
		// 迭代器
		iterator begin()
		{
			for (size_t i = 0; i _next;
						int hashi = hf(koft(cur->_data)) % newcapacity;
						cur->_next = newtable._table[hashi];
						newtable._table[hashi] = cur;
						cur = next;
					}
					_table[i] = nullptr;
				}
				_table.swap(newtable._table);
			}
			// 头插 
			int hashi = hf(koft(data)) % _table.size();
 			Node* newnode = new Node(data);
			newnode->_next = _table[hashi];
			_table[hashi] = newnode;
			_n++;
			return make_pair(iterator(newnode,this,hashi),true);
		}
		// 删除
		bool Erase(const K& key)
		{
			Node* tmp = Find(key);
			// 找不到则删除失败
			if (tmp == nullptr)
				return false;
			HF hf;
			KOfT koft;
			int hashi = hf(key) % _table.size();
			Node* prev = nullptr;
			Node* cur = _table[hashi];
			while (cur)
			{
				Node* next = cur->_next;
				if (koft(cur->_data) == key)
				{
					if (prev == nullptr)
					{
						_table[hashi] = next;
					}
					else
					{
						prev->_next = next;
					}
					_n--;
					delete cur;
					return true;
				}
				else
				{
					prev = cur;
					cur = next;
				}
			}
			return false;
		}
		// 查找
		iterator Find(const K& key)
		{
			HF hf;
			KOfT koft;
			int hashi = hf(key) % _table.size();
			Node* cur = _table[hashi];
			while (cur)
			{
				if (koft(cur->_data) == key)
					return iterator(cur,this,hashi);
				cur = cur->_next;
			}
			// 找不到返回空
			return end();
		}
		void Some()
		{
			size_t bucketSize = 0;
			size_t maxBucketLen = 0;
			size_t sum = 0;
			double averageBucketLen = 0;
			for (size_t i = 0; i _next;
				}
				sum += bucketLen;
				if (bucketLen > maxBucketLen)
				{
					maxBucketLen = bucketLen;
				}
			}
			averageBucketLen = (double)sum / (double)bucketSize;
			printf("all bucketSize:%d\n", _table.size());
			printf("bucketSize:%d\n", bucketSize);
			printf("maxBucketLen:%d\n", maxBucketLen);
			printf("averageBucketLen:%lf\n\n", averageBucketLen);
		}
	private:
		vector _table;
		int _n;
	public:
		void TestHT1()
		{
			hash_table ht;
			int a[] = { 4,14,24,34,5,7,1 };
			for (auto e : a)
			{
				ht.Insert(make_pair(e, e));
			}
			ht.Insert(make_pair(3, 3));
			ht.Insert(make_pair(3, 3));
			ht.Insert(make_pair(-3, -3));
			ht.Some();
		}
		void TestHT2()
		{
			string arr[] = { "香蕉", "甜瓜","苹果", "西瓜", "苹果", "西瓜", "苹果", "苹果", "西瓜", "苹果", "香蕉", "苹果", "香蕉" };
			//HashTable ht;
			hash_table ht;
			for (auto& e : arr)
			{
				//auto ret = ht.Find(e);
				HashNode* ret = ht.Find(e);
				if (ret)
				{
					ret->_kv.second++;
				}
				else
				{
					ht.Insert(make_pair(e, 1));
				}
			}
			ht.Insert(make_pair("apple", 1));
			ht.Insert(make_pair("sort", 1));
			ht.Insert(make_pair("abc", 1));
			ht.Insert(make_pair("acb", 1));
			ht.Insert(make_pair("aad", 1));
		}
		void TestHT3()
		{
			const size_t N = 1000;
			unordered_set us;
			set s;
			hash_table ht;
			vector v;
			v.reserve(N);
			srand(time(0));
			for (size_t i = 0; i