数据结构中的哈希集合与哈希表:概念、操作与实战
第一部分 哈希集合和哈希表的分类及常见形式
哈希表(Hash Table)和哈希集合(Hash Set)是基于哈希函数实现的高效数据结构,平均情况下可以提供O(1)时间复杂度的查找、插入和删除操作。
1. 哈希表(散列表)
键值对存储结构,通过键快速访问值
#define TABLE_SIZE 1000typedef struct HashNode {int key;int value;struct HashNode* next;
} HashNode;typedef struct {HashNode** buckets;int size;
} HashMap;
2. 哈希集合
只存储唯一键的结构,不存储值
typedef struct SetNode {int key;struct SetNode* next;
} SetNode;typedef struct {SetNode** buckets;int size;
} HashSet;
3. 常见哈希冲突解决方法
链地址法(Separate Chaining)
// 如上HashMap和HashSet的实现就是使用链地址法
开放寻址法(Open Addressing)
typedef struct {int* keys;int* values;int size;int capacity;
} OpenAddressingHashMap;
4. 动态扩容哈希表
当负载因子超过阈值时自动扩容
typedef struct {HashNode** buckets;int size;int capacity;float loadFactor;
} DynamicHashMap;
第二部分 哈希集合和哈希表常见操作
1. 哈希表基本操作
初始化哈希表
HashMap* createHashMap() {HashMap* map = (HashMap*)malloc(sizeof(HashMap));map->buckets = (HashNode**)calloc(TABLE_SIZE, sizeof(HashNode*));map->size = 0;return map;
}
哈希函数
int hashFunction(int key) {return key % TABLE_SIZE;
}
插入键值对
void put(HashMap* map, int key, int value) {int index = hashFunction(key);HashNode* node = map->buckets[index];// 检查是否已存在while(node != NULL) {if(node->key == key) {node->value = value; // 更新值return;}node = node->next;}// 创建新节点HashNode* newNode = (HashNode*)malloc(sizeof(HashNode));newNode->key = key;newNode->value = value;newNode->next = map->buckets[index];map->buckets[index] = newNode;map->size++;
}
获取值
int get(HashMap* map, int key) {int index = hashFunction(key);HashNode* node = map->buckets[index];while(node != NULL) {if(node->key == key) {return node->value;}node = node->next;}return -1; // 未找到
}
删除键值对
void removeKey(HashMap* map, int key) {int index = hashFunction(key);HashNode* node = map->buckets[index];HashNode* prev = NULL;while(node != NULL) {if(node->key == key) {if(prev == NULL) {map->buckets[index] = node->next;} else {prev->next = node->next;}free(node);map->size--;return;}prev = node;node = node->next;}
}
2. 哈希集合基本操作
初始化哈希集合
HashSet* createHashSet() {HashSet* set = (HashSet*)malloc(sizeof(HashSet));set->buckets = (SetNode**)calloc(TABLE_SIZE, sizeof(SetNode*));set->size = 0;return set;
}
添加元素
void add(HashSet* set, int key) {int index = hashFunction(key);SetNode* node = set->buckets[index];// 检查是否已存在while(node != NULL) {if(node->key == key) {return; // 已存在}node = node->next;}// 创建新节点SetNode* newNode = (SetNode*)malloc(sizeof(SetNode));newNode->key = key;newNode->next = set->buckets[index];set->buckets[index] = newNode;set->size++;
}
检查元素是否存在
int contains(HashSet* set, int key) {int index = hashFunction(key);SetNode* node = set->buckets[index];while(node != NULL) {if(node->key == key) {return 1; // 存在}node = node->next;}return 0; // 不存在
}
删除元素
void removeFromSet(HashSet* set, int key) {int index = hashFunction(key);SetNode* node = set->buckets[index];SetNode* prev = NULL;while(node != NULL) {if(node->key == key) {if(prev == NULL) {set->buckets[index] = node->next;} else {prev->next = node->next;}free(node);set->size--;return;}prev = node;node = node->next;}
}
3. 动态扩容实现
void resizeHashMap(DynamicHashMap* map) {int newCapacity = map->capacity * 2;HashNode** newBuckets = (HashNode**)calloc(newCapacity, sizeof(HashNode*));// 重新哈希所有元素for(int i = 0; i < map->capacity; i++) {HashNode* node = map->buckets[i];while(node != NULL) {HashNode* next = node->next;int newIndex = node->key % newCapacity;node->next = newBuckets[newIndex];newBuckets[newIndex] = node;node = next;}}free(map->buckets);map->buckets = newBuckets;map->capacity = newCapacity;
}void dynamicPut(DynamicHashMap* map, int key, int value) {// 检查是否需要扩容if((float)map->size / map->capacity > map->loadFactor) {resizeHashMap(map);}// 插入逻辑与普通哈希表相同// ...
}
第三部分 哈希集合和哈希表编程题例子
1. 两数之和
int* twoSum(int* nums, int numsSize, int target, int* returnSize) {HashMap* map = createHashMap();int* result = (int*)malloc(2 * sizeof(int));*returnSize = 2;for(int i = 0; i < numsSize; i++) {int complement = target - nums[i];int complementIndex = get(map, complement);if(complementIndex != -1) {result[0] = complementIndex;result[1] = i;return result;}put(map, nums[i], i);}return NULL;
}
2. 无重复字符的最长子串
int lengthOfLongestSubstring(char* s) {int charIndex[128]; // ASCII字符集memset(charIndex, -1, sizeof(charIndex));int maxLength = 0;int start = 0;for(int end = 0; s[end] != '\0'; end++) {if(charIndex[s[end]] >= start) {start = charIndex[s[end]] + 1;}charIndex[s[end]] = end;maxLength = fmax(maxLength, end - start + 1);}return maxLength;
}
3. 字母异位词分组
#define PRIME_NUM 31long hashString(char* str) {long hash = 0;for(int i = 0; str[i] != '\0'; i++) {hash = PRIME_NUM * hash + str[i];}return hash;
}char*** groupAnagrams(char** strs, int strsSize, int* returnSize, int** returnColumnSizes) {HashMap* map = createHashMap();int groupCount = 0;for(int i = 0; i < strsSize; i++) {// 复制字符串并排序作为键char* key = strdup(strs[i]);int len = strlen(key);qsort(key, len, sizeof(char), compareChar);long hash = hashString(key);int index = get(map, hash);if(index == -1) {put(map, hash, groupCount);index = groupCount++;}free(key);}// 分配结果内存// ... 具体实现略 ...return result;
}
4. 存在重复元素
bool containsDuplicate(int* nums, int numsSize) {HashSet* set = createHashSet();for(int i = 0; i < numsSize; i++) {if(contains(set, nums[i])) {return true;}add(set, nums[i]);}return false;
}
5. 最长连续序列
int longestConsecutive(int* nums, int numsSize) {HashSet* set = createHashSet();// 将所有数字加入集合for(int i = 0; i < numsSize; i++) {add(set, nums[i]);}int maxLength = 0;for(int i = 0; i < numsSize; i++) {// 只有当当前数字是序列起点时才检查if(!contains(set, nums[i] - 1)) {int currentNum = nums[i];int currentLength = 1;while(contains(set, currentNum + 1)) {currentNum++;currentLength++;}maxLength = fmax(maxLength, currentLength);}}return maxLength;
}
6. 设计LRU缓存
typedef struct LRUNode {int key;int value;struct LRUNode* prev;struct LRUNode* next;
} LRUNode;typedef struct {int capacity;int size;LRUNode* head;LRUNode* tail;LRUNode** hashMap; // 简化的哈希表,实际应使用更高效的实现
} LRUCache;LRUCache* lRUCacheCreate(int capacity) {LRUCache* cache = (LRUCache*)malloc(sizeof(LRUCache));cache->capacity = capacity;cache->size = 0;cache->head = NULL;cache->tail = NULL;cache->hashMap = (LRUNode**)calloc(10000, sizeof(LRUNode*)); // 假设key范围在0-9999return cache;
}void moveToHead(LRUCache* cache, LRUNode* node) {if(node == cache->head) return;// 从链表中移除节点if(node->prev) node->prev->next = node->next;if(node->next) node->next->prev = node->prev;// 如果是尾节点,更新尾指针if(node == cache->tail) {cache->tail = node->prev;}// 将节点移到头部node->prev = NULL;node->next = cache->head;if(cache->head) cache->head->prev = node;cache->head = node;// 如果链表为空,更新尾指针if(cache->tail == NULL) {cache->tail = node;}
}int lRUCacheGet(LRUCache* obj, int key) {LRUNode* node = obj->hashMap[key];if(node == NULL) return -1;moveToHead(obj, node);return node->value;
}void lRUCachePut(LRUCache* obj, int key, int value) {LRUNode* node = obj->hashMap[key];if(node != NULL) {// 更新现有节点的值并移到头部node->value = value;moveToHead(obj, node);return;}// 创建新节点node = (LRUNode*)malloc(sizeof(LRUNode));node->key = key;node->value = value;node->prev = NULL;node->next = NULL;// 如果缓存已满,移除最近最少使用的节点if(obj->size == obj->capacity) {LRUNode* tail = obj->tail;obj->hashMap[tail->key] = NULL;if(obj->head == obj->tail) {obj->head = NULL;obj->tail = NULL;} else {obj->tail = tail->prev;obj->tail->next = NULL;}free(tail);obj->size--;}// 添加新节点到头部node->next = obj->head;if(obj->head) obj->head->prev = node;obj->head = node;if(obj->tail == NULL) {obj->tail = node;}obj->hashMap[key] = node;obj->size++;
}
哈希表和哈希集合是计算机科学中最高效的数据结构之一,广泛应用于数据库索引、缓存实现、编译器符号表等领域。通过合理的哈希函数设计和冲突解决方法,可以在大多数情况下实现接近常数时间的操作。理解这些数据结构的实现原理和应用场景,对于解决实际编程问题至关重要。