问题描述
我一直在为动态大小的哈希表从头开始编写 C 实现。我犯了一个严重的错误,因为我的散列函数是基于散列表的容量。由于容量随时间变化,这不起作用。为动态分配的哈希表开发哈希函数有哪些建议?
此外,我正在使用二次探测,因此我的调整大小基于此。例如,如果我的哈希表容量为 8,并且一个新键最初被哈希到索引 0,那么使用二次探测依次计算出的新索引将是 1 (0 + 1^2)、5 (1 + 2^2)、14 (5 + 3^2) 等。我会停在 14,因为它大于 8。所以,我会创建一个容量为 15 的新哈希表。我想保留这个实现,但如果有更好的方法,我也愿意改变它。无论如何,我仍在寻找如何为动态数组而不是静态数组开发哈希函数。
编辑: 我的意思是,因为我的哈希函数是基于哈希表的容量,所以在表大小改变之后再去检索元素时,它就不起作用了。例如,在我的主程序中,我删除键为“A”的元素,然后再次打印出表格,但 A 仍然存在。这是因为我使用哈希函数来查找“A”所在的位置以将其删除,但删除“A”时哈希函数的结果已经不同,因为插入“A”时的容量与我试图删除它时的容量不同。所以,哈希函数没有把我带到正确的位置。
我读过一些关于调整哈希表大小时的内容,我只需要使用新哈希表的大小重新哈希当前哈希表中的所有元素。我只是想知道除此之外是否还有其他方法可以做到这一点。
status.h
#ifndef STATUS_H
#define STATUS_H

/* Outcome reported by operations that can fail. */
typedef enum status {
    FAILURE = 0,
    SUCCESS = 1
} Status;

/* Two-valued truth type used by the hash table API. */
typedef enum boolean {
    FALSE = 0,
    TRUE = 1
} Boolean;

#endif
HashTableElement.h
#ifndef KEY_AND_DATA_H
#define KEY_AND_DATA_H
#include "status.h"
/* Opaque handle to one key/data pair stored in the hash table. */
typedef void* HASH_TABLE_ELEMENT;
/*Precondition: none
Postcondition: returns a handle to a new hash table element. Else returns NULL */
HASH_TABLE_ELEMENT hash_table_element_create(char* key,int data);
/*Precondition: hHash_table_element is a handle to a valid hash table element,data is the
new data value.
Postcondition: the data inside the hash table element has been updated. */
void hash_table_element_update(HASH_TABLE_ELEMENT hHash_table_element,int data);
/*Precondition: hHash_table_element is a handle to a valid hash table element.
Postcondition: returns the data value. */
int hash_table_element_get_data(HASH_TABLE_ELEMENT hHash_table_element);
/*Precondition: hHash_table_element is a handle to a valid hash table element.
Postcondition: returns the key */
const char* hash_table_element_get_key(HASH_TABLE_ELEMENT hHash_table_element);
/*Precondition: hHash_table_element1 and 2 are handles to valid hash table elements.
Postcondition: returns TRUE if the two keys compare equal,FALSE otherwise */
Boolean hash_table_element_keys_match(HASH_TABLE_ELEMENT hHash_table_element1,HASH_TABLE_ELEMENT hHash_table_element2);
/*Precondition: hHash_table_element is a handle to a valid hash table element,ch points to
storage for one character.
Postcondition: stores the key character at position index in *ch and returns SUCCESS. If index
is greater than the key length,stores '\0' in *ch and returns FAILURE */
Status hash_table_element_get_character_by_index(HASH_TABLE_ELEMENT hHash_table_element,int index,char* ch);
/*Precondition: phHash_table_element points to a hash table element handle (the handle may be NULL).
Postcondition: the key and the element have been freed and the handle is set to NULL.
Does nothing when the handle is already NULL */
void hash_table_element_destroy(HASH_TABLE_ELEMENT* phHash_table_element);
#endif
HashTable.h
#ifndef HASH_TABLE_H
#define HASH_TABLE_H
#include "status.h"

/* Opaque handle to a hash table that maps string keys to int data. */
typedef void* HASH_TABLE;

/* Precondition: none
   Postcondition: returns a handle to an empty hash table or NULL on failure */
HASH_TABLE hash_table_init_default(unsigned initial_capacity);

/* Precondition: capacity is the capacity of the hash table.
   key is the key to be hashed.
   Postcondition: returns an index in the hash table that comes from
   hashing the key with the hash table capacity */
unsigned hash_table_hash(unsigned capacity, char* key);

/* Precondition: hHash_table is a handle to a valid hash table
   Postcondition: returns the capacity */
unsigned hash_table_get_capacity(HASH_TABLE hHash_table);

/* Precondition: hHash_table is a handle to a valid hash table. key and data
   are the info to be put into the hash table
   Postcondition: a new element has been created and inserted in the hash table,
   or the data of an existing element with the same key has been updated.
   Returns FAILURE for any memory allocation failure */
Status hash_table_insert(HASH_TABLE hHash_table, char* key, int data);

/* Precondition: hHash_table is a handle to a valid hash table object. key is the
   key to search for.
   Postcondition: if the key exists, stores its data in *data and returns SUCCESS.
   Else, returns FAILURE and stores a 0 in *data */
Status hash_table_get_data_by_key(HASH_TABLE hHash_table, char* key, int* data);

/* Precondition: hHash_table is a handle to a hash table. key is the key to be looked for.
   Postcondition: if the key exists, stores the index in *indexOfKey and returns TRUE. If it
   doesn't, returns FALSE and stores a 0 in *indexOfKey */
Boolean hash_table_get_key_index(HASH_TABLE hHash_table, char* key, unsigned* indexOfKey);

/* Precondition: hHash_table is a handle to a hash table. index is the index to search.
   data receives the data at the index.
   Postcondition: returns SUCCESS and stores the data value at that index in *data. If the index
   is out of range, or the slot at that index is empty, returns FAILURE and *data is set to 0 */
Status hash_table_get_data_by_index(HASH_TABLE hHash_table, unsigned index, int* data);

/* Precondition: hHash_table is a handle to a hash table. index is the index to search.
   key is a caller-supplied buffer large enough for the stored key and its terminator.
   Postcondition: returns SUCCESS and copies the key at that index into key. If the index
   is out of range, or the slot is empty, returns FAILURE and key is set to the empty string */
Status hash_table_get_key_by_index(HASH_TABLE hHash_table, unsigned index, char* key);

/* Precondition: hHash_table is a handle to a valid hash table object. key is the
   key to be searched for
   Postcondition: if the element corresponding to the key exists, it is removed and
   SUCCESS is returned. Else, FAILURE is returned */
Status hash_table_remove_element(HASH_TABLE hHash_table, char* key);

/* Precondition: phHash_table is a pointer to a handle to a hash table
   Postcondition: all memory associated with the hash table has been freed
   and the hash table handle is set to NULL */
void hash_table_destroy(HASH_TABLE* phHash_table);

/* Prints every slot of the table (index, key, data) to standard output. */
void debug(HASH_TABLE hHash_table);
#endif
HashTableElement.c
#include "HashTableElement.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Concrete record behind the opaque HASH_TABLE_ELEMENT handle. */
typedef struct hash_table_element {
    char* key;          /* private, NUL-terminated copy of the key */
    int data;           /* value associated with the key */
    unsigned capacity;  /* meant to hold the table capacity at creation time;
                           hash_table_element_create does not assign it */
} Hash_table_element;
HASH_TABLE_ELEMENT hash_table_element_create(char* key,int data) {
Hash_table_element* pHash_table_element = (Hash_table_element*)malloc(sizeof(Hash_table_element));
if (pHash_table_element != NULL) {
pHash_table_element->key = (char*)malloc(sizeof(char) * (strlen(key) + 1));
if (pHash_table_element->key == NULL) {
free(pHash_table_element);
return NULL;
}
for (unsigned i = 0; i < strlen(key); i++)
pHash_table_element->key[i] = key[i];
pHash_table_element->key[strlen(key)] = '\0';
pHash_table_element->data = data;
}
return (HASH_TABLE_ELEMENT)pHash_table_element;
}
/* Overwrites the value stored in the element with `data`; the key is untouched. */
void hash_table_element_update(HASH_TABLE_ELEMENT hHash_table_element, int data) {
    Hash_table_element* element = hHash_table_element;

    element->data = data;
}
/* Returns the value currently stored in the element. */
int hash_table_element_get_data(HASH_TABLE_ELEMENT hHash_table_element) {
    const Hash_table_element* element = hHash_table_element;
    int stored = element->data;

    return stored;
}
/* Returns a read-only pointer to the element's own key string (not a copy). */
const char* hash_table_element_get_key(HASH_TABLE_ELEMENT hHash_table_element) {
    const Hash_table_element* element = hHash_table_element;

    return element->key;
}
/* Returns TRUE when both elements hold equal key strings, FALSE otherwise. */
Boolean hash_table_element_keys_match(HASH_TABLE_ELEMENT hHash_table_element1, HASH_TABLE_ELEMENT hHash_table_element2) {
    const Hash_table_element* lhs = hHash_table_element1;
    const Hash_table_element* rhs = hHash_table_element2;

    return (strcmp(lhs->key, rhs->key) == 0) ? TRUE : FALSE;
}
/*
 * Fetches one character of the element's key.
 *
 * index: position inside the key; the position of the terminating '\0'
 *        (index == key length) is still accepted.
 * ch:    receives the character.
 *
 * Returns SUCCESS and stores key[index] in *ch. When index is negative or
 * beyond the terminator, stores '\0' in *ch and returns FAILURE.
 *
 * The `index` parameter is restored here: the body used it and the header
 * declares it, but the definition's parameter list had lost it.
 */
Status hash_table_element_get_character_by_index(HASH_TABLE_ELEMENT hHash_table_element, int index, char* ch) {
    Hash_table_element* element = (Hash_table_element*)hHash_table_element;

    if (index < 0 || (size_t)index > strlen(element->key)) {
        *ch = '\0';
        return FAILURE;
    }
    *ch = element->key[index];
    return SUCCESS;
}
/*
 * Frees the element referenced by *phHash_table_element (its key first, then
 * the element) and resets the handle to NULL. A NULL handle is left alone.
 */
void hash_table_element_destroy(HASH_TABLE_ELEMENT* phHash_table_element) {
    Hash_table_element* element = (Hash_table_element*)*phHash_table_element;

    if (element == NULL)
        return;

    free(element->key);
    free(element);
    *phHash_table_element = NULL;
}
HashTable.c
#include "HashTable.h"
#include "HashTableElement.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Concrete record behind the opaque HASH_TABLE handle. */
typedef struct hash_table {
    HASH_TABLE_ELEMENT* table;  /* slot array; a NULL slot holds no element */
    unsigned capacity;          /* number of slots in `table` */
} Hash_table;
/*
 * Creates an empty hash table with `initial_capacity` slots.
 *
 * A requested capacity of 0 is raised to 1: a zero-capacity table would make
 * the first hash computation divide by zero.
 *
 * Returns a handle to the new table, or NULL if memory cannot be allocated
 * or the slot array size would overflow size_t.
 */
HASH_TABLE hash_table_init_default(unsigned initial_capacity) {
    if (initial_capacity == 0)
        initial_capacity = 1;
    if (initial_capacity > (size_t)-1 / sizeof(HASH_TABLE_ELEMENT))
        return NULL;

    Hash_table* pHash_table = malloc(sizeof(*pHash_table));
    if (pHash_table == NULL)
        return NULL;

    pHash_table->table = malloc(sizeof(*pHash_table->table) * initial_capacity);
    if (pHash_table->table == NULL) {
        free(pHash_table);
        return NULL;
    }

    /* Every slot starts out empty. */
    for (unsigned i = 0; i < initial_capacity; i++)
        pHash_table->table[i] = NULL;

    pHash_table->capacity = initial_capacity;
    return (HASH_TABLE)pHash_table;
}
/*
 * Hashes `key` to a slot index for a table with `capacity` slots.
 *
 * The hash is the sum of the key's bytes, each read as unsigned char so the
 * result does not depend on whether plain char is signed, reduced modulo
 * `capacity`.
 *
 * Returns a value in [0, capacity). A capacity of 0 yields 0 instead of
 * dividing by zero.
 */
unsigned hash_table_hash(unsigned capacity, char* key) {
    if (capacity == 0)
        return 0;

    unsigned sum = 0;
    /* Walk to the terminator once instead of calling strlen() per iteration. */
    for (const unsigned char* p = (const unsigned char*)key; *p != '\0'; p++)
        sum += *p;

    return sum % capacity;
}
/* Returns the number of slots the table currently has. */
unsigned hash_table_get_capacity(HASH_TABLE hHash_table) {
    const Hash_table* table = hHash_table;

    return table->capacity;
}
Status hash_table_insert(HASH_TABLE hHash_table,int data) {
Hash_table* pHash_table = (Hash_table*)hHash_table;
unsigned index = hash_table_hash(pHash_table->capacity,key);
unsigned quadraticNum = 1;
Boolean overflow = (Boolean)(index >= pHash_table->capacity);
while (!overflow && pHash_table->table[index] != NULL) {
if (!strcmp(hash_table_element_get_key(pHash_table->table[index]),key)) {
hash_table_element_update(pHash_table->table[index],data);
return SUCCESS;
}
else {
index += quadraticNum * quadraticNum;
quadraticNum++;
if (index >= pHash_table->capacity) {
overflow = TRUE;
}
}
}
if (overflow) {
unsigned newCapacity = index + 1;
HASH_TABLE_ELEMENT* newTable = (HASH_TABLE_ELEMENT*)malloc(sizeof(HASH_TABLE_ELEMENT) * newCapacity);
if (newTable == NULL)
return FAILURE;
for (unsigned i = 0; i < pHash_table->capacity; i++) {
if (pHash_table->table[i] == NULL)
newTable[i] = NULL;
else {
newTable[i] =
hash_table_element_create(hash_table_element_get_key(pHash_table->table[i]),hash_table_element_get_data(pHash_table->table[i]));
if (newTable[i] == NULL) {
for (int j = i - 1; j >= 0; j--)
hash_table_element_destroy(&(newTable[j]));
free(newTable);
return FAILURE;
}
}
}
for (unsigned i = pHash_table->capacity; i < newCapacity - 1; i++)
newTable[i] = NULL;
newTable[newCapacity - 1] = hash_table_element_create(key,data,pHash_table->capacity);
if (newTable[newCapacity - 1] == NULL) {
for (int i = newCapacity - 2; i >= 0; i--)
hash_table_element_destroy(&(newTable[i]));
free(newTable);
return FAILURE;
}
for (unsigned i = 0; i < pHash_table->capacity; i++)
hash_table_element_destroy(&(pHash_table->table[i]));
free(pHash_table->table);
pHash_table->table = newTable;
pHash_table->capacity = newCapacity;
return SUCCESS;
}
else {
pHash_table->table[index] = hash_table_element_create(key,pHash_table->capacity);
if (pHash_table->table[index] == NULL)
return FAILURE;
return SUCCESS;
}
}
/*
 * Looks up the slot that holds `key`.
 *
 * hHash_table: handle to a valid hash table.
 * key:         NUL-terminated key to search for (parameter restored; the body
 *              used it but the parameter list had lost it).
 * indexOfKey:  receives the slot index on success, 0 otherwise.
 *
 * Follows the probe sequence h, h+1^2, h+1^2+2^2, ... until it leaves the
 * table. Empty slots are skipped rather than ending the search, so a removed
 * element does not hide keys stored further along the sequence.
 *
 * Returns TRUE if the key was found, FALSE otherwise.
 */
Boolean hash_table_get_key_index(HASH_TABLE hHash_table, char* key, unsigned* indexOfKey) {
    Hash_table* pHash_table = (Hash_table*)hHash_table;

    /* An empty table holds nothing, and hashing with capacity 0 would divide by zero. */
    if (pHash_table->capacity != 0) {
        unsigned index = hash_table_hash(pHash_table->capacity, key);
        unsigned quadraticNum = 1;
        while (index < pHash_table->capacity) {
            HASH_TABLE_ELEMENT occupant = pHash_table->table[index];
            if (occupant != NULL && strcmp(key, hash_table_element_get_key(occupant)) == 0) {
                *indexOfKey = index;
                return TRUE;
            }
            index += quadraticNum * quadraticNum;
            quadraticNum++;
        }
    }
    *indexOfKey = 0;
    return FALSE;
}
/*
 * Retrieves the data stored under `key`.
 *
 * key:  NUL-terminated key to search for (parameter restored; the body
 *       already passed it on to hash_table_get_key_index).
 * data: receives the stored value, or 0 when the key is absent.
 *
 * Returns SUCCESS if the key exists, FAILURE otherwise.
 */
Status hash_table_get_data_by_key(HASH_TABLE hHash_table, char* key, int* data) {
    Hash_table* pHash_table = (Hash_table*)hHash_table;
    unsigned indexOfKey = 0;

    if (!hash_table_get_key_index(hHash_table, key, &indexOfKey)) {
        *data = 0;
        return FAILURE;
    }
    *data = hash_table_element_get_data(pHash_table->table[indexOfKey]);
    return SUCCESS;
}
/*
 * Retrieves the data stored in slot `index`.
 *
 * index: slot to read (parameter restored; the body used it but the
 *        parameter list had lost it).
 * data:  receives the stored value, or 0 on failure.
 *
 * Returns SUCCESS, or FAILURE when `index` is out of range or the slot is empty.
 */
Status hash_table_get_data_by_index(HASH_TABLE hHash_table, unsigned index, int* data) {
    Hash_table* pHash_table = (Hash_table*)hHash_table;

    if (index >= pHash_table->capacity || pHash_table->table[index] == NULL) {
        *data = 0;
        return FAILURE;
    }
    *data = hash_table_element_get_data(pHash_table->table[index]);
    return SUCCESS;
}
/*
 * Copies the key stored in slot `index` into the caller's buffer.
 *
 * index: slot to read (parameter restored; the body used it but the
 *        parameter list had lost it).
 * key:   destination buffer. The interface carries no buffer size, so the
 *        caller must provide room for the stored key plus its terminator.
 *
 * Returns SUCCESS, or FAILURE (with `key` set to the empty string) when
 * `index` is out of range or the slot is empty.
 */
Status hash_table_get_key_by_index(HASH_TABLE hHash_table, unsigned index, char* key) {
    Hash_table* pHash_table = (Hash_table*)hHash_table;

    if (index >= pHash_table->capacity || pHash_table->table[index] == NULL) {
        key[0] = '\0';
        return FAILURE;
    }

    /* One length computation and one copy, terminator included. */
    const char* stored = hash_table_element_get_key(pHash_table->table[index]);
    memcpy(key, stored, strlen(stored) + 1);
    return SUCCESS;
}
/*
 * Removes the element stored under `key`.
 *
 * The key is now passed to the lookup; the original call omitted it, so the
 * lookup could not search for the requested key.
 *
 * Returns SUCCESS if the element existed and was destroyed (its slot becomes
 * empty), FAILURE if the key was not found.
 */
Status hash_table_remove_element(HASH_TABLE hHash_table, char* key) {
    Hash_table* pHash_table = (Hash_table*)hHash_table;
    unsigned indexOfKey = 0;

    if (!hash_table_get_key_index(hHash_table, key, &indexOfKey))
        return FAILURE;

    hash_table_element_destroy(&(pHash_table->table[indexOfKey]));
    return SUCCESS;
}
/*
 * Destroys every element, frees the slot array and the table itself, and
 * sets the caller's handle to NULL.
 *
 * A NULL pointer or an already-NULL handle is ignored, matching the behavior
 * of hash_table_element_destroy() (the original dereferenced a NULL handle).
 */
void hash_table_destroy(HASH_TABLE* phHash_table) {
    if (phHash_table == NULL || *phHash_table == NULL)
        return;

    Hash_table* pHash_table = (Hash_table*)*phHash_table;
    for (unsigned i = 0; i < pHash_table->capacity; i++)
        hash_table_element_destroy(&(pHash_table->table[i]));
    free(pHash_table->table);
    free(pHash_table);
    *phHash_table = NULL;
}
/*
 * Prints one line per slot to standard output: the slot index, then the key
 * and data stored there, or "DNE" for both when the slot is empty.
 *
 * The key is printed straight from the element rather than copied into a
 * fixed 100-byte buffer, so long keys cannot overflow anything. Each printf
 * now receives an argument for every conversion (the original omitted the key
 * argument), and the unsigned index is printed with %u.
 */
void debug(HASH_TABLE hHash_table) {
    Hash_table* pHash_table = (Hash_table*)hHash_table;
    const char DNE[4] = "DNE";

    for (unsigned i = 0; i < pHash_table->capacity; i++) {
        HASH_TABLE_ELEMENT element = pHash_table->table[i];

        printf("Index: %-10u", i);
        if (element == NULL) {
            printf("Key: %-10sData: %-10s\n", DNE, DNE);
        }
        else {
            printf("Key: %-10sData: %-10d\n",
                   hash_table_element_get_key(element),
                   hash_table_element_get_data(element));
        }
    }
}
main.c
#include <stdio.h>
#include "HashTable.h"
#include <string.h>
#include <vld.h>
int main(int argc,char** argv) {
HASH_TABLE hHash_table = hash_table_init_default(5);
char key[3] = "A";
unsigned num = 1;
for (unsigned i = 0; i < 26; i++) {
hash_table_insert(hHash_table,num);
key[0] = key[0] + 1;
num++;
}
debug(hHash_table);
printf("\n\n\n");
hash_table_remove_element(hHash_table,"A");
debug(hHash_table);
hash_table_destroy(&hHash_table);
return 0;
}
Visual Leak Detector read settings from: C:\Program Files (x86)\Visual Leak Detector\vld.ini
Visual Leak Detector Version 2.5.1 installed.
Index: 0 Key: A Data: 1
Index: 1 Key: B Data: 2
Index: 2 Key: C Data: 3
Index: 3 Key: D Data: 4
Index: 4 Key: E Data: 5
Index: 5 Key: F Data: 6
Index: 6 Key: G Data: 7
Index: 7 Key: H Data: 8
Index: 8 Key: S Data: 19
Index: 9 Key: Q Data: 17
Index: 10 Key: J Data: 10
Index: 11 Key: K Data: 11
Index: 12 Key: L Data: 12
Index: 13 Key: M Data: 13
Index: 14 Key: N Data: 14
Index: 15 Key: I Data: 9
Index: 16 Key: O Data: 15
Index: 17 Key: P Data: 16
Index: 18 Key: V Data: 22
Index: 19 Key: W Data: 23
Index: 20 Key: X Data: 24
Index: 21 Key: Y Data: 25
Index: 22 Key: Z Data: 26
Index: 23 Key: T Data: 20
Index: 24 Key: R Data: 18
Index: 25 Key: DNE Data: DNE
Index: 26 Key: DNE Data: DNE
Index: 27 Key: DNE Data: DNE
Index: 28 Key: DNE Data: DNE
Index: 29 Key: DNE Data: DNE
Index: 30 Key: DNE Data: DNE
Index: 31 Key: DNE Data: DNE
Index: 32 Key: DNE Data: DNE
Index: 33 Key: DNE Data: DNE
Index: 34 Key: DNE Data: DNE
Index: 35 Key: DNE Data: DNE
Index: 36 Key: DNE Data: DNE
Index: 37 Key: DNE Data: DNE
Index: 38 Key: DNE Data: DNE
Index: 39 Key: DNE Data: DNE
Index: 40 Key: U Data: 21
Index: 0 Key: A Data: 1
Index: 1 Key: B Data: 2
Index: 2 Key: C Data: 3
Index: 3 Key: D Data: 4
Index: 4 Key: E Data: 5
Index: 5 Key: F Data: 6
Index: 6 Key: G Data: 7
Index: 7 Key: H Data: 8
Index: 8 Key: S Data: 19
Index: 9 Key: Q Data: 17
Index: 10 Key: J Data: 10
Index: 11 Key: K Data: 11
Index: 12 Key: L Data: 12
Index: 13 Key: M Data: 13
Index: 14 Key: N Data: 14
Index: 15 Key: I Data: 9
Index: 16 Key: O Data: 15
Index: 17 Key: P Data: 16
Index: 18 Key: V Data: 22
Index: 19 Key: W Data: 23
Index: 20 Key: X Data: 24
Index: 21 Key: Y Data: 25
Index: 22 Key: Z Data: 26
Index: 23 Key: T Data: 20
Index: 24 Key: R Data: 18
Index: 25 Key: DNE Data: DNE
Index: 26 Key: DNE Data: DNE
Index: 27 Key: DNE Data: DNE
Index: 28 Key: DNE Data: DNE
Index: 29 Key: DNE Data: DNE
Index: 30 Key: DNE Data: DNE
Index: 31 Key: DNE Data: DNE
Index: 32 Key: DNE Data: DNE
Index: 33 Key: DNE Data: DNE
Index: 34 Key: DNE Data: DNE
Index: 35 Key: DNE Data: DNE
Index: 36 Key: DNE Data: DNE
Index: 37 Key: DNE Data: DNE
Index: 38 Key: DNE Data: DNE
Index: 39 Key: DNE Data: DNE
Index: 40 Key: U Data: 21
No memory leaks detected.
Visual Leak Detector is Now exiting.
C:\UML\Computer Science\COMP.1020 Computing II\Interfaces\Hash Table ADT\No Duplicates\Hash Table ADT\Debug\Hash Table ADT.exe (process 24304) exited with code 0.
Press any key to close this window . . .
解决方法
警告:这与其说是算法更改,不如说是样式更改。这是因为所使用的样式由于样式本身的冗长而掩盖了很多算法。
通常,这个问题在 codereview 上会更好。但是,你也觉得你有错误。由于风格,它可以很容易地掩盖错误。
我重构了你的代码,并且运行了它。没有内存泄漏,所以我不确定问题出在哪里。
您定义(例如):
typedef void *HASH_TABLE_ENTRY;
而且,你随处使用
然后,在给定的函数中,您将转换为:
Hash_table_element *pHash_table_element = (Hash_table_element *) hHash_table_element;
这严重破坏了类型安全,可能掩盖大量微妙的错误。
这部分是因为您将实际的 struct 定义放在了 .c 文件中。
只需将真正的 struct 定义放在 .h 文件中,然后去掉所有的类型转换。
此外,对于指针类型使用 typedef
被某些开发人员视为“代码异味”。
你并不需要一个不透明的“句柄”。而且,即使你需要,这也不是正确的做法。[不要这样做,但是...] 要创建一个类型安全的句柄,您需要(例如):
typedef struct {
void *hte_handle;
} *HTE_HANDLE;
一个好的样式规则是为函数参数和函数作用域变量使用短名称。而且,变量名称不必复制其名称中的类型。替换(例如):
Hash_table_element *pHash_table_element;
与:
Hash_table_element *hte;
而且,typedef
名称有点长。考虑更换(例如):
typedef struct { ... } Hash_table_element;
与:
typedef struct { ... } hte_t;
对于长函数名也是如此。 一切前缀(例如):
hash_table_element_create(char *key,int data)
代替:
hte_create(char *key,int data)
你在做什么:
for (i = 0; i < strlen(key); ++i)
这很慢。它将运行时间从 O(n) 增加到 O(n^2)。最好这样做:
size_t keylen = strlen(key);
for (i = 0; i < keylen; ++i)
这是您的代码的串联重构。由于涉及的文件,我创建了一个连接文件。它在前面有 perl
代码来自动提取文件。或者,在 __DATA__
行之后,每个文件都以:% filename
#!/usr/bin/perl
# tbin/ovrcat.pm -- archive extractor
# Entry point: run the extractor, then exit with status 0.
ovrcat(@ARGV);
exit(0);
# ovrcat -- read the archive stored after __DATA__ and write each member file.
# A line of the form "% <filename>" selects the output file; every other line
# is appended to the currently selected file.
sub ovrcat
{
my($xfsrc,$bf);
my($file,$xfcur);
$pgmtail = "ovrcat";
# Build a reference to the DATA filehandle of package ovrcat by name.
$xfsrc = "ovrcat::DATA";
$xfsrc = \*$xfsrc;
while ($bf = <$xfsrc>) {
chomp($bf);
# "% name" header line: switch the output file and skip the header itself.
if ($bf =~ /^%\s+(.+)$/) {
setofile($1);
next;
}
# Lines seen before any header are dropped ($xfdst is not yet a handle).
print($xfdst $bf,"\n")
if (ref($xfdst));
}
# Close every output file opened during extraction.
while (($file,$xfcur) = each(%lookup)) {
close($xfcur);
}
}
# setofile -- make $ofile the current output ($xfdst), opening it for writing
# the first time it is named and reusing the cached handle afterwards.
sub setofile
{
my($ofile) = @_;
my($xfcur);
# Bare block so that "last" can leave early once a cached handle is found.
{
$xfdst = $lookup{$ofile};
last if (ref($xfdst));
printf("$pgmtail: extracting %s ...\n",$ofile);
open($xfcur,">$ofile") or
die("ovrcat: unable to open '$ofile' -- $!\n");
$lookup{$ofile} = $xfcur;
$xfdst = $xfcur;
}
}
# The archive body after __DATA__ belongs to this package's DATA handle.
package ovrcat;
1;
__DATA__
% htable.h
#ifndef HASH_TABLE_H
#define HASH_TABLE_H
#include <status.h>
#include <hte.h>
/* Hash table: an array of element pointers plus the number of slots. */
typedef struct hash_table {
    hte_t **table;      /* slot array; a NULL slot holds no element */
    unsigned capacity;  /* number of slots in `table` */
} hashtable_t;
#if 0
/* Precondition: none
Postcondition: returns a handle to an empty hash table or NULL on Failure */
hash_table_t *hash_table_init_default(unsigned initial_capacity);
/* Precondition: capacity is the capacity of the hash table.
key is the key to be hased.
Postcondition: returns an index in the hash table that comes from
hasing the key with the hash table capacity */
unsigned hash_table_hash(unsigned capacity,char *key);
/* Precondition: hHash_table is a handle to a valid hashtable_t
Postcondition: returns the capacity */
unsigned hash_table_get_capacity(hashtable_t *table);
/* Precondition: hHash_table is a handle to a valid hash table. Key and data
are the info to be put into the hash_table
Postcondition: a new element has been created and inserted in the hash table
Returns FAILURE for any memory allocation failure */
Status hash_table_insert(hashtable_t *hHash_table,char *key,int data);
/* Precondition: hHash_table is a handle to a valid hash table object. Key is the
key to search for.
Postcondition: if the key exists,stores it in data and returns SUCCESS. Else,returns FAILURE and stores a 0 in data */
Status hash_table_get_data_by_key(hashtable_t *hHash_table,int *data);
/* Precondition: hHash_table is a handle to a hash table. key is the key to be looked for.
Postcondition: if the key exists,stores the index in indexOfKey and returns true. If it
doesn't,returns false and stors a 0 in indexOfKey */
Boolean hash_table_get_key_index(hashtable_t *hHash_table,unsigned *indexOfKey);
/* Precondition: hHash_table is a handle to a hash table. Index is the index to search.
Data stores the data at the index.
Postcondition: returns SUCCESS and stores the data value at that index in data. If the index
caused overflow,or the index was NULL,returns FAILIURE and data is set to 0 */
Status hash_table_get_data_by_index(hashtable_t *hHash_table,int index,int *data);
/* Precondition: hHash_table is a handle to a hash table. Index is the index to search.
Data stores the data at the index.
Postcondition: returns SUCCESS and stores the key at that index in key. If the index
caused overflow,returns FAILIURE and key is set as the empty string */
Status hash_table_get_key_by_index(hashtable_t *hHash_table,char *key);
/* Precondition: hHash_table is a handle to a valid hash table object. Key is the
key to be searched for
Postcondition: if the element corresponding to the key exists,it is removed and
SUCCESS is returned. Else,it FAILURE is returned */
Status hash_table_remove_element(hashtable_t *hHash_table,char *key);
/* Precondition: phHash_table is a pointer to a handle to a hash table
Postcondion: all memory associated with the hash table has been freed.
and the hash table handle is set to NULL */
void hash_table_destroy(hashtable_t ** phHash_table);
void debug(hashtable_t *hHash_table);
#endif
#include <htable.proto>
#endif
% hte.h
#ifndef HTE_H
#define HTE_H
#include <status.h>
/* One key/data pair stored in the hash table. */
typedef struct hte {
    char *key;          /* element's own NUL-terminated key string */
    int data;           /* value associated with the key */
    unsigned capacity;  /* meant to hold the table capacity at creation time;
                           hte_create does not assign it */
} hte_t;
#if 0
typedef void *HASH_TABLE_ELEMENT;
/*Precondition: none
Postcondition: returns a handle to a new hash table element. Else returns NULL */
HASH_TABLE_ELEMENT
hte_create(char *key,int data);
/*Precondition: hHash_table_element is a handle to a valid hash table element,data is the
new data value.
Postcondition: the data inside the hash table has been updated. */
void
hte_update(HASH_TABLE_ELEMENT hHash_table_element,int data);
/*Precondition: hHash_table_element is a handle to a valid hash table element.
Postcondition: returns the data value. */
#if 0
int
hte_get_data(HASH_TABLE_ELEMENT hHash_table_element);
#else
int
hte_get_data(const HASH_TABLE_ELEMENT hHash_table_element);
#endif
/*Precondition: hHash_table_element is a handle to a valid hash table element.
Postcondition: returns the key */
const char *
hte_get_key(const hte_t *hte);
/*Precondition: hHash_table_element1 and 2 are handles to valid hash table elements.
Postcondition: returns true or false if the keys match or not*/
Boolean
hte_keys_match(HASH_TABLE_ELEMENT hHash_table_element1,HASH_TABLE_ELEMENT hHash_table_element2);
Status
hte_get_character_by_index(HASH_TABLE_ELEMENT hHash_table_element,char *ch);
void hte_destroy(HASH_TABLE_ELEMENT * phHash_table_element);
#endif
#include <hte.proto>
#endif
% status.h
#ifndef STATUS_H
#define STATUS_H

/* Outcome reported by operations that can fail. */
typedef enum status {
    FAILURE = 0,
    SUCCESS = 1
} Status;

/* Two-valued truth type used by the hash table API. */
typedef enum boolean {
    FALSE = 0,
    TRUE = 1
} Boolean;

#endif
% htable.c
#include <htable.h>
#include <hte.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
hashtable_t *
htable_init_default(unsigned initial_capacity)
{
hashtable_t *htab = malloc(sizeof(*htab));
if (htab != NULL) {
htab->table = malloc(sizeof(*htab->table) * initial_capacity);
if (htab->table == NULL) {
free(htab);
return NULL;
}
for (unsigned i = 0; i < initial_capacity; i++) {
htab->table[i] = NULL;
}
htab->capacity = initial_capacity;
}
return htab;
}
/*
 * Hashes `key` to a slot index for a table with `capacity` slots.
 *
 * The hash is the sum of the key's bytes, each read as unsigned char so the
 * result does not depend on whether plain char is signed, reduced modulo
 * `capacity`.
 *
 * Returns a value in [0, capacity). A capacity of 0 yields 0 instead of
 * dividing by zero.
 */
unsigned
htable_hash(unsigned capacity,const char *key)
{
    if (capacity == 0)
        return 0;

    unsigned sum = 0;
    for (const unsigned char *p = (const unsigned char *) key; *p != '\0'; p++)
        sum += *p;

    return sum % capacity;
}
/* Returns the number of slots the table currently has. */
unsigned
htable_get_capacity(const hashtable_t *htab)
{
    unsigned slots = htab->capacity;

    return slots;
}
Status
htable_insert(hashtable_t *htab,int data)
{
unsigned index = htable_hash(htab->capacity,key);
unsigned quadraticNum = 1;
Boolean overflow = (Boolean) (index >= htab->capacity);
while (!overflow && htab->table[index] != NULL) {
if (strcmp(hte_get_key(htab->table[index]),key) == 0) {
hte_update(htab->table[index],data);
return SUCCESS;
}
else {
index += quadraticNum * quadraticNum;
quadraticNum++;
if (index >= htab->capacity) {
overflow = TRUE;
}
}
}
if (overflow) {
unsigned newCapacity = index + 1;
hte_t **newTable = malloc(sizeof(*newTable) * newCapacity);
if (newTable == NULL)
return FAILURE;
for (unsigned i = 0; i < htab->capacity; i++) {
hte_t *htefrom = htab->table[i];
if (htefrom == NULL) {
newTable[i] = NULL;
continue;
}
newTable[i] = hte_create(hte_get_key(htefrom),hte_get_data(htefrom));
if (newTable[i] == NULL) {
for (int j = i - 1; j >= 0; j--)
hte_destroy(&newTable[j]);
free(newTable);
return FAILURE;
}
}
for (unsigned i = htab->capacity; i < newCapacity - 1; i++)
newTable[i] = NULL;
#if 0
newTable[newCapacity - 1] = hte_create(key,data,htab->capacity);
#else
newTable[newCapacity - 1] = hte_create(key,data);
#endif
if (newTable[newCapacity - 1] == NULL) {
for (int i = newCapacity - 2; i >= 0; i--)
hte_destroy(&newTable[i]);
free(newTable);
return FAILURE;
}
for (unsigned i = 0; i < htab->capacity; i++)
hte_destroy(&htab->table[i]);
free(htab->table);
htab->table = newTable;
htab->capacity = newCapacity;
return SUCCESS;
}
else {
#if 0
htab->table[index] = hte_create(key,htab->capacity);
#else
htab->table[index] = hte_create(key,data);
#endif
if (htab->table[index] == NULL)
return FAILURE;
return SUCCESS;
}
}
/*
 * Looks up the slot that holds `key`.
 *
 * Follows the probe sequence h, h+1^2, h+1^2+2^2, ... (h = hash of the key
 * for the current capacity) until it leaves the table; empty slots are
 * skipped rather than ending the search.
 *
 * Returns TRUE and stores the slot in *indexOfKey when found; otherwise
 * returns FALSE and stores 0.
 */
Boolean
htable_get_key_index(hashtable_t *htab,const char *key,unsigned *indexOfKey)
{
    unsigned slot = htable_hash(htab->capacity,key);

    for (unsigned step = 1; slot < htab->capacity; slot += step * step, step++) {
        hte_t *occupant = htab->table[slot];

        if (occupant == NULL)
            continue;
        if (strcmp(key,hte_get_key(occupant)) == 0) {
            *indexOfKey = slot;
            return TRUE;
        }
    }

    *indexOfKey = 0;
    return FALSE;
}
/*
 * Retrieves the data stored under `key`.
 *
 * key:  NUL-terminated key to search for (parameter restored; the body
 *       already passed it on to htable_get_key_index).
 * data: receives the stored value, or 0 when the key is absent.
 *
 * Returns SUCCESS if the key exists, FAILURE otherwise.
 */
Status
htable_get_data_by_key(hashtable_t *htab,const char *key,int *data)
{
    unsigned indexOfKey = 0;

    if (! htable_get_key_index(htab,key,&indexOfKey)) {
        *data = 0;
        return FAILURE;
    }

    *data = hte_get_data(htab->table[indexOfKey]);
    return SUCCESS;
}
/*
 * Retrieves the data stored in slot `index`.
 *
 * index: slot to read (parameter restored; the body used it but the
 *        parameter list had lost it).
 * data:  receives the stored value, or 0 on failure.
 *
 * Returns SUCCESS, or FAILURE when `index` is out of range or the slot is empty.
 */
Status
htable_get_data_by_index(hashtable_t *htab,unsigned index,int *data)
{
    if (index >= htab->capacity || htab->table[index] == NULL) {
        *data = 0;
        return FAILURE;
    }

    *data = hte_get_data(htab->table[index]);
    return SUCCESS;
}
/*
 * Copies the key stored in slot `index` into the caller's buffer.
 *
 * index: slot to read (parameter restored; the body used it but the
 *        parameter list had lost it).
 * key:   destination buffer. The interface carries no buffer size, so the
 *        caller must provide room for the stored key plus its terminator.
 *
 * Returns SUCCESS, or FAILURE (with `key` set to the empty string) when
 * `index` is out of range or the slot is empty.
 */
Status
htable_get_key_by_index(hashtable_t *htab,unsigned index,char *key)
{
    if (index >= htab->capacity || htab->table[index] == NULL) {
        key[0] = '\0';
        return FAILURE;
    }

    /* One length computation and one copy, terminator included. */
    const char *stored = hte_get_key(htab->table[index]);
    memcpy(key,stored,strlen(stored) + 1);
    return SUCCESS;
}
/*
 * Removes the element stored under `key`.
 *
 * The key is now passed to the lookup; the original call omitted it, which
 * does not match htable_get_key_index's three-parameter definition.
 *
 * Returns SUCCESS if the element existed and was destroyed (its slot becomes
 * empty), FAILURE if the key was not found.
 */
Status
htable_remove_element(hashtable_t *htab,const char *key)
{
    unsigned indexOfKey = 0;

    if (! htable_get_key_index(htab,key,&indexOfKey))
        return FAILURE;

    hte_destroy(&htab->table[indexOfKey]);
    return SUCCESS;
}
/*
 * Destroys every element, frees the slot array and the table itself, and
 * sets the caller's pointer to NULL.
 *
 * A NULL `phtab` or an already-NULL table pointer is ignored, matching the
 * behavior of hte_destroy() (the original dereferenced a NULL table).
 */
void
htable_destroy(hashtable_t **phtab)
{
    if (phtab == NULL || *phtab == NULL)
        return;

    hashtable_t *htab = *phtab;

    for (unsigned i = 0; i < htab->capacity; i++)
        hte_destroy(&htab->table[i]);
    free(htab->table);
    free(htab);
    *phtab = NULL;
}
/*
 * Prints one line per slot to standard output: the slot index, then the key
 * and data stored there, or "DNE" for both when the slot is empty.
 *
 * The key is printed straight from the element rather than copied into a
 * fixed 100-byte buffer, so long keys cannot overflow anything. Each printf
 * now receives an argument for every conversion (the original omitted the key
 * argument), and the unsigned index is printed with %u.
 */
void
debug(hashtable_t *htab)
{
    const char DNE[4] = "DNE";

    for (unsigned i = 0; i < htab->capacity; i++) {
        const hte_t *hte = htab->table[i];

        printf("Index: %-10u",i);
        if (hte == NULL) {
            printf("Key: %-10sData: %-10s\n",DNE,DNE);
        }
        else {
            printf("Key: %-10sData: %-10d\n",hte_get_key(hte),hte_get_data(hte));
        }
    }
}
% hte.c
#include <hte.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
hte_t *
hte_create(const char *key,int data)
{
hte_t *hte = malloc(sizeof(*hte));
//size_t keylen = strlen(key);
if (hte != NULL) {
hte->key = strdup(key);
if (hte->key == NULL) {
free(hte);
return NULL;
}
hte->data = data;
}
return hte;
}
/* Overwrites the value stored in the element with `data`; the key is untouched. */
void
hte_update(hte_t *hte,int data)
{
    int *stored = &hte->data;

    *stored = data;
}
/* Returns the value currently stored in the element. */
int
hte_get_data(const hte_t *hte)
{
    int stored = hte->data;

    return stored;
}
/* Returns a read-only pointer to the element's own key string (not a copy). */
const char *
hte_get_key(const hte_t *hte)
{
    const char *stored = hte->key;

    return stored;
}
/* Returns TRUE when both elements hold equal key strings, FALSE otherwise. */
Boolean
hte_keys_match(const hte_t *hte1,const hte_t *hte2)
{
    return (strcmp(hte1->key,hte2->key) == 0) ? TRUE : FALSE;
}
/*
 * Fetches one character of the element's key.
 *
 * index: position inside the key (parameter restored; the body used it but
 *        the parameter list had lost it). The position of the terminating
 *        '\0' (index == key length) is still accepted.
 * ch:    receives the character.
 *
 * Returns SUCCESS and stores key[index] in *ch. When index is negative or
 * beyond the terminator, stores '\0' in *ch and returns FAILURE.
 */
Status
hte_get_character_by_index(hte_t *hte,int index,char *ch)
{
    if (index < 0 || (size_t) index > strlen(hte->key)) {
        *ch = '\0';
        return FAILURE;
    }

    *ch = hte->key[index];
    return SUCCESS;
}
/*
 * Frees the element referenced by *phte (its key first, then the element)
 * and resets the caller's pointer to NULL. A NULL element is left alone.
 */
void
hte_destroy(hte_t **phte)
{
    hte_t *victim = *phte;

    if (victim == NULL)
        return;

    free(victim->key);
    free(victim);
    *phte = NULL;
}
% quadhash.c
#include <stdio.h>
#include <htable.h>
#include <string.h>
#if 0
#include <vld.h>
#endif
int
main(int argc,char **argv)
{
hashtable_t *htab = htable_init_default(5);
char key[3] = "A";
unsigned num = 1;
for (unsigned i = 0; i < 26; i++) {
htable_insert(htab,num);
key[0] = key[0] + 1;
num++;
}
debug(htab);
printf("\n\n\n");
htable_remove_element(htab,"A");
debug(htab);
htable_destroy(&htab);
return 0;
}
% htable.proto
// htable.proto -- prototypes
// The key/index parameters that the function bodies use are restored here;
// several prototypes had lost them.

// Creates an empty table; returns NULL on allocation failure.
hashtable_t *
htable_init_default(unsigned initial_capacity);

// Maps key to a slot index for a table with the given capacity.
unsigned
htable_hash(unsigned capacity,const char *key);

// Returns the number of slots in the table.
unsigned
htable_get_capacity(const hashtable_t *htab);

// Inserts key/data, or updates the data when key is already stored.
Status
htable_insert(hashtable_t *htab,const char *key,int data);

// Stores the slot holding key in *indexOfKey; returns TRUE when found.
Boolean
htable_get_key_index(hashtable_t *htab,const char *key,unsigned *indexOfKey);

// Stores the data for key in *data; returns FAILURE when key is absent.
Status
htable_get_data_by_key(hashtable_t *htab,const char *key,int *data);

// Stores the data held in slot index in *data; FAILURE for a bad or empty slot.
Status
htable_get_data_by_index(hashtable_t *htab,unsigned index,int *data);

// Copies the key held in slot index into key; FAILURE for a bad or empty slot.
Status
htable_get_key_by_index(hashtable_t *htab,unsigned index,char *key);

// Removes the element stored under key; FAILURE when key is absent.
Status
htable_remove_element(hashtable_t *htab,const char *key);

// Frees the table and all of its elements and sets *phtab to NULL.
void
htable_destroy(hashtable_t **phtab);

// Prints every slot of the table to standard output.
void
debug(hashtable_t *htab);
% hte.proto
// hte.proto -- prototypes

// Creates an element holding a copy of key and the value data; NULL on failure.
hte_t *
hte_create(const char *key,int data);

// Replaces the value stored in the element.
void
hte_update(hte_t *hte,int data);

// Returns the value stored in the element.
int
hte_get_data(const hte_t *hte);

// Returns the element's key string (not a copy).
const char *
hte_get_key(const hte_t *hte);

// Returns TRUE when both elements hold equal keys.
Boolean
hte_keys_match(const hte_t *hte1,const hte_t *hte2);

// Stores key[index] in *ch; the index parameter used by the body is restored.
Status
hte_get_character_by_index(hte_t *hte,int index,char *ch);

// Frees the element and its key and sets *phte to NULL.
void
hte_destroy(hte_t **phte);
,
我读过一些关于调整哈希表大小时的内容,我只需要使用新哈希表的大小重新哈希当前哈希表中的所有元素。
是的,这很常见。
我只是想知道除此之外是否还有其他方法可以做到这一点。
是的,还有其他方法。一种方法是同时保留旧表和新表:在一段时间内,查找和删除都必须在两张表中进行,但这样可以分摊调整大小的成本(以获得更可预测的操作延迟)——例如,每当访问到旧表中的某个元素时就把它“迁移”到新表(因为此时您本来就已经完成了重新哈希该键的工作),或者每次向新表插入时顺带迁移一个旧表元素。
另一个类似的想法是把存储桶数组重新分配(realloc)成更大的数组,但不立即重新定位元素——之后在进行查找/删除/插入时,同时按新旧两个存储桶数量(顺序任意)进行检查,并在修改时根据当前的 bucket_count 把元素动态迁移到它们新的最佳存储桶。
另一种方法是将哈希值与键[/值]条目一起存储在表中,这样在调整大小时就可以直接把各个键重新定位到新的存储桶位置,而不需要为所有键重新计算哈希值。