【三】哈希表

2019-10-10  本文已影响0人  Michael_abc

前言

哈希表基本上是我见过使用最广泛的数据结构了,不管是PHP,MySQL,Redis,JAVA等,都使用到了哈希表,使用特别广泛。

定义

哈希表是一种通过关键码查找值得数据结构,是一种无顺序的散列数据结构。

例图

image.png

说明

哈希表也有很多种,今天我们用来举例的是PHP的哈希表实现。

PHP的哈希表

在PHP内核中 大量使用的HashTable ,函数列表,类列表,数组,多维数组就是哈希表套着哈希表

image.png

PHP的哈希实现时数组+顺序链表+碰撞链表

哈希扩容

个人哈希表C语言实现

my_hash.h

#ifndef MY_MY_HASH_H
#define MY_MY_HASH_H

#define MY_HASH_MAX_SIZE 0x1000000000

#define MY_HASH_EXPAND_RATIO 0.8
#define MY_HASH_REDUCE_RATIO 0.2
#define MY_HASH_INIT_SIZE 4

typedef struct my_hash_bucket_s my_hash_bucket_t;
typedef struct my_hash_s my_hash_t;

struct my_hash_s {
    unsigned int size;
    unsigned int mask;
    unsigned int ele_num;
    float fill_ratio;
    my_hash_bucket_t *list_head;
    my_hash_bucket_t *list_tail;
    my_hash_bucket_t **table;
};

struct my_hash_bucket_s {
    unsigned long index;
    unsigned int key_len;
    unsigned int index_num;
    char *key;
    void *data;
    my_hash_bucket_t *list_next;
    my_hash_bucket_t *list_prev;
    my_hash_bucket_t *next;
    my_hash_bucket_t *prev;
};

my_hash_t *my_hash_init(void);

my_hash_bucket_t *my_hash_bucket_init(char *, unsigned int, int, void *);

void *my_hash_find(my_hash_t *, char *, int);

int my_hash_insert_or_update(my_hash_t *, char *, unsigned int, void *);

int my_hash_delete(my_hash_t *, char *, int);

int my_hash_expand(my_hash_t *);

int my_hash_reduce(my_hash_t *);

int my_hash_destory(my_hash_t *);

int my_hash_key_index(char *key, int);

void my_hash_foreach(my_hash_t *);

void my_hash_dump(my_hash_t *);

#endif

my_hash.c

#include <my_hash.h>

/**
 * hash表初始化
 * @return
 */
my_hash_t *my_hash_init(void) {
    unsigned int mem_size;
    my_hash_t *hash;
    hash = (my_hash_t *) malloc(sizeof(my_hash_t));
    hash->size = MY_HASH_INIT_SIZE;
    hash->mask = hash->size - 1;
    hash->ele_num = 0;
    hash->fill_ratio = 0.0;
    hash->list_head = NULL;
    hash->list_tail = NULL;
    mem_size = hash->size * sizeof(my_hash_bucket_t);
    hash->table = (my_hash_bucket_t **) malloc(mem_size);
    memset(hash->table, 0, mem_size);
    return hash;
}

/**
 *
 * @param key
 * @param key_len
 * @param index
 * @param data
 * @return
 */
my_hash_bucket_t *my_hash_bucket_init(char *key, unsigned int key_len, int index, void *data) {
    my_hash_bucket_t *new_bucket;
    new_bucket = (my_hash_bucket_t *) malloc(sizeof(my_hash_bucket_t));
    new_bucket->next = new_bucket->prev = NULL;
    new_bucket->list_next = new_bucket->list_prev = NULL;
    new_bucket->index_num = 0;
    new_bucket->data = data;
    new_bucket->index = index;
    new_bucket->key = key;
    new_bucket->key_len = key_len;
    return new_bucket;
}

/**
 * hash查找
 * @param hash
 * @param key
 * @return
 */
void *my_hash_find(my_hash_t *hash, char *key, int key_len) {
    int index = my_hash_key_index(key, key_len) % hash->mask;
    my_hash_bucket_t *bucket;
    bucket = hash->table[index];

    while (bucket) {
        if (bucket->key == key) {
            bucket->index_num++;
            return bucket->data;
        }
        bucket = bucket->next;
    }
    return NULL;
}

/**
 *
 * @param hash
 * @param key
 * @param key_len
 * @param data
 * @return
 */
int my_hash_insert_or_update(my_hash_t *hash, char *key, unsigned int key_len, void *data) {
    int ret;
    //查找更新
    void *old_data;
    int hash_index = my_hash_key_index(key, key_len);
    int index = hash_index % hash->mask;
    my_hash_bucket_t *bucket, *new_bucket;
    bucket = hash->table[index];
    while (bucket) {
        if (strcmp(bucket->key, key) == 0) {
            old_data = bucket->data;
            bucket->data = data;
            free(old_data);
            return RET_SUCCESS;
        }
        bucket = bucket->next;
    }
    // 插入
    new_bucket = my_hash_bucket_init(key, key_len, index, data);
    if (hash->table[index]) {
        hash->table[index]->prev = new_bucket;
        new_bucket->next = hash->table[index];
    }
    hash->table[index] = new_bucket;
    hash->ele_num++;
    hash->fill_ratio = (float) hash->ele_num / hash->size;

    if (hash->list_tail) {
        new_bucket->list_prev = hash->list_tail;
        hash->list_tail->list_next = new_bucket;
    }
    hash->list_tail = new_bucket;
    if (hash->ele_num == 1) {
        hash->list_head = new_bucket;
    }
    //判断容积率
    if (hash->fill_ratio > MY_HASH_EXPAND_RATIO) {
        ret = my_hash_expand(hash);
        if (ret != RET_SUCCESS) {
            return ret;
        }
    }
    return RET_SUCCESS;
}

/**
 * 哈希删除
 * @param hash
 * @param key
 * @param key_len
 * @return
 */
int my_hash_delete(my_hash_t *hash, char *key, int key_len) {
    int opt_ret = RET_FAIL;
    int index = my_hash_key_index(key, key_len) % hash->mask;
    my_hash_bucket_t *bucket, *prev_bucket;
    prev_bucket = NULL;
    bucket = hash->table[index];
    while (bucket) {
        //find it
        if (strcmp(bucket->key, key) == 0) {
            if (bucket->list_prev && bucket->list_next) {
                //中间
                bucket->list_prev->list_next = bucket->list_next;
                bucket->list_next->list_prev = bucket->list_prev;
            } else if (bucket->list_prev && !bucket->list_next) {
                //尾
                bucket->list_prev->list_next = NULL;
                hash->list_tail = bucket->list_prev;
            } else if (!bucket->list_prev && bucket->list_next) {
                //头
                bucket->list_next->list_prev = NULL;
                hash->list_head = bucket->list_next;
            }
            if (bucket->prev && bucket->next) {
                //中间
                bucket->prev->next = bucket->next;
                bucket->next->prev = bucket->prev;
            } else if (bucket->prev && !bucket->next) {
                //尾
                bucket->prev->next = NULL;
            } else if (!bucket->prev && bucket->next) {
                //头
                bucket->next->prev = NULL;
                hash->table[index] = bucket->next;
            } else {
                hash->table[index] = NULL;
            }
            free(bucket);
            bucket = NULL;
            opt_ret = RET_SUCCESS;
            hash->ele_num--;
            hash->fill_ratio = (float) hash->ele_num / hash->size;
            break;
        }
        prev_bucket = bucket;
        bucket = bucket->next;
    }
    if (opt_ret == RET_SUCCESS && hash->fill_ratio <= MY_HASH_REDUCE_RATIO) {
        opt_ret = my_hash_reduce(hash);
        if (opt_ret != RET_SUCCESS) {
            return opt_ret;
        }
    }
    return opt_ret;
}

/**
 * hash扩张
 * @param hash
 * @return
 */
int my_hash_expand(my_hash_t *hash) {
    hash->size = hash->size << 1;
    if (hash->size > MY_HASH_MAX_SIZE) {
        return RET_FAIL;
    }
    // define
    int index;
    unsigned int mem_size;
    my_hash_bucket_t *bucket, *tmp_bucket;
    my_hash_bucket_t **new_table;
    hash->mask = hash->size - 1;
    //
    mem_size = sizeof(my_hash_bucket_t) * hash->size;
    new_table = (my_hash_bucket_t **) malloc(mem_size);
    memset(new_table, 0, mem_size);
    free(hash->table);
    hash->table = new_table;
    hash->fill_ratio = hash->ele_num / hash->size;
    bucket = hash->list_head;
    //
    while (bucket) {
        bucket->next = bucket->prev = NULL;
        index = my_hash_key_index(bucket->key, bucket->key_len) % hash->mask;
        bucket->index = index;
        if (hash->table[index]) {
            hash->table[index]->prev = bucket;
            bucket->next = hash->table[index];
        }
        hash->table[index] = bucket;
        bucket = bucket->list_next;
    }
    return RET_SUCCESS;
}

/**
 *
 * @param hash
 * @return
 */
int my_hash_reduce(my_hash_t *hash) {
    hash->size = hash->size >> 1;
    hash->size = hash->size < MY_HASH_INIT_SIZE ? MY_HASH_INIT_SIZE : hash->size;
    // define
    int index;
    unsigned int mem_size;
    my_hash_bucket_t *bucket, *tmp_bucket;
    my_hash_bucket_t **new_table;
    hash->mask = hash->size - 1;
    //
    mem_size = sizeof(my_hash_bucket_t) * hash->size;
    new_table = (my_hash_bucket_t **) malloc(mem_size);
    memset(new_table, 0, mem_size);
    free(hash->table);
    hash->table = new_table;
    hash->fill_ratio = hash->ele_num / hash->size;
    bucket = hash->list_head;
    //
    while (bucket) {
        bucket->next = bucket->prev = NULL;
        index = my_hash_key_index(bucket->key, bucket->key_len) % hash->mask;
        bucket->index = index;
        if (hash->table[index]) {
            hash->table[index]->prev = bucket;
            bucket->next = hash->table[index];
        }
        hash->table[index] = bucket;
        bucket = bucket->list_next;
    }
    return RET_SUCCESS;
}

/**
 * hash字符串index
 * @param key
 * @return
 */
int my_hash_key_index(char *key, int key_len) {
    unsigned int hash = 1315423911;
    unsigned int i = 0;
    for (i = 0; i < key_len; key++, i++) {
        hash ^= ((hash << 5) + (*key) + (hash >> 2));
    }
    return hash;
}
/**
 *
 * @param hash
 * @return
 */
int my_hash_destory(my_hash_t *hash) {
    my_hash_bucket_t *bucket, *while_bucket;
    while_bucket = hash->list_head;
    while (while_bucket) {
        bucket = while_bucket;
        free(bucket);
        while_bucket = while_bucket->list_next;
    }
    free(hash);
    return RET_SUCCESS;
}

/**
 *
 * @param hash
 */
void my_hash_foreach(my_hash_t *hash) {
    my_hash_bucket_t *bucket;
    bucket = hash->list_head;
    while (bucket) {
        printf("[key:%s(%d)]->%d\n", bucket->key, bucket->index, *(int *)bucket->data);
        bucket = bucket->list_next;
    }
}

/**
 *
 * @param hash
 */
void my_hash_dump(my_hash_t *hash) {
    my_hash_bucket_t *bucket;
    int i;
    for (i = 1; i <= hash->size; i++) {
        bucket = hash->table[i];
        printf("node:%03d-->", i);
        while (bucket) {
            printf("[%s(%d)]=>%d->", bucket->key, bucket->index, *(int *)bucket->data);
            bucket = bucket->next;
        }
        printf("\n");
    }
}
上一篇下一篇

猜你喜欢

热点阅读