19-23年 学习笔记

【 数据结构 & 算法 】—— 哈希表、字符串

2021-02-23  本文已影响0人  Du1in9

思维导图

预备知识1:哈希表定义

哈希表(Hash table,也叫散列表)是根据关键字值(key)直接进行访问的数据结构,以加快查找关键字值的速度。这个映射函数叫做哈希函数,存放记录的数组叫做哈希表。


预备知识2:最简单的哈希 — 字符哈希

#include <stdio.h>
#include <string>

int main(){
    int char_map[128] = {0};
    std::string str = "abcdefgaaxxy";
    
    //根据映射,统计各个字符出现的次数 
    for (int i = 0; i < str.length(); i++){
        char_map[str[i]]++;
        //例: char_map['a']++ = char_map[97]++
    }
    
    for (int i = 0; i < 128; i++){
        if (char_map[i] > 0){
            printf("[%c][%d] : %d\n", i, i, char_map[i]);
        }
    }
    return 0;
}

预备知识3:哈希表排序整数

#include <stdio.h>

int main(){
    int random[10] = {999, 1, 444, 7, 20, 9, 1, 3, 7, 7};
    int hash_map[1000] = {0};
    
    //根据映射,统计各个整数出现的次数 
    for (int i = 0; i < 10; i++){
        hash_map[random[i]]++;
    }   
    for (int i = 0; i < 1000; i++){ //按顺序输出 
        for (int j = 0; j < hash_map[i]; j++){
            printf("%d\n", i);
        }
    }   
    return 0;
}
问题引入1,任意元素的映射

1、当遇到负数或非常大的整数,如何进行哈希映射?
2、当遇到字符串,如何进行哈希映射?
3、当遇到无法直接映射的数据类型,如何进行哈希映射?
利用哈希函数,将关键字值转换为整数再对表长取余,转换为表长范围内的整数。

问题引入2,发生冲突
#include <stdio.h>
#include <string>

//直接对表长取余
int int_func(int key, int table_len){    
    return key % table_len;
}

//将字符串的字符ASCII码相加,再对表长取余
int string_func(std::string key, int table_len){
    int sum = 0;
    for (int i = 0; i < key.length(); i++){
        sum += key[i];
    }
    return sum % table_len;
}

int main(){
    const int TABLE_LEN = 10;
    int hash_map[TABLE_LEN] = {0};  
    //冲突1: 99999995%10 = 5
    hash_map[int_func(99999995, TABLE_LEN)]++;
    hash_map[int_func(5, TABLE_LEN)]++;
    //冲突2:('a'+'b'+'c')%10 =  ('b'+'a'+'c')%10
    hash_map[string_func("abc", TABLE_LEN)]++;
    hash_map[string_func("bac", TABLE_LEN)]++;  
    for (int i = 0; i < TABLE_LEN; i++){
        printf("hash_map[%d] = %d\n", i, hash_map[i]);
    }
    return 0;
}
#include <stdio.h>
#include <vector>

struct ListNode {
    int val;
    ListNode *next;
    ListNode(int x) : val(x), next(NULL) {}
};

int hash_func(int key, int table_len){
    return key % table_len;
}

void insert(ListNode *hash_table[], ListNode *node, int table_len){
    int hash_key = hash_func(node->val, table_len);
    node->next = hash_table[hash_key];
    hash_table[hash_key] = node;
}

bool search(ListNode *hash_table[], int value, int table_len){
    int hash_key = hash_func(value, table_len);
    ListNode *head = hash_table[hash_key];
    while(head){
        if (head->val == value){
            return true;
        }
        head = head->next;
    }
    return false;
}

int main(){
    const int TABLE_LEN = 11;
    ListNode *hash_table[TABLE_LEN] = {0};
    std::vector<ListNode *> hash_node_vec;
    int test[8] = {1, 1, 4, 9, 20, 30, 150, 500};
    for (int i = 0; i < 8; i++){
        hash_node_vec.push_back(new ListNode(test[i]));
    }   
    for (int i = 0; i < hash_node_vec.size(); i++){
        insert(hash_table, hash_node_vec[i], TABLE_LEN);
    }   
    printf("Hash table:\n");
    for (int i = 0; i < TABLE_LEN; i++){
        printf("[%d]:", i);
        ListNode *head = hash_table[i];
        while(head){
            printf("->%d", head->val);
            head = head->next;
        }
        printf("\n");
    }
    printf("\n");   
    printf("Test search:\n");
    for (int i = 0; i < 10; i++){
        if (search(hash_table, i, TABLE_LEN)){
            printf("%d is in the hash table.\n");
        }
        else{
            printf("%d is not in the hash table.\n");
        }
    }
    return 0;
}

预备知识4:哈希map与STL map

#include <stdio.h>
#include <map>
#include <string>

struct ListNode {
    std::string key;
    int val;
    ListNode *next;
    ListNode(int x) : val(x), next(NULL) {}
};

int main(){
    std::map<std::string, int> hash_map;    
    std::string str1 = "abc";
    std::string str2 = "aaa";
    std::string str3 = "xxxxx"; 
    hash_map[str1] = 1;
    hash_map[str2] = 2;
    hash_map[str3] = 100;   
    if (hash_map.find(str1) != hash_map.end()){
        printf("%s is in hash_map, value is %d\n",
            str1.c_str(), hash_map[str1]);
    }   
    std::map<std::string, int> ::iterator it;
    for (it = hash_map.begin(); it != hash_map.end(); it++){
        printf("hash_map[%s] = %d\n", it->first.c_str(), it->second);
    }   
    return 0;
}

例1:最长回文串(easy)(字符哈希)

已知一个大小写字符组成的字符串,求用该字符串中的字符可以生成的最长回文字符串长度。例如 s="abcccdda",可生成的最长回文字符串长度为9,如dccaaccd、adccbccda,acdcacdca等,都是正确的。

#include <stdio.h>
#include <string>

class Solution {
public:
    int longestPalindrome(std::string s) {
        int char_map[128] = {0};
        int max_length = 0;
        int flag = 0;
        for (int i = 0; i < s.length(); i++){
            char_map[s[i]]++;
        }
        for (int i = 0; i < 128; i++){
            if (char_map[i] % 2 == 0){
                max_length += char_map[i];
            }
            else{
                max_length += char_map[i] - 1;
                flag = 1;
            }
        }
        return max_length + flag;
    }
};

int main(){
    std::string s = "abccccddaa";
    Solution solve;
    printf("%d\n", solve.longestPalindrome(s));
    return 0;
}

例2:词语模式(easy)(字串符哈希)

已知字符串pattern与字符串str,确认str是否与pattern匹配。
str与pattern匹配代表字符串str中的单词与pattern中的字符一一对应。
例如,pattern ="abba",str ="dog cat cat dog"匹配
pattern ="abba",str="dog cat cat fish"不匹配
pattern="aaaa",str ="dog cat cat dog"不匹配
pattern ="abba",str ="dog dog dog dog"不匹配


#include <stdio.h>

#include <string>
#include <map>
class Solution {
public:
    bool wordPattern(std::string pattern, std::string str) {
        std::map<std::string, char> word_map;
        char used[128] = {0};
        std::string word;
        int pos = 0;
        str.push_back(' ');
        for (int i = 0; i < str.length(); i++){
            if (str[i] == ' '){
                if (pos == pattern.length()){
                    return false;
                }
                if (word_map.find(word) == word_map.end()){
                    if (used[pattern[pos]]){
                        return false;
                    }
                    word_map[word] = pattern[pos];
                    used[pattern[pos]] = 1;
                }
                else{
                    if (word_map[word] != pattern[pos]){
                        return false;
                    }
                }
                word = "";
                pos++;
            }
            else{
                word += str[i];
            }
        }
        if (pos != pattern.length()){
            return false;
        }
        return true;
    }
};

int main(){
    std::string pattern = "abba";
    std::string str = "dog cat cat dog";
    Solution solve;
    printf("%d\n", solve.wordPattern(pattern, str));
    return 0;
}

例3:同字符词语分组(medium)(数组哈希)

已知一组字符串,将所有anagram(由颠倒字母顺序而构成的字)分组输出。
例如:["eat","tea","tan","ate","nat","bat"]
返回:[["ate","eat","tea"],["nat","tan"],["bat"]]

方法 1 算法思路

#include <stdio.h>

#include <vector>
#include <string>
#include <map>
#include <algorithm>

class Solution {
public:
    std::vector<std::vector<std::string> > groupAnagrams(
            std::vector<std::string>& strs) {
        std::map<std::string, std::vector<std::string> > anagram;
        std::vector<std::vector<std::string> > result;      
        for (int i = 0; i < strs.size(); i++){
            std::string str = strs[i];
            std::sort(str.begin(), str.end());
            if (anagram.find(str) == anagram.end()){
                std::vector<std::string> item;
                anagram[str] = item;
            }
            anagram[str].push_back(strs[i]);
        }
        std::map<std::string, std::vector<std::string> > ::iterator it;
        for (it = anagram.begin(); it != anagram.end(); it++){
            result.push_back((*it).second);
        }
        return result;
    }
};

int main(){
    std::vector<std::string> strs;
    strs.push_back("eat");
    strs.push_back("tea");
    strs.push_back("tan");
    strs.push_back("ate");
    strs.push_back("nat");
    strs.push_back("bat");
    Solution solve;
    std::vector<std::vector<std::string> > result 
        = solve.groupAnagrams(strs);
    for (int i = 0; i < result.size(); i++){
        for (int j = 0; j < result[i].size(); j++){
            printf("[%s]", result[i][j].c_str());
        }
        printf("\n");
    }   
    return 0;
}

方法 2 算法思路

#include <stdio.h>

#include <vector>
#include <string>
#include <map>

void change_to_vector(std::string &str, std::vector<int> &vec){
    for (int i = 0; i < 26; i++){
        vec.push_back(0);
    }
    for (int i = 0; i < str.length(); i++){
        vec[str[i]-'a']++;
    }
}

class Solution {
public:
    std::vector<std::vector<std::string> > groupAnagrams(
            std::vector<std::string>& strs) {
        std::map<std::vector<int>, std::vector<std::string> > anagram;
        std::vector<std::vector<std::string> > result;      
        for (int i = 0; i < strs.size(); i++){
            std::vector<int> vec;
            change_to_vector(strs[i], vec);
            if (anagram.find(vec) == anagram.end()){
                std::vector<std::string> item;
                anagram[vec] = item;
            }
            anagram[vec].push_back(strs[i]);
        }
        std::map<std::vector<int>,
            std::vector<std::string> > ::iterator it;
        for (it = anagram.begin(); it != anagram.end(); it++){
            result.push_back((*it).second);
        }
        return result;
    }
};

int main(){
    std::vector<std::string> strs;
    strs.push_back("eat");
    strs.push_back("tea");
    strs.push_back("tan");
    strs.push_back("ate");
    strs.push_back("nat");
    strs.push_back("bat");
    Solution solve;
    std::vector<std::vector<std::string> > result = solve.groupAnagrams(strs);
    for (int i = 0; i < result.size(); i++){
        for (int j = 0; j < result[i].size(); j++){
            printf("[%s]", result[i][j].c_str());
        }
        printf("\n");
    }   
    return 0;
}

例4:无重复字符的最长子串(medium)(字符哈希)

已知一个字符串,求用该字符串的无重复字符的最长子串及长度。
例如:s ="abcabcbb" -> "abc",3
s="bbbbb" -> "b",1
s="pwwkew" -> "wke",3


#include <stdio.h>

#include <string>
class Solution {
public:
    int lengthOfLongestSubstring(std::string s) {
        int begin = 0;
        int result = 0;
        std::string word = "";
        int char_map[128] = {0};
        for (int i = 0; i < s.length(); i++){
            char_map[s[i]]++;
            if (char_map[s[i]] == 1){
                word += s[i];
                if (result < word.length()){
                    result = word.length();
                }
            }
            else{
                while(begin < i && char_map[s[i]] > 1){
                    char_map[s[begin]]--;
                    begin++;
                }
                word = "";
                for (int j = begin; j <= i; j++){
                    word += s[j];
                }
            }
        }
        return result;
    }
};

int main(){
    std::string s = "abcbadab";
    Solution solve;
    printf("%d\n", solve.lengthOfLongestSubstring(s));  
    return 0;
}

例5:重复的DNA序列(medium)(字串符哈希)

将DNA序列看作是只包含 A,C,G,T 四个字符的字符串,给一个DNA字符串找到所有长度为10的且出现超过1次的子串。
例如:s="AAAAAAAAAAA"
Return:["AAAAAAAAAA"]
s="ААААAСССССАААААССССССАAAAAGGGTТТ"
Return:["AAAAACCCCC","СССССAAAAA"]

方法 1 算法思路

#include <stdio.h>

#include <vector>
#include <string>
#include <map>

class Solution {
public:
    std::vector<std::string> findRepeatedDnaSequences(std::string s) {
        std::map<std::string, int> word_map;
        std::vector<std::string> result;
        for (int i = 0; i < s.length(); i++){
            std::string word = s.substr(i, 10);
            if (word_map.find(word) != word_map.end()){
                word_map[word] += 1;
            }
            else{
                word_map[word] = 1;
            }
        }
        std::map<std::string, int> ::iterator it;
        for (it = word_map.begin(); it != word_map.end(); it++){
            if (it->second > 1){
                result.push_back(it->first);
            }
        }
        return result;
    }
};

int main(){
    std::string s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT";
    Solution solve;
    std::vector<std::string> result = solve.findRepeatedDnaSequences(s);
    for (int i = 0; i < result.size(); i++){
        printf("%s\n", result[i].c_str());
    }
    return 0;
}

方法 2 算法思路

#include <stdio.h>

#include <vector>
#include <string>

int g_hash_map[1048576] = {0};

std::string change_int_to_DNA(int DNA){
    static const char DNA_CHAR[] = {'A', 'C', 'G', 'T'};
    std::string str;                
    for (int i = 0; i < 10; i++){
        str += DNA_CHAR[DNA & 3];
        DNA = DNA >> 2;
    }
    return str;
}

class Solution {
public:
    std::vector<std::string> findRepeatedDnaSequences(std::string s) {
        std::vector<std::string> result;
        if (s.length() < 10){
            return result;
        }
        for (int i = 0; i < 1048576; i++){
            g_hash_map[i] = 0;
        }       
        int char_map[128] = {0};
        char_map['A'] = 0;
        char_map['C'] = 1;
        char_map['G'] = 2;
        char_map['T'] = 3;      
        int key = 0;
        for (int i = 9; i >= 0; i--){
            key = (key << 2) + char_map[s[i]];
        }
        g_hash_map[key] = 1;
        for (int i = 10; i < s.length(); i++){
            key = key >> 2;
            key = key | (char_map[s[i]] << 18);
            g_hash_map[key]++;
        }
        for (int i = 0; i < 1048576; i++){
            if (g_hash_map[i] > 1){
                result.push_back(change_int_to_DNA(i));
            }
        }
        return result;
    }
};

int main(){
    std::string s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT";
    Solution solve;
    std::vector<std::string> result = solve.findRepeatedDnaSequences(s);
    for (int i = 0; i < result.size(); i++){
        printf("%s\n", result[i].c_str());
    }
    return 0;
}

例6:最小窗口子串(hard)(哈希维护窗口)

已知字符串S与T,求在S中的最小窗口(区间),使得区间中包含了T中的所有字符。
例如:S="ADOBECODEBANC",T="ABC"。
包含T的子区间中,有"ADOBEC","CODEBA","BANC"等等;最小窗口是"BANC"。


#include <stdio.h>
#include <string>
#include <vector>

class Solution {
private:
    bool is_window_ok(int map_s[], int map_t[], std::vector<int> &vec_t){
        for (int i = 0; i < vec_t.size(); i++){
            if (map_s[vec_t[i]] < map_t[vec_t[i]]){
                return false;
            }
        }
        return true;
    }
public:
    std::string minWindow(std::string s, std::string t) {
        const int MAX_ARRAY_LEN = 128;
        int map_t[MAX_ARRAY_LEN] = {0};
        int map_s[MAX_ARRAY_LEN] = {0};
        std::vector<int> vec_t;
        for (int i = 0; i < t.length(); i++){
            map_t[t[i]]++;
        }
        for (int i = 0; i < MAX_ARRAY_LEN; i++){
            if (map_t[i] > 0){
                vec_t.push_back(i);
            }
        }
        
        int window_begin = 0;
        std::string result;
        for (int i = 0; i < s.length(); i++){
            map_s[s[i]]++;
            while(window_begin < i){
                char begin_ch = s[window_begin];
                if (map_t[begin_ch] == 0){
                    window_begin++;
                }
                else if (map_s[begin_ch] > map_t[begin_ch]){
                    map_s[begin_ch]--;
                    window_begin++;
                }
                else{
                    break;
                }
            }
            if (is_window_ok(map_s, map_t, vec_t)){
                int new_window_len = i - window_begin + 1;
                if (result == "" || result.length() > new_window_len){
                    result = s.substr(window_begin, new_window_len);
                }
            }
        }
        return result;
    }
};

int main(){
    
    Solution solve;
    std::string result = solve.minWindow("ADOBECODEBANC", "ABC");
    printf("%s\n", result.c_str());
    result = solve.minWindow("MADOBCCABEC", "ABCC");
    printf("%s\n", result.c_str());
    result = solve.minWindow("aa", "aa");
    printf("%s\n", result.c_str());
    
    return 0;   
}
上一篇 下一篇

猜你喜欢

热点阅读