【 数据结构 & 算法 】—— 哈希表、字符串
思维导图
预备知识1:哈希表定义
哈希表(Hash table,也叫散列表)是根据关键字值(key)直接进行访问的数据结构,以加快查找关键字值的速度。这个映射函数叫做哈希函数,存放记录的数组叫做哈希表。
![]()
预备知识2:最简单的哈希 — 字符哈希
- 字符哈希
#include <stdio.h>
#include <string>
int main(){
int char_map[128] = {0};
std::string str = "abcdefgaaxxy";
//根据映射,统计各个字符出现的次数
for (int i = 0; i < str.length(); i++){
char_map[str[i]]++;
//例: char_map['a']++ = char_map[97]++
}
for (int i = 0; i < 128; i++){
if (char_map[i] > 0){
printf("[%c][%d] : %d\n", i, i, char_map[i]);
}
}
return 0;
}
预备知识3:哈希表排序整数
- 哈希表排序整数
#include <stdio.h>
int main(){
int random[10] = {999, 1, 444, 7, 20, 9, 1, 3, 7, 7};
int hash_map[1000] = {0};
//根据映射,统计各个整数出现的次数
for (int i = 0; i < 10; i++){
hash_map[random[i]]++;
}
for (int i = 0; i < 1000; i++){ //按顺序输出
for (int j = 0; j < hash_map[i]; j++){
printf("%d\n", i);
}
}
return 0;
}
问题引入1,任意元素的映射
1、当遇到负数或非常大的整数,如何进行哈希映射?
2、当遇到字符串,如何进行哈希映射?
3、当遇到无法直接映射的数据类型,如何进行哈希映射?
利用哈希函数,将关键字值转换为整数再对表长取余,转换为表长范围内的整数。
问题引入2,发生冲突
#include <stdio.h>
#include <string>
//直接对表长取余
int int_func(int key, int table_len){
return key % table_len;
}
//将字符串的字符ASCII码相加,再对表长取余
int string_func(std::string key, int table_len){
int sum = 0;
for (int i = 0; i < key.length(); i++){
sum += key[i];
}
return sum % table_len;
}
int main(){
const int TABLE_LEN = 10;
int hash_map[TABLE_LEN] = {0};
//冲突1: 99999995%10 = 5
hash_map[int_func(99999995, TABLE_LEN)]++;
hash_map[int_func(5, TABLE_LEN)]++;
//冲突2:('a'+'b'+'c')%10 = ('b'+'a'+'c')%10
hash_map[string_func("abc", TABLE_LEN)]++;
hash_map[string_func("bac", TABLE_LEN)]++;
for (int i = 0; i < TABLE_LEN; i++){
printf("hash_map[%d] = %d\n", i, hash_map[i]);
}
return 0;
}
- 拉链法解决冲突,构造哈希表
#include <stdio.h>
#include <vector>
struct ListNode {
int val;
ListNode *next;
ListNode(int x) : val(x), next(NULL) {}
};
int hash_func(int key, int table_len){
return key % table_len;
}
void insert(ListNode *hash_table[], ListNode *node, int table_len){
int hash_key = hash_func(node->val, table_len);
node->next = hash_table[hash_key];
hash_table[hash_key] = node;
}
bool search(ListNode *hash_table[], int value, int table_len){
int hash_key = hash_func(value, table_len);
ListNode *head = hash_table[hash_key];
while(head){
if (head->val == value){
return true;
}
head = head->next;
}
return false;
}
int main(){
const int TABLE_LEN = 11;
ListNode *hash_table[TABLE_LEN] = {0};
std::vector<ListNode *> hash_node_vec;
int test[8] = {1, 1, 4, 9, 20, 30, 150, 500};
for (int i = 0; i < 8; i++){
hash_node_vec.push_back(new ListNode(test[i]));
}
for (int i = 0; i < hash_node_vec.size(); i++){
insert(hash_table, hash_node_vec[i], TABLE_LEN);
}
printf("Hash table:\n");
for (int i = 0; i < TABLE_LEN; i++){
printf("[%d]:", i);
ListNode *head = hash_table[i];
while(head){
printf("->%d", head->val);
head = head->next;
}
printf("\n");
}
printf("\n");
printf("Test search:\n");
for (int i = 0; i < 10; i++){
if (search(hash_table, i, TABLE_LEN)){
printf("%d is in the hash table.\n");
}
else{
printf("%d is not in the hash table.\n");
}
}
return 0;
}
预备知识4:哈希map与STL map
#include <stdio.h>
#include <map>
#include <string>
struct ListNode {
std::string key;
int val;
ListNode *next;
ListNode(int x) : val(x), next(NULL) {}
};
int main(){
std::map<std::string, int> hash_map;
std::string str1 = "abc";
std::string str2 = "aaa";
std::string str3 = "xxxxx";
hash_map[str1] = 1;
hash_map[str2] = 2;
hash_map[str3] = 100;
if (hash_map.find(str1) != hash_map.end()){
printf("%s is in hash_map, value is %d\n",
str1.c_str(), hash_map[str1]);
}
std::map<std::string, int> ::iterator it;
for (it = hash_map.begin(); it != hash_map.end(); it++){
printf("hash_map[%s] = %d\n", it->first.c_str(), it->second);
}
return 0;
}
例1:最长回文串(easy)(字符哈希)
已知一个大小写字符组成的字符串,求用该字符串中的字符可以生成的最长回文字符串长度。例如 s="abcccdda",可生成的最长回文字符串长度为9,如dccaaccd、adccbccda,acdcacdca等,都是正确的。
- LeetCode 409.Longest Palindrome
#include <stdio.h>
#include <string>
class Solution {
public:
int longestPalindrome(std::string s) {
int char_map[128] = {0};
int max_length = 0;
int flag = 0;
for (int i = 0; i < s.length(); i++){
char_map[s[i]]++;
}
for (int i = 0; i < 128; i++){
if (char_map[i] % 2 == 0){
max_length += char_map[i];
}
else{
max_length += char_map[i] - 1;
flag = 1;
}
}
return max_length + flag;
}
};
int main(){
std::string s = "abccccddaa";
Solution solve;
printf("%d\n", solve.longestPalindrome(s));
return 0;
}
例2:词语模式(easy)(字串符哈希)
已知字符串pattern与字符串str,确认str是否与pattern匹配。
str与pattern匹配代表字符串str中的单词与pattern中的字符一一对应。
例如,pattern ="abba",str ="dog cat cat dog"匹配
pattern ="abba",str="dog cat cat fish"不匹配
pattern="aaaa",str ="dog cat cat dog"不匹配
pattern ="abba",str ="dog dog dog dog"不匹配
![]()
![]()
- LeetCode 290.Word Pattern
#include <stdio.h>
#include <string>
#include <map>
class Solution {
public:
bool wordPattern(std::string pattern, std::string str) {
std::map<std::string, char> word_map;
char used[128] = {0};
std::string word;
int pos = 0;
str.push_back(' ');
for (int i = 0; i < str.length(); i++){
if (str[i] == ' '){
if (pos == pattern.length()){
return false;
}
if (word_map.find(word) == word_map.end()){
if (used[pattern[pos]]){
return false;
}
word_map[word] = pattern[pos];
used[pattern[pos]] = 1;
}
else{
if (word_map[word] != pattern[pos]){
return false;
}
}
word = "";
pos++;
}
else{
word += str[i];
}
}
if (pos != pattern.length()){
return false;
}
return true;
}
};
int main(){
std::string pattern = "abba";
std::string str = "dog cat cat dog";
Solution solve;
printf("%d\n", solve.wordPattern(pattern, str));
return 0;
}
例3:同字符词语分组(medium)(数组哈希)
已知一组字符串,将所有anagram(由颠倒字母顺序而构成的字)分组输出。
例如:["eat","tea","tan","ate","nat","bat"]
返回:[["ate","eat","tea"],["nat","tan"],["bat"]]
方法 1 算法思路
- LeetCode 49.Group Anagrams(solve1)
#include <stdio.h>
#include <vector>
#include <string>
#include <map>
#include <algorithm>
class Solution {
public:
std::vector<std::vector<std::string> > groupAnagrams(
std::vector<std::string>& strs) {
std::map<std::string, std::vector<std::string> > anagram;
std::vector<std::vector<std::string> > result;
for (int i = 0; i < strs.size(); i++){
std::string str = strs[i];
std::sort(str.begin(), str.end());
if (anagram.find(str) == anagram.end()){
std::vector<std::string> item;
anagram[str] = item;
}
anagram[str].push_back(strs[i]);
}
std::map<std::string, std::vector<std::string> > ::iterator it;
for (it = anagram.begin(); it != anagram.end(); it++){
result.push_back((*it).second);
}
return result;
}
};
int main(){
std::vector<std::string> strs;
strs.push_back("eat");
strs.push_back("tea");
strs.push_back("tan");
strs.push_back("ate");
strs.push_back("nat");
strs.push_back("bat");
Solution solve;
std::vector<std::vector<std::string> > result
= solve.groupAnagrams(strs);
for (int i = 0; i < result.size(); i++){
for (int j = 0; j < result[i].size(); j++){
printf("[%s]", result[i][j].c_str());
}
printf("\n");
}
return 0;
}
方法 2 算法思路
- LeetCode 49.Group Anagrams(solve2)
#include <stdio.h>
#include <vector>
#include <string>
#include <map>
void change_to_vector(std::string &str, std::vector<int> &vec){
for (int i = 0; i < 26; i++){
vec.push_back(0);
}
for (int i = 0; i < str.length(); i++){
vec[str[i]-'a']++;
}
}
class Solution {
public:
std::vector<std::vector<std::string> > groupAnagrams(
std::vector<std::string>& strs) {
std::map<std::vector<int>, std::vector<std::string> > anagram;
std::vector<std::vector<std::string> > result;
for (int i = 0; i < strs.size(); i++){
std::vector<int> vec;
change_to_vector(strs[i], vec);
if (anagram.find(vec) == anagram.end()){
std::vector<std::string> item;
anagram[vec] = item;
}
anagram[vec].push_back(strs[i]);
}
std::map<std::vector<int>,
std::vector<std::string> > ::iterator it;
for (it = anagram.begin(); it != anagram.end(); it++){
result.push_back((*it).second);
}
return result;
}
};
int main(){
std::vector<std::string> strs;
strs.push_back("eat");
strs.push_back("tea");
strs.push_back("tan");
strs.push_back("ate");
strs.push_back("nat");
strs.push_back("bat");
Solution solve;
std::vector<std::vector<std::string> > result = solve.groupAnagrams(strs);
for (int i = 0; i < result.size(); i++){
for (int j = 0; j < result[i].size(); j++){
printf("[%s]", result[i][j].c_str());
}
printf("\n");
}
return 0;
}
例4:无重复字符的最长子串(medium)(字符哈希)
已知一个字符串,求用该字符串的无重复字符的最长子串及长度。
例如:s ="abcabcbb" -> "abc",3
s="bbbbb" -> "b",1
s="pwwkew" -> "wke",3
![]()
- LeetCode 3.Longest Substring Without Repeating Characters
#include <stdio.h>
#include <string>
class Solution {
public:
int lengthOfLongestSubstring(std::string s) {
int begin = 0;
int result = 0;
std::string word = "";
int char_map[128] = {0};
for (int i = 0; i < s.length(); i++){
char_map[s[i]]++;
if (char_map[s[i]] == 1){
word += s[i];
if (result < word.length()){
result = word.length();
}
}
else{
while(begin < i && char_map[s[i]] > 1){
char_map[s[begin]]--;
begin++;
}
word = "";
for (int j = begin; j <= i; j++){
word += s[j];
}
}
}
return result;
}
};
int main(){
std::string s = "abcbadab";
Solution solve;
printf("%d\n", solve.lengthOfLongestSubstring(s));
return 0;
}
例5:重复的DNA序列(medium)(字串符哈希)
将DNA序列看作是只包含 A,C,G,T 四个字符的字符串,给一个DNA字符串找到所有长度为10的且出现超过1次的子串。
例如:s="AAAAAAAAAAA"
Return:["AAAAAAAAAA"]
s="ААААAСССССАААААССССССАAAAAGGGTТТ"
Return:["AAAAACCCCC","СССССAAAAA"]
方法 1 算法思路
- LeetCode 187.Repeated DNA Sequences(solve1)
#include <stdio.h>
#include <vector>
#include <string>
#include <map>
class Solution {
public:
std::vector<std::string> findRepeatedDnaSequences(std::string s) {
std::map<std::string, int> word_map;
std::vector<std::string> result;
for (int i = 0; i < s.length(); i++){
std::string word = s.substr(i, 10);
if (word_map.find(word) != word_map.end()){
word_map[word] += 1;
}
else{
word_map[word] = 1;
}
}
std::map<std::string, int> ::iterator it;
for (it = word_map.begin(); it != word_map.end(); it++){
if (it->second > 1){
result.push_back(it->first);
}
}
return result;
}
};
int main(){
std::string s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT";
Solution solve;
std::vector<std::string> result = solve.findRepeatedDnaSequences(s);
for (int i = 0; i < result.size(); i++){
printf("%s\n", result[i].c_str());
}
return 0;
}
方法 2 算法思路
- LeetCode 187.Repeated DNA Sequences(solve2)
#include <stdio.h>
#include <vector>
#include <string>
int g_hash_map[1048576] = {0};
std::string change_int_to_DNA(int DNA){
static const char DNA_CHAR[] = {'A', 'C', 'G', 'T'};
std::string str;
for (int i = 0; i < 10; i++){
str += DNA_CHAR[DNA & 3];
DNA = DNA >> 2;
}
return str;
}
class Solution {
public:
std::vector<std::string> findRepeatedDnaSequences(std::string s) {
std::vector<std::string> result;
if (s.length() < 10){
return result;
}
for (int i = 0; i < 1048576; i++){
g_hash_map[i] = 0;
}
int char_map[128] = {0};
char_map['A'] = 0;
char_map['C'] = 1;
char_map['G'] = 2;
char_map['T'] = 3;
int key = 0;
for (int i = 9; i >= 0; i--){
key = (key << 2) + char_map[s[i]];
}
g_hash_map[key] = 1;
for (int i = 10; i < s.length(); i++){
key = key >> 2;
key = key | (char_map[s[i]] << 18);
g_hash_map[key]++;
}
for (int i = 0; i < 1048576; i++){
if (g_hash_map[i] > 1){
result.push_back(change_int_to_DNA(i));
}
}
return result;
}
};
int main(){
std::string s = "AAAAACCCCCAAAAACCCCCCAAAAAGGGTTT";
Solution solve;
std::vector<std::string> result = solve.findRepeatedDnaSequences(s);
for (int i = 0; i < result.size(); i++){
printf("%s\n", result[i].c_str());
}
return 0;
}
例6:最小窗口子串(hard)(哈希维护窗口)
已知字符串S与T,求在S中的最小窗口(区间),使得区间中包含了T中的所有字符。
例如:S="ADOBECODEBANC",T="ABC"。
包含T的子区间中,有"ADOBEC","CODEBA","BANC"等等;最小窗口是"BANC"。
![]()
![]()
- LeetCode 76. Minimum Window Substring
#include <stdio.h>
#include <string>
#include <vector>
class Solution {
private:
bool is_window_ok(int map_s[], int map_t[], std::vector<int> &vec_t){
for (int i = 0; i < vec_t.size(); i++){
if (map_s[vec_t[i]] < map_t[vec_t[i]]){
return false;
}
}
return true;
}
public:
std::string minWindow(std::string s, std::string t) {
const int MAX_ARRAY_LEN = 128;
int map_t[MAX_ARRAY_LEN] = {0};
int map_s[MAX_ARRAY_LEN] = {0};
std::vector<int> vec_t;
for (int i = 0; i < t.length(); i++){
map_t[t[i]]++;
}
for (int i = 0; i < MAX_ARRAY_LEN; i++){
if (map_t[i] > 0){
vec_t.push_back(i);
}
}
int window_begin = 0;
std::string result;
for (int i = 0; i < s.length(); i++){
map_s[s[i]]++;
while(window_begin < i){
char begin_ch = s[window_begin];
if (map_t[begin_ch] == 0){
window_begin++;
}
else if (map_s[begin_ch] > map_t[begin_ch]){
map_s[begin_ch]--;
window_begin++;
}
else{
break;
}
}
if (is_window_ok(map_s, map_t, vec_t)){
int new_window_len = i - window_begin + 1;
if (result == "" || result.length() > new_window_len){
result = s.substr(window_begin, new_window_len);
}
}
}
return result;
}
};
int main(){
Solution solve;
std::string result = solve.minWindow("ADOBECODEBANC", "ABC");
printf("%s\n", result.c_str());
result = solve.minWindow("MADOBCCABEC", "ABCC");
printf("%s\n", result.c_str());
result = solve.minWindow("aa", "aa");
printf("%s\n", result.c_str());
return 0;
}