【 数据结构 & 算法 】—— 高级数据结构
思维导图
1/3:trie树(字典树)的基础知识
trie树,又称字典树或前缀树,是一种有序的、用于统计、排序和存储字符串的数据结构,它与二叉查找树不同,关键字不是直接保存在节点中,而是由节点在树中的位置决定。
一个节点的所有子孙都有相同的前缀,也就是这个节点对应的字符串,而根节点对应空字符串。一般情况下,不是所有的节点都有对应的值,只有叶子节点和部分内部节点所对应的键才有相关的值。
trie树的最大优点就是利用字符串的公共前缀来减少存储空间与查询时间,从而最大限度地减少无谓的字符串比较,是非常高效的字符串查找数据结构。
- 预备知识_Trie前序遍历
#include <stdio.h>
#define TRIE_MAX_CHAR_NUM 26
struct TrieNode{
TrieNode *child[TRIE_MAX_CHAR_NUM];
bool is_end;
TrieNode() : is_end(false){
for (int i = 0; i < TRIE_MAX_CHAR_NUM; i++){
child[i] = 0;
}
}
};
void preorder_trie(TrieNode *node, int layer){
for (int i = 0; i < TRIE_MAX_CHAR_NUM; i++){
if (node->child[i]){
for (int j = 0; j < layer; j++){
printf("---");
}
printf("%c", i + 'a');
if (node->child[i]->is_end){
printf("(end)");
}
printf("\n");
preorder_trie(node->child[i], layer + 1);
}
}
}
int main(){
TrieNode root;
TrieNode n1;
TrieNode n2;
TrieNode n3;
root.child['a'-'a'] = &n1;
root.child['b'-'a'] = &n2;
root.child['e'-'a'] = &n3;
n2.is_end = true;
TrieNode n4;
TrieNode n5;
TrieNode n6;
n1.child['b'-'a'] = &n4;
n2.child['c'-'a'] = &n5;
n3.child['f'-'a'] = &n6;
TrieNode n7;
TrieNode n8;
TrieNode n9;
TrieNode n10;
n4.child['c'-'a'] = &n7;
n4.child['d'-'a'] = &n8;
n5.child['d'-'a'] = &n9;
n6.child['g'-'a'] = &n10;
n7.is_end = true;
n8.is_end = true;
n9.is_end = true;
n10.is_end = true;
TrieNode n11;
n7.child['d'-'a'] = &n11;
n11.is_end = true;
preorder_trie(&root, 0);
return 0;
}
Trie树获取全部单词
- 预备知识_trie获取全部单词
#include <stdio.h>
#include <vector>
#include <string>
#define TRIE_MAX_CHAR_NUM 26
struct TrieNode{
TrieNode *child[TRIE_MAX_CHAR_NUM];
bool is_end;
TrieNode() : is_end(false){
for (int i = 0; i < TRIE_MAX_CHAR_NUM; i++){
child[i] = 0;
}
}
};
void get_all_word_from_trie(TrieNode *node, std::string &word,
std::vector<std::string> &word_list){
for (int i = 0; i < TRIE_MAX_CHAR_NUM; i++){
if (node->child[i]){
word.push_back(i + 'a');
if (node->child[i]->is_end){
word_list.push_back(word);
}
get_all_word_from_trie(node->child[i], word, word_list);
word.erase(word.length()-1, 1);
}
}
}
int main(){
TrieNode root;
TrieNode n1;
TrieNode n2;
TrieNode n3;
root.child['a'-'a'] = &n1;
root.child['b'-'a'] = &n2;
root.child['e'-'a'] = &n3;
n2.is_end = true;
TrieNode n4;
TrieNode n5;
TrieNode n6;
n1.child['b'-'a'] = &n4;
n2.child['c'-'a'] = &n5;
n3.child['f'-'a'] = &n6;
TrieNode n7;
TrieNode n8;
TrieNode n9;
TrieNode n10;
n4.child['c'-'a'] = &n7;
n4.child['d'-'a'] = &n8;
n5.child['d'-'a'] = &n9;
n6.child['g'-'a'] = &n10;
n7.is_end = true;
n8.is_end = true;
n9.is_end = true;
n10.is_end = true;
TrieNode n11;
n7.child['d'-'a'] = &n11;
n11.is_end = true;
std::string word;
std::vector<std::string> word_list;
get_all_word_from_trie(&root, word, word_list);
for (int i = 0; i < word_list.size(); i++){
printf("%s\n", word_list[i].c_str());
}
return 0;
}
Trie树的单词插入
Trie树的单词搜索
例1:实现trie树(medium)(trie树建立)
实现一个trie树,包括insert(插入单词)、search(搜索单词)、startsWith(检查是否包含前缀)。输入的字符串只包含小写字符(a-z)
- LeetCode 208. Implement Trie (Prefix Tree)
#include <stdio.h>
#include <string>
#include <vector>
#define TRIE_MAX_CHAR_NUM 26
struct TrieNode{
TrieNode *child[TRIE_MAX_CHAR_NUM];
bool is_end;
TrieNode() : is_end(false){
for (int i = 0; i < TRIE_MAX_CHAR_NUM; i++){
child[i] = 0;
}
}
};
class TrieTree{
public:
TrieTree(){
}
~TrieTree(){
for (int i = 0; i < _node_vec.size(); i++){
delete _node_vec[i];
}
}
void insert(const char *word) {
TrieNode *ptr = &_root;
while(*word){
int pos = *word - 'a';
if (!ptr->child[pos]){
ptr->child[pos] = new_node();
}
ptr = ptr->child[pos];
word++;
}
ptr->is_end = true;
}
bool search(const char *word){
TrieNode *ptr = &_root;
while(*word){
int pos = *word - 'a';
if (!ptr->child[pos]){
return false;
}
ptr = ptr->child[pos];
word++;
}
return ptr->is_end;
}
bool startsWith(const char *prefix){
TrieNode *ptr = &_root;
while(*prefix){
int pos = *prefix - 'a';
if (!ptr->child[pos]){
return false;
}
ptr = ptr->child[pos];
prefix++;
}
return true;
}
private:
TrieNode *new_node(){
TrieNode *node = new TrieNode();
_node_vec.push_back(node);
return node;
}
std::vector<TrieNode *> _node_vec;
TrieNode _root;
};
class Trie {
public:
Trie() {
}
void insert(std::string word) {
_trie_tree.insert(word.c_str());
}
bool search(std::string word) {
return _trie_tree.search(word.c_str());
}
bool startsWith(std::string prefix) {
return _trie_tree.startsWith(prefix.c_str());
}
private:
TrieTree _trie_tree;
};
int main(){
Trie trie;
trie.insert("abcde");
printf("%d\n", trie.search("abcde"));
printf("%d\n", trie.startsWith("abc"));
printf("%d\n", trie.startsWith("abcdef"));
printf("%d\n", trie.startsWith("abcde"));
return 0;
}
例2:添加与查找单词(medium)(trie树搜索)
设计一个数据结构,支持如下两个操作:
1)添加单词:addWord(word)
2)搜索单词:bool search(word)
搜索单词时,可以按照普通的方式搜索单词(原始单词),或正则表达式的方式搜索单词,添加的单词只包含小写字符a-z;搜索时,只包含小写字符'a'-'z'或'.'。'.'代表任意一个小写字符。
例如:
addWord("bad")
addWord("dad")
addWord("mad")
search("pad")-> false
search("bad")-> true
search(".ad")-> true
search("b..")-> true
- LeetCode 211.Add and Search Word - Data structure design
#include <stdio.h>
#include <vector>
#define TRIE_MAX_CHAR_NUM 26
struct TrieNode{
TrieNode *child[TRIE_MAX_CHAR_NUM];
bool is_end;
TrieNode() : is_end(false){
for (int i = 0; i < TRIE_MAX_CHAR_NUM; i++){
child[i] = 0;
}
}
};
class TrieTree{
public:
TrieTree(){
}
~TrieTree(){
for (int i = 0; i < _node_vec.size(); i++){
delete _node_vec[i];
}
}
void insert(const char *word) {
TrieNode *ptr = &_root;
while(*word){
int pos = *word - 'a';
if (!ptr->child[pos]){
ptr->child[pos] = new_node();
}
ptr = ptr->child[pos];
word++;
}
ptr->is_end = true;
}
TrieNode *root(){
return &_root;
}
private:
TrieNode *new_node(){
TrieNode *node = new TrieNode();
_node_vec.push_back(node);
return node;
}
std::vector<TrieNode *> _node_vec;
TrieNode _root;
};
bool search_trie(TrieNode *node, const char *word){
if (*word == '\0'){
if (node->is_end){
return true;
}
return false;
}
if (*word == '.'){
for (int i = 0; i < TRIE_MAX_CHAR_NUM; i++){
if (node->child[i] &&
search_trie(node->child[i], word + 1)){
return true;
}
}
}
else{
int pos = *word - 'a';
if (node->child[pos] &&
search_trie(node->child[pos], word + 1)){
return true;
}
}
return false;
}
#include <string>
class WordDictionary {
public:
WordDictionary() {
}
void addWord(std::string word) {
_trie_tree.insert(word.c_str());
}
bool search(std::string word) {
return search_trie(_trie_tree.root(), word.c_str());
}
private:
TrieTree _trie_tree;
};
int main(){
WordDictionary word_dictionary;
word_dictionary.addWord("bad");
word_dictionary.addWord("dad");
word_dictionary.addWord("mad");
printf("%d\n", word_dictionary.search("pad"));
printf("%d\n", word_dictionary.search("bad"));
printf("%d\n", word_dictionary.search(".ad"));
printf("%d\n", word_dictionary.search("b.."));
return 0;
}
2/3:并查集的基础知识
并查集(Union Find),又称不相交集合(Disjiont Set),它应用于N个元素的集合求并与查询问题,在该应用场景中,我们通常是在开始时让每个元素构成一个单元素的集合,然后按一定顺序将属于同一组的元素所在的集合合并,其间要反复查找一个元素在哪个集合中。虽然该问题并不复杂,但面对极大的数据量时,普通的数据结构往往无法解决,并查集就是解决该种问题最为优秀的算法。
并查集功能分析
- 预备知识_并查集数组实现
#include <stdio.h>
#include <vector>
class DisjointSet{
public:
DisjointSet(int n){
for (int i = 0; i < n; i++){
_id.push_back(i);
}
}
int find(int p){
return _id[p];
}
void union_(int p, int q){
int pid = find(p);
int qid = find(q);
if (pid == qid){
return;
}
for (int i = 0; i < _id.size(); i++){
if (_id[i] == pid){
_id[i] = qid;
}
}
}
void print_set(){
printf("元素: ");
for (int i = 0; i < _id.size(); i++){
printf("%d ", i);
}
printf("\n");
printf("集合: ");
for (int i = 0; i < _id.size(); i++){
printf("%d ", _id[i]);
}
printf("\n");
}
private:
std::vector<int> _id;
};
int main(){
DisjointSet disjoint_set(8);
disjoint_set.print_set();
printf("Union(0, 5)\n");
disjoint_set.union_(0, 5);
disjoint_set.print_set();
printf("Find(0) = %d, Find(5) = %d\n",
disjoint_set.find(0), disjoint_set.find(5));
printf("Find(2) = %d, Find(5) = %d\n",
disjoint_set.find(2), disjoint_set.find(5));
disjoint_set.union_(2, 4);
disjoint_set.print_set();
disjoint_set.union_(0, 4);
disjoint_set.print_set();
printf("Find(2) = %d, Find(5) = %d\n",
disjoint_set.find(2), disjoint_set.find(5));
return 0;
}
- 预备知识_并查集森林实现
#include <stdio.h>
#include <vector>
class DisjointSet{
public:
DisjointSet(int n){
for (int i = 0; i < n; i++){
_id.push_back(i);
_size.push_back(1);
}
_count = n;
}
int find(int p){
while(p != _id[p]){
_id[p] = _id[_id[p]];
p = _id[p];
}
return p;
}
void union_(int p, int q){
int i = find(p);
int j = find(q);
if (i == j){
return;
}
if (_size[i] < _size[j]){
_id[i] = j;
_size[j] += _size[i];
}
else{
_id[j] = i;
_size[i] += _size[j];
}
_count--;
}
void print_set(){
printf("元素: ");
for (int i = 0; i < _id.size(); i++){
printf("%d ", i);
}
printf("\n");
printf("集合: ");
for (int i = 0; i < _id.size(); i++){
printf("%d ", _id[i]);
}
printf("\n");
}
int count(){
return _count;
}
private:
std::vector<int> _id;
std::vector<int> _size;
int _count;
};
int main(){
DisjointSet disjoint_set(8);
disjoint_set.print_set();
printf("Union(0, 5)\n");
disjoint_set.union_(0, 5);
disjoint_set.print_set();
printf("Find(0) = %d, Find(5) = %d\n",
disjoint_set.find(0), disjoint_set.find(5));
printf("Find(2) = %d, Find(5) = %d\n",
disjoint_set.find(2), disjoint_set.find(5));
disjoint_set.union_(2, 4);
disjoint_set.print_set();
disjoint_set.union_(0, 4);
disjoint_set.print_set();
printf("Find(2) = %d, Find(5) = %d\n",
disjoint_set.find(2), disjoint_set.find(5));
return 0;
}
例3:朋友圈(medium)(井查集、图的搜索)
有N个同学,他们之间有些是朋友,有些不是。"友谊"是可以传递的,例如A与B是朋友,B与C是朋友,那么A与C也是朋友;朋友圈就是完成"友谊"传递后的一组朋友。
给定N*N的矩阵代表同学间是否是朋友,如果M[i][j]= 1代表第i个学生与第j个学生是朋友,否则不是。求朋友圈的个数。
- LeetCode 547.Friend Circles(solve1)
#include <stdio.h>
#include <vector>
void DFS_graph(int u,
std::vector<std::vector<int> >& graph,
std::vector<int>& visit){
visit[u] = 1;
for (int i = 0; i < graph[u].size(); i++){
if (visit[i] == 0 && graph[u][i] == 1){
DFS_graph(i, graph, visit);
}
}
}
class Solution {
public:
int findCircleNum(std::vector<std::vector<int> >& M) {
std::vector<int> visit(M.size(), 0);
int count = 0;
for (int i = 0; i < M.size(); i++){
if (visit[i] == 0){
DFS_graph(i, M, visit);
count++;
}
}
return count;
}
};
int main(){
int test[][3] = {{1, 1, 0}, {1, 1, 0}, {0, 0, 1}};
std::vector<std::vector<int> > M(3, std::vector<int>(3, 0));
for (int i = 0; i < 3; i++){
for (int j = 0; j < 3; j++){
M[i][j] = test[i][j];
}
}
Solution solve;
printf("%d\n", solve.findCircleNum(M));
return 0;
}
- LeetCode 547.Friend Circles(solve2)
#include <stdio.h>
#include <vector>
class DisjointSet{
public:
DisjointSet(int n){
for (int i = 0; i < n; i++){
_id.push_back(i);
_size.push_back(1);
}
_count = n;
}
int find(int p){
while(p != _id[p]){
_id[p] = _id[_id[p]];
p = _id[p];
}
return p;
}
void union_(int p, int q){
int i = find(p);
int j = find(q);
if (i == j){
return;
}
if (_size[i] < _size[j]){
_id[i] = j;
_size[j] += _size[i];
}
else{
_id[j] = i;
_size[i] += _size[j];
}
_count--;
}
int count(){
return _count;
}
private:
std::vector<int> _id;
std::vector<int> _size;
int _count;
};
class Solution {
public:
int findCircleNum(std::vector<std::vector<int> >& M) {
DisjointSet disjoint_set(M.size());
for (int i = 0; i < M.size(); i++){
for (int j = i + 1; j < M.size(); j++){
if (M[i][j]){
disjoint_set.union_(i, j);
}
}
}
return disjoint_set.count();
}
};
int main(){
int test[][3] = {{1, 1, 0}, {1, 1, 0}, {0, 0, 1}};
std::vector<std::vector<int> > M(3, std::vector<int>(3, 0));
for (int i = 0; i < 3; i++){
for (int j = 0; j < 3; j++){
M[i][j] = test[i][j];
}
}
Solution solve;
printf("%d\n", solve.findCircleNum(M));
return 0;
}
3/3:线段树的基础知识
线段树是一种平衡二叉搜索树(完全二叉树),它将一个线段区间划分成一些单元区间。对于线段树中的每一个非叶子节点[a,b],它的左儿子表示的区间为[a,(a+b)/2],右儿子表示的区间为[(a+b)/2+1,b],最后的叶子节点数目为N,与数组下标对应。线段树的一般包括建立、查询、插入、更新等操作,建立规模为N的时间复杂度是O(NlogN),其他操作时间复杂度为O(logN)
- 预备知识_线段树
#include <stdio.h>
#include <vector>
void build_segment_tree(std::vector<int> &value,
std::vector<int> &nums,
int pos, int left, int right){
if (left == right){
value[pos] = nums[left];
return;
}
int mid = (left + right) / 2;
build_segment_tree(value, nums, pos * 2 + 1, left, mid);
build_segment_tree(value, nums, pos * 2 + 2, mid + 1, right);
value[pos] = value[pos * 2 + 1] + value[pos * 2 + 2];
}
void print_segment_tree(std::vector<int> &value,
int pos, int left, int right, int layer){
for (int i = 0; i < layer; i++){
printf("---");
}
printf("[%d %d][%d] : %d\n", left, right, pos, value[pos]);
if (left == right){
return;
}
int mid = (left + right) / 2;
print_segment_tree(value, pos * 2 + 1, left, mid, layer + 1);
print_segment_tree(value, pos * 2 + 2, mid + 1, right, layer + 1);
}
int sum_range_segment_tree(std::vector<int> &value,
int pos, int left, int right,
int qleft, int qright){
if (qleft > right || qright < left){
return 0;
}
if (qleft <= left && qright >= right){
return value[pos];
}
int mid = (left + right) / 2;
return sum_range_segment_tree(value, pos * 2 + 1,
left, mid, qleft, qright) +
sum_range_segment_tree(value, pos * 2 + 2, mid + 1,
right, qleft, qright);
}
void update_segment_tree(std::vector<int> &value,
int pos, int left, int right,
int index, int new_value){
if (left == right && left == index){
value[pos] = new_value;
return;
}
int mid = (left + right) / 2;
if (index <= mid){
update_segment_tree(value, pos * 2 + 1,
left, mid, index, new_value);
}
else{
update_segment_tree(value, pos * 2 + 2, mid + 1,
right, index, new_value);
}
value[pos] = value[pos * 2 + 1] + value[pos * 2 + 2];
}
int main(){
std::vector<int> nums;
for (int i = 0; i < 6; i++){
nums.push_back(i);
}
std::vector<int> value;
for (int i = 0; i < 24; i++){
value.push_back(0);
}
build_segment_tree(value, nums, 0, 0, nums.size() - 1);
printf("segment_tree:\n");
print_segment_tree(value, 0, 0, nums.size() - 1, 0);
int sum_range = sum_range_segment_tree(
value, 0, 0, nums.size() - 1, 2, 4);
printf("sum_range [2, 5] = %d\n", sum_range);
update_segment_tree(value, 0, 0, nums.size() - 1, 2, 10);
printf("segment_tree:\n");
print_segment_tree(value, 0, 0, nums.size() - 1, 0);
return 0;
}
例4:区域和的查询(medium)
给定一个整数数组nums,求这个整数数组中,下标i到下标j之间的数字和(i<=j),a[i]+a[i+1]+...+a[j]。在求和的过程中,可能需要更新数组的某个元素a[i]。
例如:数组 nums[1,3,5]
sumRange(0,2)-> 9
update(1,2)
sumRange(0,2)-> 8
- LeetCode 307. Range Sum Query - Mutable
#include <stdio.h>
#include <vector>
void build_segment_tree(std::vector<int> &value,
std::vector<int> &nums,
int pos, int left, int right){
if (left == right){
value[pos] = nums[left];
return;
}
int mid = (left + right) / 2;
build_segment_tree(value, nums, pos * 2 + 1, left, mid);
build_segment_tree(value, nums, pos * 2 + 2, mid + 1, right);
value[pos] = value[pos * 2 + 1] + value[pos * 2 + 2];
}
int sum_range_segment_tree(std::vector<int> &value,
int pos, int left, int right,
int qleft, int qright){
if (qleft > right || qright < left){
return 0;
}
if (qleft <= left && qright >= right){
return value[pos];
}
int mid = (left + right) / 2;
return sum_range_segment_tree(value, pos * 2 + 1,
left, mid, qleft, qright) +
sum_range_segment_tree(value, pos * 2 + 2, mid + 1,
right, qleft, qright);
}
void update_segment_tree(std::vector<int> &value,
int pos, int left, int right,
int index, int new_value){
if (left == right && left == index){
value[pos] = new_value;
return;
}
int mid = (left + right) / 2;
if (index <= mid){
update_segment_tree(value, pos * 2 + 1,
left, mid, index, new_value);
}
else{
update_segment_tree(value, pos * 2 + 2, mid + 1,
right, index, new_value);
}
value[pos] = value[pos * 2 + 1] + value[pos * 2 + 2];
}
class NumArray {
public:
NumArray(std::vector<int> nums) {
if (nums.size() == 0){
return;
}
int n = nums.size() * 4;
for (int i = 0; i < n; i++){
_value.push_back(0);
}
build_segment_tree(_value, nums, 0, 0, nums.size() - 1);
_right_end = nums.size() - 1;
}
void update(int i, int val) {
update_segment_tree(_value, 0, 0, _right_end, i, val);
}
int sumRange(int i, int j) {
return sum_range_segment_tree(_value, 0, 0, _right_end, i, j);
}
private:
std::vector<int> _value;
int _right_end;
};
int main(){
std::vector<int> nums;
nums.push_back(1);
nums.push_back(3);
nums.push_back(5);
NumArray num_array(nums);
printf("%d\n", num_array.sumRange(0, 2));
num_array.update(1, 2);
printf("%d\n", num_array.sumRange(0, 2));
return 0;
}