Trie树
2019-05-12 本文已影响0人
TomyZhang
一、概念
Trie树又称为字典树、单词查找树、前缀树等,是一种树状结构。
对于英文字符串,其每个结点包括26个next指针,每个指针对应一个字母,即每一条边为一个字母,同时每个结点包含一个标识,表示从根结点到该结点的边是否组成了一个单词。
Trie树二、数据结构
#define NUM 26
struct TrieNode {
bool isStr;
TrieNode *next[NUM];
};
三、相关操作
- 插入
- 查找
- 删除
四、实现
#include<stdio.h>
#include<malloc.h>
#define SEARCHWORD_MAXCOUNT 100
#define SEARCHWORD_MAXLEN 12
#define TRIE_NUM 26
struct TrieNode { //Trie树结点
char ch; //当前字母
bool isStrEnd; //单词结束标志
int frequency; //词频统计
TrieNode *child[TRIE_NUM]; //孩子结点指针
};
struct SearchNode { //查找结果结点
char ch[SEARCHWORD_MAXLEN + 1]; //单词
int frequency; //词频
};
TrieNode root;
SearchNode searchNode[SEARCHWORD_MAXCOUNT];
int searchIndex;
int mstrlen(char *a) {
int len = 0;
while (a[len] != '\0') {
len++;
}
return len;
}
void mstrncpy(char *dest, char *src, int len) {
for (int i = 0; i < len; i++) {
dest[i] = src[i];
}
dest[len] = '\0';
}
TrieNode* newNode(char ch) { //创建并初始化新结点
TrieNode *node = (TrieNode*)malloc(sizeof(TrieNode));
node->ch = ch;
node->isStrEnd = false;
node->frequency = 0;
for(int i=0; i<TRIE_NUM; i++) {
node->child[i] = NULL;
}
return node;
}
void freeTrie(TrieNode *node) { //使用DFS后根序方式释放结点内存
if(node == NULL) {
return;
}
for(int i=0; i<TRIE_NUM; i++) {
freeTrie(node->child[i]);
}
if(node != &root) {
free(node);
} else {
for(int i=0; i<TRIE_NUM; i++) {
node->child[i] = NULL;
}
}
}
void printSearchResult(char pre[]) {
printf("printSearchResult, pre: %s\n", pre);
for(int i=0; i<searchIndex; i++) {
printf("%d: %s, %d times\n", i, searchNode[i].ch, searchNode[i].frequency);
}
}
void insertStr(char ch[]) { //插入或者更新字符串
int n = mstrlen(ch);
TrieNode *p = &root;
for(int i=0; i<n; i++) {
char temp = ch[i];
int index = temp - 'a';
if(p->child[index] == NULL) { //子节点为空,创建一个新的子节点
TrieNode *node = newNode(temp);
p->child[index] = node;
}
p = p->child[index];
}
if(p->isStrEnd) { //如果已经是字符串结尾,则frequency加1
p->frequency++;
} else { //如果不是字符串结尾,则设置为字符串结尾,并将frequency设置为 1
p->isStrEnd = true;
p->frequency = 1;
}
}
void findStrEnd(char preStr[], int n, TrieNode *node) { //使用DFS先根序方式查找字符串
if(node == NULL) {
return;
}
SearchNode temp;
mstrncpy(temp.ch, preStr, n); //拷贝前面出现的字符串
temp.ch[n] = node->ch; //追加当前字母
temp.ch[n+1] = '\0'; //追加字符串结束标记
if(node->isStrEnd) {
temp.frequency = node->frequency;
searchNode[searchIndex] = temp;
searchIndex++;
}
for(int i=0; i<TRIE_NUM; i++) {
findStrEnd(temp.ch, n+1, node->child[i]);
}
}
void searchStr(char pre[]) { //根据前缀查找字符串
int n = mstrlen(pre);
searchIndex = 0;
TrieNode *p = &root;
for(int i=0; i<n; i++) {
char temp = pre[i];
int index = temp - 'a';
if(p->child[index] == NULL) { //子节点为空, 无查找结果
return;
} else { //子节点不为空,继续查找
p = p->child[index];
}
}
if(p->isStrEnd) {
SearchNode temp;
mstrncpy(temp.ch, pre, n);
temp.ch[n] = '\0';
temp.frequency = p->frequency;
searchNode[searchIndex] = temp;
searchIndex++;
}
for(int i=0; i<TRIE_NUM; i++) {
findStrEnd(pre, n, p->child[i]);
}
printSearchResult(pre);
}
void main() {
root.ch = ' ';
root.isStrEnd = false;
root.frequency = 0;
insertStr("abc");
insertStr("abcde");
insertStr("abc");
insertStr("adeb");
insertStr("abcde");
insertStr("adebc");
insertStr("afgbb");
insertStr("abcde");
insertStr("afg");
insertStr("afgb");
searchStr("abc");
searchStr("ade");
searchStr("afg");
}