数据结构核心：KMP 算法、Trie 树与并查集实战解析 | 极客日志

C++算法

数据结构核心：KMP 算法、Trie 树与并查集实战解析

KMP 算法通过 next 数组优化字符串匹配，避免暴力回退；Trie 树利用前缀共享高效存储查询字符串集合；并查集通过路径压缩快速维护集合关系。文章提供 C++ 完整模板代码，解析核心逻辑与常见变体，帮助开发者掌握高频考点与工程应用。

DevOpsTeam 发布于 2026/3/15 · 更新于 2026/4/24 · 1 浏览

数据结构核心：KMP 算法、Trie 树与并查集实战解析

KMP 算法

字符串匹配是算法中的经典问题。给定主串 S（长度为 m）和模式串 P（长度为 n），我们需要找出 P 在 S 中出现的位置。下文代码中，两个字符串的下标均从 1 开始。

暴力匹配思路

最直观的做法是枚举主串的每一个位置作为起点，逐个字符比较。一旦遇到不匹配，模式串就回到开头重新尝试。这种方法逻辑简单，但在长文本场景下效率较低，时间复杂度为 O(m*n)。

// Brute-force matching over 1-indexed strings: S has length m, P has length n.
// Only start positions where the whole pattern still fits (i + n - 1 <= m) are
// tried, so S is never read past S[m].
for(int i = 1; i + n - 1 <= m; i++) {
    bool flag = true; // stays true while P[1..n] equals S[i..i+n-1]
    for(int j = 1; j <= n; j++) {
        if(S[i + j - 1] != P[j]) {
            flag = false;
            break; // first mismatch: give up on this start position
        }
    }
    // Report the match as a 0-based start index (i is the 1-based start).
    if(flag) printf("%d ", i - 1);
}

优化原理

仔细观察会发现，当匹配失败时，模式串本身往往包含重复的前缀和后缀。我们不需要把模式串完全重置到开头，而是可以直接跳过已经确认匹配的部分。这就是 KMP 的核心思想。

文章配图

为了实现这一跳跃，我们需要预处理一个 next 数组（代码中常记为 ne）。

next 数组的含义

next[i] 表示模式串前缀 P[1...i] 的最长相等真前缀与真后缀的长度（“真”指不包含 P[1...i] 本身，因此 next[i] < i）。也就是说，如果 next[i] = j，意味着 P[1...j] 和 P[i-j+1...i] 是相同的。

构建 next 数组的过程其实也是一个自匹配的过程：

// 计算 next 数组
for(int i = 2, j = 0; i <= n; i++) {
    while(j && P[i] != P[j + 1]) j = ne[j];
    if(P[i] == P[j + 1]) j++;
    ne[i] = j;
}

这里 j 代表当前匹配到的前缀长度。如果不匹配，就回退到 ne[j] 继续尝试，直到匹配成功或 j 归零。

匹配过程

有了 next 数组，匹配主串时就不需要回溯指针 i，只需调整 j 即可。这样整体时间复杂度降为 O(m+n)。

for(int i = 1, j = 0; i <= m; i++) {
    while(j && S[i] != P[j + 1]) j = ne[j];
    if(S[i] == P[j + 1]) j++;
    if(j == n) {
        printf("%d ", i - n); // 输出匹配起始位置
        j = ne[j]; // 继续寻找下一个匹配
    }
}

#include<iostream>
using namespace std;
const int N = 1e4 + 10; // capacity of the pattern buffer and of the next array
const int M = 1e5 + 10; // capacity of the text buffer
char S[M], P[N];        // text and pattern; the program stores both from index 1
// ne[i] holds a prefix length, which can be as large as n - 1. It must be int:
// a char element would overflow as soon as a length exceeds 127.
int ne[N];
int n, m;               // pattern length and text length, read from input

// One KMP step: given that the last j characters seen match P[1..j], return the
// length of the prefix of P that is matched after also consuming character c.
static int extend_match(int j, char c){
    while(j != 0 && P[j + 1] != c) j = ne[j]; // mismatch: fall back through ne
    return (P[j + 1] == c) ? j + 1 : j;
}

// Reads n, P, m, S from standard input and prints, separated by spaces, the
// 0-based start index of every position where the pattern matches the text.
int main(){
    // Both strings are stored from index 1; the loops below treat P[1..n] and
    // S[1..m] as the pattern and the text.
    std::cin >> n >> (P + 1) >> m >> (S + 1);

    // Build ne by matching the pattern against itself.
    for(int i = 2, j = 0; i <= n; ++i){
        j = extend_match(j, P[i]);
        ne[i] = j;
    }

    // Match the pattern against the text; i never moves backwards.
    for(int i = 1, j = 0; i <= m; ++i){
        j = extend_match(j, S[i]);
        if(j != n) continue;
        printf("%d ", i - n); // i is the 1-based end of the match
        j = ne[j];            // continue searching for further matches
    }
    return 0;
}

#include<iostream>
using namespace std;
const int N = 100000 + 10; // capacity: number of trie nodes and input buffer size
char str[N];               // buffer for the word read from input
int son[N][26];            // son[p][u]: id of the child of node p along letter u, 0 if absent
int cnt[N];                // cnt[p]: how many inserted words end exactly at node p
int idx;                   // id of the most recently allocated node; node 0 is the root

// Insert the NUL-terminated word str into the trie.
// Each character is mapped to a column with (c - 'a'), so the word is expected
// to consist of the letters 'a'..'z' only.
void insert(char str[]){
    int node = 0; // start at the root
    for(const char *c = str; *c != '\0'; ++c){
        int &child = son[node][*c - 'a'];
        if(child == 0) child = ++idx; // no edge for this letter yet: allocate a node
        node = child;
    }
    ++cnt[node]; // one more word ends at this node
}

// Return how many times the NUL-terminated word str has been inserted.
// Returns 0 as soon as the path for str leaves the trie.
int query(char str[]){
    int node = 0; // start at the root
    for(const char *c = str; *c != '\0'; ++c){
        const int child = son[node][*c - 'a'];
        if(child == 0) return 0; // missing edge: the word was never inserted
        node = child;
    }
    return cnt[node];
}

// Reads an operation count followed by that many "<op> <word>" lines.
// An op starting with 'I' inserts the word; any other op prints how many times
// the word has been inserted so far.
int main(){
    int n;
    // Stop on missing/invalid input instead of looping on an uninitialised count.
    if(scanf("%d", &n) != 1) return 0;
    while(n-- > 0){
        char op[2];
        // %1s keeps the one-letter opcode inside op[2]; a short read ends the loop
        // rather than reusing stale buffers. The word itself relies on the input
        // contract to fit in str.
        if(scanf("%1s%s", op, str) != 2) break;
        if(op[0] == 'I') insert(str);
        else printf("%d\n", query(str));
    }
    return 0;
}

#include<iostream>
using namespace std;
const int N = 100000 + 10; // capacity of the parent array
int p[N];                  // p[x]: parent of x; x is a set representative when p[x] == x
int n, m;                  // number of elements and number of operations, read from input

// Returns the representative (root) of the set containing x and applies path
// compression: every node visited on the way ends up pointing directly at the root.
//
// Implemented iteratively on purpose. Merging with p[find(a)] = find(b) can
// produce a parent chain as long as the number of elements, and a recursive
// walk would need one stack frame per link of that chain.
int find(int x){
    // Pass 1: walk up to the root.
    int root = x;
    while(p[root] != root) root = p[root];

    // Pass 2: re-point every node on the path from x to the root.
    while(p[x] != root){
        int parent = p[x];
        p[x] = root;
        x = parent;
    }
    return root;
}

// Reads n and m, then m operations of the form "<op> a b".
// An op starting with 'M' merges the sets containing a and b; any other op
// prints "Yes" if a and b are in the same set and "No" otherwise.
int main(){
    std::cin >> n >> m;
    for(int i = 1; i <= n; i++) p[i] = i; // every element starts as its own root

    while(m-- > 0){
        char op[2];
        int a, b;
        // %1s keeps the one-letter opcode inside op[2]; stop on truncated or
        // malformed input instead of operating on uninitialised a/b.
        if(scanf("%1s%d%d", op, &a, &b) != 3) break;
        if(op[0] == 'M') p[find(a)] = find(b); // attach a's root under b's root
        else {
            puts(find(a) == find(b) ? "Yes" : "No");
        }
    }
    return 0;
}

#include<iostream>
using namespace std;
const int N = 100000 + 10; // capacity of the arrays below
int p[N];                  // p[x]: parent of x; x is a set representative when p[x] == x
int sizes[N];              // sizes[r]: element count of the set whose representative is r
int n, m;                  // number of elements and number of operations, read from input

// Returns the representative (root) of the set containing x and applies path
// compression: every node visited on the way ends up pointing directly at the root.
//
// Implemented iteratively on purpose. Merges always attach one root under the
// other without comparing set sizes, so a parent chain can grow as long as the
// number of elements; a recursive walk would need one stack frame per link.
int find(int x){
    // Pass 1: walk up to the root.
    int root = x;
    while(p[root] != root) root = p[root];

    // Pass 2: re-point every node on the path from x to the root.
    while(p[x] != root){
        int parent = p[x];
        p[x] = root;
        x = parent;
    }
    return root;
}

// Reads n and m, then m operations. The first token of each operation selects:
//   - starts with 'C'      : read a, b and merge their sets
//   - second char is '1'   : read a, b and print "Yes"/"No" for "same set?"
//   - anything else        : read a and print the size of the set containing a
int main(){
    std::cin >> n >> m;
    for(int i = 1; i <= n; i++) {
        p[i] = i;     // every element starts as its own root
        sizes[i] = 1; // and as a set of one element
    }

    while(m-- > 0){
        char op[5];
        int a, b;
        // %4s keeps the opcode inside op[5]. Every read is checked so that
        // truncated input ends the loop instead of using uninitialised values.
        if(scanf("%4s", op) != 1) break;
        if(op[0] == 'C') {
            if(scanf("%d%d", &a, &b) != 2) break;
            int ra = find(a), rb = find(b);
            if(ra != rb) { // merging a set with itself would double-count its size
                sizes[rb] += sizes[ra];
                p[ra] = rb;
            }
        } else if(op[1] == '1') {
            if(scanf("%d%d", &a, &b) != 2) break;
            puts(find(a) == find(b) ? "Yes" : "No");
        } else {
            if(scanf("%d", &a) != 1) break;
            printf("%d\n", sizes[find(a)]); // sizes is only meaningful at the root
        }
    }
    return 0;
}