Skip to content

Commit da34b75

Browse files
author
CodingPrakharKapoor
committed
Add Aho-Corasick string pattern matching algorithm in Java (fixes #149)
1 parent 7cb4b61 commit da34b75

1 file changed

Lines changed: 105 additions & 0 deletions

File tree

Java/strings/AhoCorasick.java

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
import java.util.*;
2+
3+
/**
 * Aho-Corasick multi-pattern string matcher over the lowercase alphabet {@code 'a'..'z'}.
 *
 * <p>The automaton lives in the static tables {@link #output}, {@link #failure} and
 * {@link #transition}; every call to {@link #buildMachine} overwrites them, so the class
 * holds one automaton at a time.
 *
 * <p>Limits imposed by the representation:
 * <ul>
 *   <li>patterns may contain only {@code 'a'..'z'};</li>
 *   <li>at most {@value java.lang.Integer#SIZE} patterns, because matches are recorded as
 *       bits of an {@code int} mask;</li>
 *   <li>at most {@link #MAX_STATES} trie states, root included.</li>
 * </ul>
 */
class AhoCorasick {

    /**
     * Capacity of the state tables. The trie needs at most
     * {@code 1 + (sum of the lengths of all keywords)} states, the extra one being the root.
     */
    static final int MAX_STATES = 500;

    /** Number of lowercase letters, i.e. the width of each transition row. */
    static final int ALPHABET_SIZE = 26;

    /** Output mask per state: bit {@code i} is set if {@code words[i]} ends at this state. */
    static int[] output = new int[MAX_STATES];

    /** Failure link per state; the root keeps {@code -1} because it never needs one. */
    static int[] failure = new int[MAX_STATES];

    /** Transition table (trie); {@code -1} marks a missing edge. */
    static int[][] transition = new int[MAX_STATES][ALPHABET_SIZE];

    /**
     * Builds the automaton for the first {@code wordCount} entries of {@code words},
     * replacing whatever automaton the static tables held before.
     *
     * @param words     the keywords; each must consist only of {@code 'a'..'z'}
     * @param wordCount how many leading entries of {@code words} to use
     * @return the number of states created, including the root
     * @throws NullPointerException     if {@code words} or one of the used entries is null
     * @throws IllegalArgumentException if {@code wordCount} exceeds {@code words.length} or
     *                                  {@code Integer.SIZE}, if a keyword contains a
     *                                  character outside {@code 'a'..'z'}, or if the
     *                                  keywords need more than {@link #MAX_STATES} states
     */
    static int buildMachine(String[] words, int wordCount) {
        Objects.requireNonNull(words, "words");
        if (wordCount > words.length) {
            throw new IllegalArgumentException(
                    "wordCount " + wordCount + " exceeds words.length " + words.length);
        }
        if (wordCount > Integer.SIZE) {
            // 1 << i wraps its shift distance for i >= 32, which would alias two keywords.
            throw new IllegalArgumentException(
                    "at most " + Integer.SIZE + " keywords are supported, got " + wordCount);
        }

        // Validate every keyword before touching the tables, so that bad input does not
        // leave a half-built trie behind.
        for (int i = 0; i < wordCount; i++) {
            String word = Objects.requireNonNull(words[i], "words[" + i + "]");
            for (int k = 0; k < word.length(); k++) {
                char ch = word.charAt(k);
                if (ch < 'a' || ch > 'z') {
                    throw new IllegalArgumentException(
                            "keyword \"" + word + "\" contains a character outside 'a'..'z'");
                }
            }
        }

        Arrays.fill(output, 0);
        for (int[] row : transition) {
            Arrays.fill(row, -1);
        }

        int stateCount = 1; // root = 0

        // Build trie
        for (int i = 0; i < wordCount; i++) {
            String word = words[i];
            int state = 0;
            for (int k = 0; k < word.length(); k++) {
                int idx = word.charAt(k) - 'a';
                if (transition[state][idx] == -1) {
                    if (stateCount >= MAX_STATES) {
                        throw new IllegalArgumentException(
                                "keywords need more than " + MAX_STATES + " states");
                    }
                    transition[state][idx] = stateCount++;
                }
                state = transition[state][idx];
            }
            output[state] |= (1 << i);
        }

        // Fill missing transitions from root to itself. This guarantees that every
        // failure-link walk below (and in nextState) terminates at the root.
        for (int c = 0; c < ALPHABET_SIZE; c++) {
            if (transition[0][c] == -1) {
                transition[0][c] = 0;
            }
        }

        Arrays.fill(failure, -1);
        Queue<Integer> queue = new ArrayDeque<>();

        // Initialize failure links for depth-1 states
        for (int c = 0; c < ALPHABET_SIZE; c++) {
            if (transition[0][c] != 0) {
                failure[transition[0][c]] = 0;
                queue.add(transition[0][c]);
            }
        }

        // BFS to build failure links: a state's link is only computed after the links of
        // all shallower states are known.
        while (!queue.isEmpty()) {
            int state = queue.remove();
            for (int c = 0; c < ALPHABET_SIZE; c++) {
                int next = transition[state][c];
                if (next != -1) {
                    int f = failure[state];
                    while (transition[f][c] == -1) {
                        f = failure[f];
                    }
                    f = transition[f][c];
                    failure[next] = f;
                    // Inherit the keywords that end at the failure target.
                    output[next] |= output[f];
                    queue.add(next);
                }
            }
        }
        return stateCount;
    }

    /**
     * Returns the state reached from {@code current} on reading {@code input}, following
     * failure links while the current state has no edge for that character.
     *
     * <p>A character outside {@code 'a'..'z'} cannot occur in any keyword, so it sends the
     * automaton back to the root instead of indexing outside the transition table.
     *
     * @param current the current state
     * @param input   the next character of the text
     * @return the next state
     */
    static int nextState(int current, char input) {
        int idx = input - 'a';
        if (idx < 0 || idx >= ALPHABET_SIZE) {
            return 0;
        }
        while (transition[current][idx] == -1) {
            current = failure[current];
        }
        return transition[current][idx];
    }

    /**
     * Builds the automaton for {@code words} and prints to standard output every
     * occurrence of every keyword in {@code text}, as inclusive {@code [start, end]}
     * index pairs.
     *
     * @param words     the keywords, see {@link #buildMachine}
     * @param wordCount how many leading entries of {@code words} to use
     * @param text      the text to scan; characters outside {@code 'a'..'z'} never match
     * @throws IllegalArgumentException under the same conditions as {@link #buildMachine}
     */
    static void search(String[] words, int wordCount, String text) {
        buildMachine(words, wordCount);
        int state = 0;

        for (int i = 0; i < text.length(); i++) {
            state = nextState(state, text.charAt(i));
            if (output[state] == 0) {
                continue;
            }

            for (int j = 0; j < wordCount; j++) {
                // Compare against 0 with != rather than >: bit 31 makes the masked value
                // negative, so "> 0" would drop matches of the 32nd keyword.
                if ((output[state] & (1 << j)) != 0) {
                    System.out.println("Word \"" + words[j] + "\" found at ["
                            + (i - words[j].length() + 1) + ", " + i + "]");
                }
            }
        }
    }

    /** Driver: searches a fixed sample text for a fixed set of keywords. */
    public static void main(String[] args) {
        String[] patterns = { "he", "she", "hers", "his" };
        String text = "ahishers";
        search(patterns, patterns.length, text);
    }
}

0 commit comments

Comments
 (0)