Skip to content

Commit d2d9ea4

Browse files
committed
GraphTokenStreamFiniteStrings#articulationPoints replacing recursion with iterative approach, removing the 1000-depth limit
Reimplementing Tarjan's articulation points as an interative DFS using a IntArrayList stack. Output is unchanged.
1 parent e720207 commit d2d9ea4

3 files changed

Lines changed: 67 additions & 50 deletions

File tree

lucene/CHANGES.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,9 @@ Improvements
388388

389389
* GITHUB#16144: Clarify LeafCollector batch collection scoring contract. (Costin Leau)
390390

391+
* GITHUB#16239: GraphTokenStreamFiniteStrings#articulationPoints now uses an iterative
392+
approach instead of recursion, removing the previous MAX_RECURSION_LEVEL of 1000. (Jakub Slowinski)
393+
391394
Optimizations
392395
---------------------
393396
* GITHUB#16222: MultiTermQuery constant-score wrapper now defers term collection to ScorerSupplier#get()

lucene/core/src/java/org/apache/lucene/util/graph/GraphTokenStreamFiniteStrings.java

Lines changed: 59 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,6 @@
4545
* different paths of the {@link Automaton}.
4646
*/
4747
public final class GraphTokenStreamFiniteStrings {
48-
/** Maximum level of recursion allowed in recursive operations. */
49-
private static final int MAX_RECURSION_LEVEL = 1000;
50-
5148
private AttributeSource[] tokens = new AttributeSource[4];
5249
private final Automaton det;
5350
private final Transition transition = new Transition();
@@ -176,15 +173,7 @@ public int[] articulationPoints() {
176173
undirect.addTransition(transition.dest, i, transition.min);
177174
}
178175
}
179-
int numStates = det.getNumStates();
180-
BitSet visited = new BitSet(numStates);
181-
int[] depth = new int[det.getNumStates()];
182-
int[] low = new int[det.getNumStates()];
183-
int[] parent = new int[det.getNumStates()];
184-
Arrays.fill(parent, -1);
185-
IntArrayList points = new IntArrayList();
186-
articulationPointsRecurse(undirect.finish(), 0, 0, depth, low, parent, visited, points);
187-
return points.reverse().toArray();
176+
return articulationPointsIterative(undirect.finish()).reverse().toArray();
188177
}
189178

190179
/** Build an automaton from the provided {@link TokenStream}. */
@@ -252,43 +241,67 @@ private Automaton build(final TokenStream in) throws IOException {
252241
return builder.finish();
253242
}
254243

255-
private static void articulationPointsRecurse(
256-
Automaton a,
257-
int state,
258-
int d,
259-
int[] depth,
260-
int[] low,
261-
int[] parent,
262-
BitSet visited,
263-
IntArrayList points) {
264-
visited.set(state);
265-
depth[state] = d;
266-
low[state] = d;
267-
int childCount = 0;
268-
boolean isArticulation = false;
269-
Transition t = new Transition();
270-
int numT = a.initTransition(state, t);
271-
for (int i = 0; i < numT; i++) {
272-
a.getNextTransition(t);
273-
if (visited.get(t.dest) == false) {
274-
parent[t.dest] = state;
275-
if (d < MAX_RECURSION_LEVEL) {
276-
articulationPointsRecurse(a, t.dest, d + 1, depth, low, parent, visited, points);
277-
} else {
278-
throw new IllegalArgumentException(
279-
"Exceeded maximum recursion level during graph analysis");
244+
/**
245+
* Iterative (IntArrayList stack) implementation of Tarjan's articulation points DFS, so that deep
246+
* undirected graphs can be analyzed without hitting a {@link StackOverflowError}. Each state
247+
* (vertex) is pushed at most once over the whole traversal. {@code transitionIndex[state]} holds
248+
* how many of that state's transitions (edges) we've already traversed. We resume at the next one
249+
* after exploring the current child and all its descendants.
250+
*/
251+
private static IntArrayList articulationPointsIterative(Automaton a) {
252+
final int numStates = a.getNumStates();
253+
final int[] depth = new int[numStates];
254+
final int[] low = new int[numStates];
255+
final int[] parent = new int[numStates];
256+
Arrays.fill(parent, -1);
257+
final int[] transitionIndex = new int[numStates];
258+
final BitSet visited = new BitSet(numStates);
259+
final BitSet isArticulation = new BitSet(numStates);
260+
final IntArrayList stack = new IntArrayList();
261+
final IntArrayList points = new IntArrayList();
262+
final Transition t = new Transition();
263+
264+
// The DFS starts at the root (state 0). depth[0] and low[0] are by default set to 0.
265+
stack.add(0);
266+
visited.set(0);
267+
// The root vertex must be handled separately: it is a cut vertex if and only if it has at least
268+
// two children in the DFS tree.
269+
int rootChildCount = 0;
270+
271+
while (stack.isEmpty() == false) {
272+
int state = stack.get(stack.size() - 1);
273+
int i = transitionIndex[state];
274+
if (i < a.getNumTransitions(state)) {
275+
transitionIndex[state] = i + 1;
276+
a.getTransition(state, i, t);
277+
if (visited.get(t.dest) == false) {
278+
visited.set(t.dest);
279+
parent[t.dest] = state;
280+
depth[t.dest] = low[t.dest] = depth[state] + 1;
281+
if (state == 0) {
282+
rootChildCount++;
283+
}
284+
stack.add(t.dest);
285+
} else if (t.dest != parent[state]) {
286+
// backedge.
287+
low[state] = Math.min(low[state], depth[t.dest]);
288+
}
289+
} else {
290+
// Current state has been fully explored.
291+
stack.removeLast();
292+
// Only root (state 0) has no parent (-1).
293+
int par = parent[state];
294+
if (par != -1) {
295+
low[par] = Math.min(low[par], low[state]);
296+
if (low[state] >= depth[par]) {
297+
isArticulation.set(par);
298+
}
280299
}
281-
childCount++;
282-
if (low[t.dest] >= depth[state]) {
283-
isArticulation = true;
300+
if ((par != -1 && isArticulation.get(state)) || (par == -1 && rootChildCount > 1)) {
301+
points.add(state);
284302
}
285-
low[state] = Math.min(low[state], low[t.dest]);
286-
} else if (t.dest != parent[state]) {
287-
low[state] = Math.min(low[state], depth[t.dest]);
288303
}
289304
}
290-
if ((parent[state] != -1 && isArticulation) || (parent[state] == -1 && childCount > 1)) {
291-
points.add(state);
292-
}
305+
return points;
293306
}
294307
}

lucene/core/src/test/org/apache/lucene/util/graph/TestGraphTokenStreamFiniteStrings.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -662,7 +662,7 @@ public void testMultipleSidePathsWithGaps() throws Exception {
662662
assertFalse(it.hasNext());
663663
}
664664

665-
public void testLongTokenStreamStackOverflowError() throws Exception {
665+
public void testLongTokenStreamDoesNotOverflow() throws Exception {
666666

667667
ArrayList<Token> tokens =
668668
new ArrayList<>() {
@@ -674,14 +674,15 @@ public void testLongTokenStreamStackOverflowError() throws Exception {
674674
}
675675
};
676676

677-
// Add in too many tokens to get a high depth graph
678-
for (int i = 0; i < 1024 + 1; i++) {
677+
// Add in many tokens to get a very deep graph.
678+
for (int i = 0; i < 50_000; i++) {
679679
tokens.add(token("network", 1, 1));
680680
}
681681

682682
TokenStream ts = new CannedTokenStream(tokens.toArray(Token[]::new));
683683
GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(ts);
684684

685-
assertThrows(IllegalArgumentException.class, graph::articulationPoints);
685+
int[] points = graph.articulationPoints();
686+
assertEquals(50_001, points.length);
686687
}
687688
}

0 commit comments

Comments
 (0)