Skip to content

Commit 70173a7

Browse files
lvcatae898
authored and committed
perf: improved queries where starting from edges (CSR) is faster
1 parent 5248bc0 commit 70173a7

3 files changed

Lines changed: 345 additions & 22 deletions

File tree

engine/src/main/java/com/arcadedb/query/opencypher/executor/steps/AntiJoinChainOp.java

Lines changed: 147 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,29 @@ public long execute(final GraphTraversalProvider provider, final Database db) {
121121
if (earlierIdx != 0)
122122
return executeGenericAntiJoin(provider, db, nodeCount, validBuckets);
123123

124+
// FAST PATH: Edge-scan with algebraic computation for 3-hop chains where:
125+
// - Chain is (A) ←[E0]- (B) ←[E1]- (C) -[E2]→ (D)
126+
// - Anti-join is NOT (C)-[E_anti]->(A) with E_anti == E0 (same edge type)
127+
// - Inequality A ≠ D
128+
// Formula: count = sum over E1 edges (B,C): (|E0_rev(B)| - |E0_rev(B) ∩ E2(C)|) × |E2(C)|
129+
// This avoids all per-anchor iteration.
130+
// Additional condition: the inequality must span the full chain (positions 0 and hops),
131+
// and the anti-join later endpoint must be at the second-to-last position (laterIdx == hops - 1).
132+
// This ensures the algebraic formula correctly computes the anti-join + inequality together.
133+
// Q8 matches: hops=3, anti-join (c at pos2, t1 at pos0), inequality (t1 at pos0, t2 at pos3)
134+
// Q9 does NOT match: hops=3, anti-join (p1 at pos0, p3 at pos2), inequality (p1 at pos0, p3 at pos2)
135+
// — Q9's inequality endpoints don't span the full chain.
136+
final int ineqMin = Math.min(inequalityIdxA, inequalityIdxB);
137+
final int ineqMax = Math.max(inequalityIdxA, inequalityIdxB);
138+
if (hops == 3 && laterIdx == hops - 1 && earlierIdx == 0
139+
&& antiJoinEdgeType.equals(edgeTypes[0])
140+
&& inequalityIdxA >= 0 && inequalityIdxB >= 0
141+
&& ineqMin == 0 && ineqMax == hops) {
142+
final long result = executeEdgeScanAlgebraic(provider, nodeCount, validBuckets);
143+
if (result >= 0)
144+
return result;
145+
}
146+
124147
// Per-source iteration from anchor (position 0)
125148
final String anchorLabel = nodeLabels[0];
126149
if (anchorLabel == null || !db.getSchema().existsType(anchorLabel))
@@ -160,6 +183,103 @@ public long execute(final GraphTraversalProvider provider, final Database db) {
160183
return totalCount;
161184
}
162185

186+
/**
187+
* Edge-scan algebraic computation for 3-hop anti-join chains.
188+
* <p>
189+
* For Q8: (t1:Tag) ←[HAS_TAG]- (m) ←[REPLY_OF]- (c) -[HAS_TAG]→ (t2:Tag)
190+
* WHERE NOT (c)-[:HAS_TAG]->(t1) AND t1 <> t2
191+
* <p>
192+
* For each REPLY_OF edge (c → m):
193+
* tags_m = reverse_E0 neighbors of m (tags of m)
194+
* tags_c = E2 neighbors of c (tags of c)
195+
* common = |tags_m ∩ tags_c| (sorted merge)
196+
* contribution = (|tags_m| - common) × |tags_c|
197+
* (tags of m that c doesn't have × tags of c — satisfies both anti-join and inequality)
198+
*
199+
* @return count, or -1 if NeighborViews unavailable (caller should fall back)
200+
*/
201+
private long executeEdgeScanAlgebraic(final GraphTraversalProvider provider,
202+
final int nodeCount, final Set<Integer>[] validBuckets) {
203+
final Vertex.DIRECTION revDir0 = directions[0] == Vertex.DIRECTION.OUT ? Vertex.DIRECTION.IN
204+
: directions[0] == Vertex.DIRECTION.IN ? Vertex.DIRECTION.OUT : Vertex.DIRECTION.BOTH;
205+
final NeighborView viewA = provider.getNeighborView(revDir0, edgeTypes[0]);
206+
final NeighborView viewE1 = provider.getNeighborView(directions[1], edgeTypes[1]);
207+
final NeighborView viewC = provider.getNeighborView(directions[2], edgeTypes[2]);
208+
209+
if (viewA == null || viewE1 == null || viewC == null)
210+
return -1; // fall back to per-source
211+
212+
final int[] aNbrs = viewA.neighbors();
213+
final int[] e1Nbrs = viewE1.neighbors();
214+
final int[] cNbrs = viewC.neighbors();
215+
216+
// Optional type filtering
217+
final int[] bucketIds;
218+
final Set<Integer> pos1Buckets = validBuckets[1];
219+
final Set<Integer> pos2Buckets = validBuckets[2];
220+
if ((pos1Buckets != null && !pos1Buckets.isEmpty()) || (pos2Buckets != null && !pos2Buckets.isEmpty())) {
221+
bucketIds = new int[nodeCount];
222+
for (int v = 0; v < nodeCount; v++)
223+
bucketIds[v] = provider.getRID(v).getBucketId();
224+
} else {
225+
bucketIds = null;
226+
}
227+
228+
long total = 0;
229+
230+
// Scan all E1 (middle) edges by iterating pos1 nodes
231+
for (int b = 0; b < nodeCount; b++) {
232+
if (pos1Buckets != null && !pos1Buckets.isEmpty()
233+
&& !pos1Buckets.contains(bucketIds[b]))
234+
continue;
235+
236+
final int e1Start = viewE1.offset(b);
237+
final int e1End = viewE1.offsetEnd(b);
238+
if (e1Start == e1End) continue;
239+
240+
// Get setA size = reverse-E0 neighbors of b (tags of message b)
241+
final int aStart = viewA.offset(b);
242+
final int aEnd = viewA.offsetEnd(b);
243+
if (aStart == aEnd) continue;
244+
final int tagsOfB = aEnd - aStart;
245+
246+
// For each E1 neighbor c (pos2 node):
247+
for (int j = e1Start; j < e1End; j++) {
248+
final int c = e1Nbrs[j];
249+
250+
if (pos2Buckets != null && !pos2Buckets.isEmpty()
251+
&& !pos2Buckets.contains(bucketIds[c]))
252+
continue;
253+
254+
// Get setC = E2 neighbors of c (tags of comment c)
255+
final int cStart = viewC.offset(c);
256+
final int cEnd = viewC.offsetEnd(c);
257+
if (cStart == cEnd) continue;
258+
final int tagsOfC = cEnd - cStart;
259+
260+
// Count |setA ∩ setC| via sorted merge
261+
final long common = sortedIntersectionCount(aNbrs, aStart, aEnd, cNbrs, cStart, cEnd);
262+
263+
// Contribution: (tags of m that c DOESN'T have) × (tags of c)
264+
// Anti-join ensures t1 ∉ tags(c). Inequality t1≠t2 is auto-satisfied since t1 ∉ tags(c) but t2 ∈ tags(c).
265+
total += (tagsOfB - common) * tagsOfC;
266+
}
267+
}
268+
return total;
269+
}
270+
271+
private static long sortedIntersectionCount(final int[] a, int aStart, final int aEnd,
272+
final int[] b, int bStart, final int bEnd) {
273+
long count = 0;
274+
while (aStart < aEnd && bStart < bEnd) {
275+
final int av = a[aStart], bv = b[bStart];
276+
if (av < bv) aStart++;
277+
else if (av > bv) bStart++;
278+
else { count++; aStart++; bStart++; }
279+
}
280+
return count;
281+
}
282+
163283
/**
164284
* Fallback for anti-join patterns where neither endpoint is at position 0.
165285
* Uses dense propagation + per-node anti-join checking.
@@ -252,21 +372,33 @@ && isInequalityViolation(anchorId, target, 0, checkPosition))
252372
}
253373
} else {
254374
// Case B (Q8): anchor is anti-join target. For each frontier node, check
255-
// whether it has an anti-join edge to the anchor. Use binary search on the
256-
// frontier node's sorted neighbor list.
257-
for (final int frontierNode : frontier) {
258-
// Inequality check
259-
if (inequalityIdxA >= 0 && inequalityIdxB >= 0
260-
&& isInequalityViolation(anchorId, frontierNode, 0, checkPosition))
261-
continue;
262-
263-
// Anti-join: check if frontierNode has an edge to anchor via the anti-join type/direction
264-
final int[] frontierAntiNbrs = provider.getNeighborIds(frontierNode,
265-
antiJoinDirection, antiJoinEdgeType);
266-
if (Arrays.binarySearch(frontierAntiNbrs, anchorId) >= 0)
267-
continue; // anti-join hit — exclude
268-
269-
count += computeTailCount(provider, frontierNode, validBuckets);
375+
// whether it has an anti-join edge to the anchor. Use pre-fetched NeighborView
376+
// + binary search on shared neighbors[] array to avoid per-node int[] allocation.
377+
final NeighborView antiView = provider.getNeighborView(antiJoinDirection, antiJoinEdgeType);
378+
if (antiView != null) {
379+
final int[] antiNbrs = antiView.neighbors();
380+
for (final int frontierNode : frontier) {
381+
if (inequalityIdxA >= 0 && inequalityIdxB >= 0
382+
&& isInequalityViolation(anchorId, frontierNode, 0, checkPosition))
383+
continue;
384+
// Binary search for anchorId in frontierNode's sorted anti-join neighbor range
385+
final int aStart = antiView.offset(frontierNode);
386+
final int aEnd = antiView.offsetEnd(frontierNode);
387+
if (Arrays.binarySearch(antiNbrs, aStart, aEnd, anchorId) >= 0)
388+
continue; // anti-join hit — exclude
389+
count += computeTailCount(provider, frontierNode, validBuckets);
390+
}
391+
} else {
392+
for (final int frontierNode : frontier) {
393+
if (inequalityIdxA >= 0 && inequalityIdxB >= 0
394+
&& isInequalityViolation(anchorId, frontierNode, 0, checkPosition))
395+
continue;
396+
final int[] frontierAntiNbrs = provider.getNeighborIds(frontierNode,
397+
antiJoinDirection, antiJoinEdgeType);
398+
if (Arrays.binarySearch(frontierAntiNbrs, anchorId) >= 0)
399+
continue;
400+
count += computeTailCount(provider, frontierNode, validBuckets);
401+
}
270402
}
271403
}
272404
return count;

engine/src/main/java/com/arcadedb/query/opencypher/executor/steps/PairHashJoinOp.java

Lines changed: 48 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,9 @@ private List<RID> walkArmOLTP(final Database db, final Vertex start, final Strin
228228
* For Q2 (2.6M Comments, arm1=1 hop, arm2=2 hops, probe=KNOWS BOTH):
229229
* Each Comment: 1 arm1 lookup + 2 arm2 lookups + 1 binary search = ~5 ops.
230230
* Total: 2.6M × 5 = 13M ops at ~3ns = ~39ms (vs ~400ms with HashMap).
231+
* <p>
232+
* When arm2 has exactly 2 hops (the Q2 case), fuses the arm2 walk with the probe
233+
* check inline to avoid allocating an intermediate int[] per build node.
231234
*/
232235
private long buildAndProbeInline(final NeighborView arm1View, final NeighborView[] arm2Views,
233236
final Set<Integer>[] arm2Buckets, final NeighborView probeView,
@@ -236,21 +239,63 @@ private long buildAndProbeInline(final NeighborView arm1View, final NeighborView
236239
final int[] probeNbrs = probeView.neighbors();
237240
long total = 0;
238241

242+
// FAST PATH: 2-hop arm2 with fused inline walk+probe (avoids per-node array allocation)
243+
if (arm2Views.length == 2) {
244+
final int[] arm2Nbrs0 = arm2Views[0].neighbors();
245+
final int[] arm2Nbrs1 = arm2Views[1].neighbors();
246+
final Set<Integer> arm2Filter0 = arm2Buckets != null ? arm2Buckets[0] : null;
247+
final Set<Integer> arm2Filter1 = arm2Buckets != null ? arm2Buckets[1] : null;
248+
249+
for (int startId = 0; startId < nodeCount; startId++) {
250+
final int a1Start = arm1View.offset(startId);
251+
final int a1End = arm1View.offsetEnd(startId);
252+
if (a1Start == a1End) continue;
253+
254+
// Inline arm2 hop 0: startId → intermediate nodes
255+
final int a2h0Start = arm2Views[0].offset(startId);
256+
final int a2h0End = arm2Views[0].offsetEnd(startId);
257+
if (a2h0Start == a2h0End) continue;
258+
259+
// For each arm1 endpoint, pre-fetch probe range
260+
for (int i = a1Start; i < a1End; i++) {
261+
final int ep1 = arm1Nbrs[i];
262+
final int pStart = probeView.offset(ep1);
263+
final int pEnd = probeView.offsetEnd(ep1);
264+
if (pStart == pEnd) continue;
265+
266+
// Walk arm2 inline: hop0 → hop1 → binary search probe
267+
for (int j = a2h0Start; j < a2h0End; j++) {
268+
final int mid = arm2Nbrs0[j];
269+
if (arm2Filter0 != null && !arm2Filter0.contains(bucketIds[mid])) continue;
270+
271+
final int a2h1Start = arm2Views[1].offset(mid);
272+
final int a2h1End = arm2Views[1].offsetEnd(mid);
273+
for (int k = a2h1Start; k < a2h1End; k++) {
274+
final int ep2 = arm2Nbrs1[k];
275+
if (arm2Filter1 != null && !arm2Filter1.contains(bucketIds[ep2])) continue;
276+
if (java.util.Arrays.binarySearch(probeNbrs, pStart, pEnd, ep2) >= 0)
277+
total++;
278+
}
279+
}
280+
}
281+
}
282+
return total;
283+
}
284+
285+
// GENERAL PATH: allocate arm2 endpoints array per build node
239286
for (int startId = 0; startId < nodeCount; startId++) {
240287
final int a1Start = arm1View.offset(startId);
241288
final int a1End = arm1View.offsetEnd(startId);
242289
if (a1Start == a1End) continue;
243290

244-
// Walk arm2 to get endpoints
245291
final int[] ep2Ids = walkArmWithViews(startId, arm2Views, arm2Buckets, bucketIds);
246292
if (ep2Ids.length == 0) continue;
247293

248-
// For each (ep1, ep2) pair, check if probe edge exists via binary search
249294
for (int i = a1Start; i < a1End; i++) {
250295
final int ep1 = arm1Nbrs[i];
251296
final int pStart = probeView.offset(ep1);
252297
final int pEnd = probeView.offsetEnd(ep1);
253-
if (pStart == pEnd) continue; // ep1 has no probe edges
298+
if (pStart == pEnd) continue;
254299

255300
for (final int ep2 : ep2Ids) {
256301
if (java.util.Arrays.binarySearch(probeNbrs, pStart, pEnd, ep2) >= 0)

0 commit comments

Comments
 (0)