Skip to content

Commit 1f98936

Browse files
committed
Fix LWT prepared statement routing failure in PRESERVE_REPLICA_ORDER mode
Addresses issue #830 where LWT prepared statements fail with "No node was available" error when using PRESERVE_REPLICA_ORDER routing (default in 4.19.0.5). Changes: - Enhanced newQueryPlanPreserveReplicas to include non-replica nodes with proper DC prioritization - Added fallback logic to prevent empty query plans for prepared statements - Implemented routing-key-based consistent rotation for non-replica nodes - Replaced diceRoll1d4() with universal randomNextInt(bound) method - Updated tests to validate new behavior including randomness testing The fix maintains replica order preservation while ensuring query plans always contain nodes, resolving the regression introduced in 4.19.0.5.
1 parent 93a3deb commit 1f98936

8 files changed

Lines changed: 616 additions & 335 deletions

File tree

core/src/main/java/com/datastax/oss/driver/internal/core/loadbalancing/BasicLoadBalancingPolicy.java

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525

2626
import com.datastax.oss.driver.api.core.ConsistencyLevel;
2727
import com.datastax.oss.driver.api.core.CqlIdentifier;
28+
import com.datastax.oss.driver.api.core.RequestRoutingType;
2829
import com.datastax.oss.driver.api.core.config.DefaultDriverOption;
2930
import com.datastax.oss.driver.api.core.config.DriverExecutionProfile;
3031
import com.datastax.oss.driver.api.core.context.DriverContext;
@@ -63,6 +64,9 @@
6364
import edu.umd.cs.findbugs.annotations.NonNull;
6465
import edu.umd.cs.findbugs.annotations.Nullable;
6566
import java.nio.ByteBuffer;
67+
import java.util.ArrayList;
68+
import java.util.Collections;
69+
import java.util.LinkedHashMap;
6670
import java.util.LinkedHashSet;
6771
import java.util.List;
6872
import java.util.Map;
@@ -71,8 +75,10 @@
7175
import java.util.Queue;
7276
import java.util.Set;
7377
import java.util.UUID;
78+
import java.util.concurrent.ThreadLocalRandom;
7479
import java.util.concurrent.atomic.AtomicInteger;
7580
import java.util.function.IntUnaryOperator;
81+
import java.util.stream.Collectors;
7682
import net.jcip.annotations.ThreadSafe;
7783
import org.slf4j.Logger;
7884
import org.slf4j.LoggerFactory;
@@ -113,6 +119,11 @@
113119
@ThreadSafe
114120
public class BasicLoadBalancingPolicy implements LoadBalancingPolicy {
115121

122+
public enum RequestRoutingMethod {
123+
REGULAR,
124+
PRESERVE_REPLICA_ORDER
125+
}
126+
116127
private static final Logger LOG = LoggerFactory.getLogger(BasicLoadBalancingPolicy.class);
117128

118129
protected static final IntUnaryOperator INCREMENT = i -> (i == Integer.MAX_VALUE) ? 0 : i + 1;
@@ -127,6 +138,7 @@ public class BasicLoadBalancingPolicy implements LoadBalancingPolicy {
127138
private final int maxNodesPerRemoteDc;
128139
private final boolean allowDcFailoverForLocalCl;
129140
private final ConsistencyLevel defaultConsistencyLevel;
141+
private final RequestRoutingMethod lwtRequestRoutingMethod;
130142

131143
// private because they should be set in init() and never be modified after
132144
private volatile DistanceReporter distanceReporter;
@@ -154,6 +166,34 @@ public BasicLoadBalancingPolicy(@NonNull DriverContext context, @NonNull String
154166
new LinkedHashSet<>(
155167
profile.getStringList(
156168
DefaultDriverOption.LOAD_BALANCING_DC_FAILOVER_PREFERRED_REMOTE_DCS));
169+
this.lwtRequestRoutingMethod = parseLwtRequestRoutingMethod();
170+
}
171+
172+
@NonNull
173+
private RequestRoutingMethod parseLwtRequestRoutingMethod() {
174+
String methodString =
175+
profile.getString(DefaultDriverOption.LOAD_BALANCING_DEFAULT_LWT_REQUEST_ROUTING_METHOD);
176+
try {
177+
return RequestRoutingMethod.valueOf(methodString.toUpperCase());
178+
} catch (IllegalArgumentException e) {
179+
LOG.warn(
180+
"[{}] Unknown request routing method '{}', defaulting to PRESERVE_REPLICA_ORDER",
181+
logPrefix,
182+
methodString);
183+
return RequestRoutingMethod.PRESERVE_REPLICA_ORDER;
184+
}
185+
}
186+
187+
@NonNull
188+
public RequestRoutingMethod getRequestRoutingMethod(@Nullable Request request) {
189+
if (request == null) {
190+
return RequestRoutingMethod.REGULAR;
191+
}
192+
if (request.getRequestRoutingType() == RequestRoutingType.LWT) {
193+
return lwtRequestRoutingMethod;
194+
} else {
195+
return RequestRoutingMethod.REGULAR;
196+
}
157197
}
158198

159199
/**
@@ -260,6 +300,17 @@ protected NodeDistanceEvaluator createNodeDistanceEvaluator(
260300
@NonNull
261301
@Override
262302
public Queue<Node> newQueryPlan(@Nullable Request request, @Nullable Session session) {
303+
switch (getRequestRoutingMethod(request)) {
304+
case PRESERVE_REPLICA_ORDER:
305+
return newQueryPlanPreserveReplicas(request, session);
306+
case REGULAR:
307+
default:
308+
return newQueryPlanRegular(request, session);
309+
}
310+
}
311+
312+
@NonNull
313+
protected Queue<Node> newQueryPlanRegular(@Nullable Request request, @Nullable Session session) {
263314
// Take a snapshot since the set is concurrent:
264315
Object[] currentNodes = liveNodes.dc(localDc).toArray();
265316

@@ -294,6 +345,117 @@ public Queue<Node> newQueryPlan(@Nullable Request request, @Nullable Session ses
294345
return maybeAddDcFailover(request, plan);
295346
}
296347

348+
/**
349+
* Builds a query plan that preserves replica order: local replicas, remote replicas, local
350+
* non-replicas (rotated), remote non-replicas (rotated).
351+
*/
352+
@NonNull
353+
protected Queue<Node> newQueryPlanPreserveReplicas(
354+
@Nullable Request request, @Nullable Session session) {
355+
List<Node> replicas = getReplicas(request, session);
356+
String localDc = getLocalDatacenter();
357+
List<Node> queryPlan = new ArrayList<>();
358+
359+
if (localDc == null) {
360+
// No local DC: all replicas first, then rotated non-replicas
361+
List<Node> allNodes = new ArrayList<>();
362+
for (Object obj : getLiveNodes().dc(null).toArray()) {
363+
allNodes.add((Node) obj);
364+
}
365+
queryPlan.addAll(replicas);
366+
addRotatedNonReplicas(queryPlan, allNodes, replicas, request);
367+
} else {
368+
// With local DC: prioritize local, then remote
369+
Map<String, List<Node>> nodesByDc = getAllNodesByDc();
370+
addReplicasByDc(queryPlan, replicas, localDc);
371+
addNonReplicasByDc(queryPlan, nodesByDc, replicas, localDc, request);
372+
}
373+
374+
return new SimpleQueryPlan(queryPlan.toArray());
375+
}
376+
377+
/** Collect all live nodes grouped by DC, with preferred remote DCs ordered first. */
378+
private Map<String, List<Node>> getAllNodesByDc() {
379+
Map<String, List<Node>> nodesByDc = new LinkedHashMap<>();
380+
Set<String> allDcs = getLiveNodes().dcs();
381+
// Add preferred remote DCs first (in configured order)
382+
for (String dc : preferredRemoteDcs) {
383+
if (allDcs.contains(dc)) {
384+
nodesByDc.put(dc, dcNodeList(dc));
385+
}
386+
}
387+
// Add remaining DCs
388+
for (String dc : allDcs) {
389+
if (!nodesByDc.containsKey(dc)) {
390+
nodesByDc.put(dc, dcNodeList(dc));
391+
}
392+
}
393+
return nodesByDc;
394+
}
395+
396+
private List<Node> dcNodeList(String dc) {
397+
List<Node> dcNodes = new ArrayList<>();
398+
for (Object obj : getLiveNodes().dc(dc).toArray()) {
399+
dcNodes.add((Node) obj);
400+
}
401+
return dcNodes;
402+
}
403+
404+
/** Add replicas with local DC first, then remote DCs. */
405+
private void addReplicasByDc(List<Node> queryPlan, List<Node> replicas, String localDc) {
406+
replicas.stream()
407+
.filter(r -> Objects.equals(r.getDatacenter(), localDc))
408+
.forEach(queryPlan::add);
409+
replicas.stream()
410+
.filter(r -> !Objects.equals(r.getDatacenter(), localDc))
411+
.forEach(queryPlan::add);
412+
}
413+
414+
/** Add non-replicas with local DC first, then remote DCs (all rotated). */
415+
private void addNonReplicasByDc(
416+
List<Node> queryPlan,
417+
Map<String, List<Node>> nodesByDc,
418+
List<Node> replicas,
419+
String localDc,
420+
Request request) {
421+
// Local DC non-replicas first
422+
List<Node> localNodes = nodesByDc.get(localDc);
423+
if (localNodes != null) {
424+
addRotatedNonReplicas(queryPlan, localNodes, replicas, request);
425+
}
426+
// Remote DC non-replicas
427+
for (Map.Entry<String, List<Node>> entry : nodesByDc.entrySet()) {
428+
if (!Objects.equals(entry.getKey(), localDc)) {
429+
addRotatedNonReplicas(queryPlan, entry.getValue(), replicas, request);
430+
}
431+
}
432+
}
433+
434+
/** Add non-replica nodes from given list with rotation. */
435+
private void addRotatedNonReplicas(
436+
List<Node> queryPlan, List<Node> nodes, List<Node> replicas, Request request) {
437+
List<Node> nonReplicas =
438+
nodes.stream().filter(n -> !replicas.contains(n)).collect(Collectors.toList());
439+
if (!nonReplicas.isEmpty()) {
440+
rotateNonReplicas(nonReplicas, request);
441+
queryPlan.addAll(nonReplicas);
442+
}
443+
}
444+
445+
/** Rotates nodes based on routing key (consistent) or randomly. */
446+
private void rotateNonReplicas(List<Node> nodes, @Nullable Request request) {
447+
if (nodes.size() <= 1) return;
448+
449+
int rotationAmount =
450+
(request != null && request.getRoutingKey() != null)
451+
? (request.getRoutingKey().hashCode() & 0x7fffffff) % nodes.size()
452+
: randomNextInt(nodes.size());
453+
454+
if (rotationAmount > 0) {
455+
Collections.rotate(nodes, -rotationAmount);
456+
}
457+
}
458+
297459
@NonNull
298460
protected List<Node> getReplicas(@Nullable Request request, @Nullable Session session) {
299461
if (request == null || session == null) {
@@ -441,6 +603,11 @@ protected Object[] computeNodes() {
441603
return new CompositeQueryPlan(queryPlans);
442604
}
443605

606+
/** Exposed as a protected method so that it can be accessed by tests */
607+
protected int randomNextInt(int bound) {
608+
return ThreadLocalRandom.current().nextInt(bound);
609+
}
610+
444611
/** Exposed as a protected method so that it can be accessed by tests */
445612
protected void shuffleHead(Object[] currentNodes, int headLength) {
446613
ArrayUtils.shuffleHead(currentNodes, headLength);

core/src/main/java/com/datastax/oss/driver/internal/core/loadbalancing/DefaultLoadBalancingPolicy.java

Lines changed: 3 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import static java.util.concurrent.TimeUnit.MILLISECONDS;
2121
import static java.util.concurrent.TimeUnit.MINUTES;
2222

23-
import com.datastax.oss.driver.api.core.RequestRoutingType;
2423
import com.datastax.oss.driver.api.core.config.DefaultDriverOption;
2524
import com.datastax.oss.driver.api.core.config.DriverExecutionProfile;
2625
import com.datastax.oss.driver.api.core.context.DriverContext;
@@ -48,7 +47,6 @@
4847
import java.util.UUID;
4948
import java.util.concurrent.ConcurrentHashMap;
5049
import java.util.concurrent.ConcurrentMap;
51-
import java.util.concurrent.ThreadLocalRandom;
5250
import java.util.concurrent.atomic.AtomicLongArray;
5351
import net.jcip.annotations.ThreadSafe;
5452
import org.slf4j.Logger;
@@ -96,11 +94,6 @@
9694
@ThreadSafe
9795
public class DefaultLoadBalancingPolicy extends BasicLoadBalancingPolicy implements RequestTracker {
9896

99-
public enum RequestRoutingMethod {
100-
REGULAR,
101-
PRESERVE_REPLICA_ORDER
102-
}
103-
10497
private static final Logger LOG = LoggerFactory.getLogger(DefaultLoadBalancingPolicy.class);
10598

10699
private static final long NEWLY_UP_INTERVAL_NANOS = MINUTES.toNanos(1);
@@ -110,31 +103,14 @@ public enum RequestRoutingMethod {
110103
protected final ConcurrentMap<Node, NodeResponseRateSample> responseTimes;
111104
protected final Map<Node, Long> upTimes = new ConcurrentHashMap<>();
112105
private final boolean avoidSlowReplicas;
113-
private final RequestRoutingMethod lwtRequestRoutingMethod;
114106

115107
public DefaultLoadBalancingPolicy(@NonNull DriverContext context, @NonNull String profileName) {
116108
super(context, profileName);
117109
this.avoidSlowReplicas =
118110
profile.getBoolean(DefaultDriverOption.LOAD_BALANCING_POLICY_SLOW_AVOIDANCE, true);
119-
this.lwtRequestRoutingMethod = getDefaultLWTRequestRoutingMethod();
120111
this.responseTimes = new MapMaker().weakKeys().makeMap();
121112
}
122113

123-
@NonNull
124-
private RequestRoutingMethod getDefaultLWTRequestRoutingMethod() {
125-
String methodString =
126-
profile.getString(DefaultDriverOption.LOAD_BALANCING_DEFAULT_LWT_REQUEST_ROUTING_METHOD);
127-
try {
128-
return RequestRoutingMethod.valueOf(methodString.toUpperCase());
129-
} catch (IllegalArgumentException e) {
130-
LOG.warn(
131-
"[{}] Unknown request routing method '{}', defaulting to PRESERVE_REPLICA_ORDER",
132-
logPrefix,
133-
methodString);
134-
return RequestRoutingMethod.PRESERVE_REPLICA_ORDER;
135-
}
136-
}
137-
138114
@NonNull
139115
@Override
140116
public Optional<RequestTracker> getRequestTracker() {
@@ -151,52 +127,13 @@ protected Optional<String> discoverLocalDc(@NonNull Map<UUID, Node> nodes) {
151127
return new MandatoryLocalDcHelper(context, profile, logPrefix).discoverLocalDc(nodes);
152128
}
153129

154-
@NonNull
155-
public RequestRoutingMethod getDefaultLWTRequestRoutingMethod(@Nullable Request request) {
156-
if (request == null) {
157-
return RequestRoutingMethod.REGULAR;
158-
}
159-
if (request.getRequestRoutingType() == RequestRoutingType.LWT) {
160-
return lwtRequestRoutingMethod;
161-
} else {
162-
return RequestRoutingMethod.REGULAR;
163-
}
164-
}
165-
166-
@NonNull
167-
@Override
168-
public Queue<Node> newQueryPlan(@Nullable Request request, @Nullable Session session) {
169-
switch (getDefaultLWTRequestRoutingMethod(request)) {
170-
case PRESERVE_REPLICA_ORDER:
171-
return newQueryPlanPreserveReplicas(request, session);
172-
case REGULAR:
173-
default:
174-
return newQueryPlanRegular(request, session);
175-
}
176-
}
177-
178-
/**
179-
* Builds a query plan that preserves the replica order as returned by the load balancing
180-
* strategy, while pushing non-local replicas after local ones.
181-
*/
182-
@NonNull
183-
public Queue<Node> newQueryPlanPreserveReplicas(
184-
@Nullable Request request, @Nullable Session session) {
185-
List<Node> replicas = getReplicas(request, session);
186-
String localDc = getLocalDatacenter();
187-
if (localDc == null || replicas.isEmpty()) {
188-
return new SimpleQueryPlan(replicas.toArray());
189-
}
190-
191-
return new SimpleQueryPlan(moveNonLocalReplicasToTheEnd(replicas, localDc));
192-
}
193-
194130
/**
195131
* Builds a query plan that prioritizes local replicas, shuffles them for balance, and then
196132
* round-robins the remaining local nodes.
197133
*/
198134
@NonNull
199-
public Queue<Node> newQueryPlanRegular(@Nullable Request request, @Nullable Session session) {
135+
@Override
136+
protected Queue<Node> newQueryPlanRegular(@Nullable Request request, @Nullable Session session) {
200137
List<Node> replicas = getReplicas(request, session);
201138
Object[] currentNodes = getLiveNodes().dc(getLocalDatacenter()).toArray();
202139
int replicaCount = 0; // in currentNodes
@@ -228,26 +165,6 @@ public Queue<Node> newQueryPlanRegular(@Nullable Request request, @Nullable Sess
228165
return maybeAddDcFailover(request, plan);
229166
}
230167

231-
/**
232-
* Returns a replica array with local-datacenter replicas first and remote replicas preserved at
233-
* the end.
234-
*/
235-
private static Object[] moveNonLocalReplicasToTheEnd(List<Node> replicas, String localDc) {
236-
Object[] orderedReplicas = new Object[replicas.size()];
237-
int index = 0;
238-
for (Node replica : replicas) {
239-
if (Objects.equals(replica.getDatacenter(), localDc)) {
240-
orderedReplicas[index++] = replica;
241-
}
242-
}
243-
for (Node replica : replicas) {
244-
if (!Objects.equals(replica.getDatacenter(), localDc)) {
245-
orderedReplicas[index++] = replica;
246-
}
247-
}
248-
return orderedReplicas;
249-
}
250-
251168
private int[] moveReplicasToFront(Object[] currentNodes, List<Node> allReplicas) {
252169
int replicaCount = 0, localRackReplicaCount = 0;
253170
for (int i = 0; i < currentNodes.length; i++) {
@@ -329,7 +246,7 @@ private void avoidSlowReplicas(
329246
// - the replica in first or second position is the most recent replica marked as UP and
330247
// - dice roll 1d4 != 1
331248
else if ((newestUpReplica == currentNodes[0] || newestUpReplica == currentNodes[1])
332-
&& diceRoll1d4() != 1) {
249+
&& randomNextInt(4) != 1) {
333250

334251
// Send it to the back of the replicas
335252
ArrayUtils.bubbleDown(
@@ -370,11 +287,6 @@ protected long nanoTime() {
370287
return System.nanoTime();
371288
}
372289

373-
/** Exposed as a protected method so that it can be accessed by tests */
374-
protected int diceRoll1d4() {
375-
return ThreadLocalRandom.current().nextInt(4);
376-
}
377-
378290
protected boolean isUnhealthy(@NonNull Node node, @NonNull Session session, long now) {
379291
return isBusy(node, session) && isResponseRateInsufficient(node, now);
380292
}

0 commit comments

Comments
 (0)