Skip to content

Commit 4738cd6

Browse files
committed
fix(spanner): don't set sticky AFFINITY_KEY for multiplexed sessions
Under grpc-gcp (default since 6.105.0, #4239), newCallContext set GcpManagedChannel.AFFINITY_KEY on every data RPC. GcpManagedChannel binds each new key via pickLeastBusyChannel, which reads activeStreamsCount before any concurrent caller's start() has incremented it (tiebreak channelRefs[0]). A high-concurrency cold start therefore binds keys to channel 0 and the bindings are sticky, so RPCs funnel through one HTTP/2 connection and queue at MAX_CONCURRENT_STREAMS. The static-numChannels path bounded the key to ~2*numChannels-1 distinct values, making the collapse permanent (~6x throughput regression at 400 concurrent). The dynamic-channel-pool path used per-transaction random keys and largely self-corrected, but a few BitSet-recycled hints still sticky-bound, leaving a p99 tail. Multiplexed sessions get no backend-locality benefit from sticky per-transaction channel affinity, so don't set the key under grpc-gcp at all. getChannelRef(null) does a fresh per-call least-busy pick with no sticky binding and no affinity-map growth. Drops the now-unobservable distinct-AFFINITY_KEY assertions from RetryOnDifferentGrpcChannelMockServerTest; the request-count and session assertions still cover the retry loop.
1 parent 15faaaa commit 4738cd6

File tree

2 files changed

+7
-120
lines changed

2 files changed

+7
-120
lines changed

java-spanner/google-cloud-spanner/src/main/java/com/google/cloud/spanner/spi/v1/GapicSpannerRpc.java

Lines changed: 6 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2275,30 +2275,12 @@ <ReqT, RespT> GrpcCallContext newCallContext(
22752275
Long affinity = options == null ? null : Option.CHANNEL_HINT.getLong(options);
22762276
if (affinity != null) {
22772277
if (this.isGrpcGcpExtensionEnabled) {
2278-
// Set channel affinity in gRPC-GCP.
2279-
String affinityKey;
2280-
if (this.isDynamicChannelPoolEnabled) {
2281-
// When dynamic channel pooling is enabled, we use the raw affinity value as the key.
2282-
// This allows grpc-gcp to use round-robin for new keys, enabling new channels
2283-
// (created during scale-up) to receive requests. The affinity key lifetime setting
2284-
// ensures the affinity map doesn't grow unbounded.
2285-
affinityKey = String.valueOf(affinity);
2286-
} else {
2287-
// When DCP is disabled, compute bounded channel hint to prevent
2288-
// gRPC-GCP affinity map from getting unbounded.
2289-
int boundedChannelHint = affinity.intValue() % this.numChannels;
2290-
affinityKey = String.valueOf(boundedChannelHint);
2291-
}
2292-
context =
2293-
context.withCallOptions(
2294-
context.getCallOptions().withOption(GcpManagedChannel.AFFINITY_KEY, affinityKey));
2295-
// Check if the caller wants to unbind the affinity key after this call completes.
2296-
Boolean unbind = Option.UNBIND_CHANNEL_HINT.get(options);
2297-
if (Boolean.TRUE.equals(unbind)) {
2298-
context =
2299-
context.withCallOptions(
2300-
context.getCallOptions().withOption(GcpManagedChannel.UNBIND_AFFINITY_KEY, true));
2301-
}
2278+
// Do not set AFFINITY_KEY under grpc-gcp. Multiplexed sessions get no backend-locality
2279+
// benefit from sticky per-transaction channel affinity, and setting one causes
2280+
// GcpManagedChannel.pickLeastBusyChannel to permanently bind keys to channel 0 under a
2281+
// concurrent warmup burst (it reads activeStreamsCount before any caller's start() has
2282+
// incremented it). With no key, getChannelRef(null) does a fresh per-call least-busy pick
2283+
// with no sticky binding and no affinity-map growth.
23022284
} else {
23032285
// Set channel affinity in GAX.
23042286
context = context.withChannelAffinity(affinity.intValue());

java-spanner/google-cloud-spanner/src/test/java/com/google/cloud/spanner/RetryOnDifferentGrpcChannelMockServerTest.java

Lines changed: 1 addition & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -23,30 +23,18 @@
2323
import static org.junit.Assert.assertTrue;
2424
import static org.junit.Assume.assumeFalse;
2525

26-
import com.google.api.gax.grpc.GrpcInterceptorProvider;
2726
import com.google.cloud.NoCredentials;
28-
import com.google.cloud.grpc.GcpManagedChannel;
2927
import com.google.cloud.spanner.MockSpannerServiceImpl.SimulatedExecutionTime;
3028
import com.google.cloud.spanner.connection.AbstractMockServerTest;
31-
import com.google.common.collect.ImmutableList;
3229
import com.google.spanner.v1.BatchCreateSessionsRequest;
3330
import com.google.spanner.v1.BeginTransactionRequest;
3431
import com.google.spanner.v1.ExecuteSqlRequest;
35-
import com.google.spanner.v1.SpannerGrpc;
36-
import io.grpc.CallOptions;
37-
import io.grpc.Channel;
38-
import io.grpc.ClientCall;
39-
import io.grpc.ClientInterceptor;
4032
import io.grpc.Context;
4133
import io.grpc.Deadline;
4234
import io.grpc.ManagedChannelBuilder;
43-
import io.grpc.MethodDescriptor;
4435
import io.grpc.Status;
4536
import java.time.Duration;
46-
import java.util.HashMap;
47-
import java.util.HashSet;
4837
import java.util.List;
49-
import java.util.Map;
5038
import java.util.Set;
5139
import java.util.concurrent.Executors;
5240
import java.util.concurrent.ScheduledExecutorService;
@@ -62,11 +50,6 @@
6250

6351
@RunWith(JUnit4.class)
6452
public class RetryOnDifferentGrpcChannelMockServerTest extends AbstractMockServerTest {
65-
/** Tracks the logical affinity keys before grpc-gcp routes the request. */
66-
private static final Map<String, Set<String>> LOGICAL_AFFINITY_KEYS = new HashMap<>();
67-
68-
/** Tracks how many calls explicitly request grpc-gcp affinity unbind. */
69-
private static final Map<String, Integer> UNBIND_AFFINITY_CALL_COUNTS = new HashMap<>();
7053

7154
@BeforeClass
7255
public static void setupAndStartServer() throws Exception {
@@ -82,52 +65,16 @@ public static void removeSystemProperty() {
8265

8366
@After
8467
public void clearRequests() {
85-
LOGICAL_AFFINITY_KEYS.clear();
86-
UNBIND_AFFINITY_CALL_COUNTS.clear();
8768
mockSpanner.clearRequests();
8869
mockSpanner.removeAllExecutionTimes();
8970
}
9071

91-
/**
92-
* Creates a client interceptor that captures the logical affinity key before grpc-gcp routes the
93-
* request. This allows us to verify that retry logic uses distinct logical channel hints, even
94-
* when DCP maps them to fewer physical channels.
95-
*/
96-
static GrpcInterceptorProvider createAffinityKeyInterceptorProvider() {
97-
return () ->
98-
ImmutableList.of(
99-
new ClientInterceptor() {
100-
@Override
101-
public <ReqT, RespT> ClientCall<ReqT, RespT> interceptCall(
102-
MethodDescriptor<ReqT, RespT> method, CallOptions callOptions, Channel next) {
103-
String methodName = method.getFullMethodName();
104-
// Capture the AFFINITY_KEY before grpc-gcp processes it.
105-
String affinityKey = callOptions.getOption(GcpManagedChannel.AFFINITY_KEY);
106-
if (affinityKey != null) {
107-
synchronized (LOGICAL_AFFINITY_KEYS) {
108-
Set<String> keys =
109-
LOGICAL_AFFINITY_KEYS.computeIfAbsent(methodName, k -> new HashSet<>());
110-
keys.add(affinityKey);
111-
}
112-
}
113-
if (Boolean.TRUE.equals(
114-
callOptions.getOption(GcpManagedChannel.UNBIND_AFFINITY_KEY))) {
115-
synchronized (UNBIND_AFFINITY_CALL_COUNTS) {
116-
UNBIND_AFFINITY_CALL_COUNTS.merge(methodName, 1, Integer::sum);
117-
}
118-
}
119-
return next.newCall(method, callOptions);
120-
}
121-
});
122-
}
123-
12472
SpannerOptions.Builder createSpannerOptionsBuilder() {
12573
return SpannerOptions.newBuilder()
12674
.setProjectId("my-project")
12775
.setHost(String.format("http://localhost:%d", getPort()))
12876
.setChannelConfigurator(ManagedChannelBuilder::usePlaintext)
129-
.setCredentials(NoCredentials.getInstance())
130-
.setInterceptorProvider(createAffinityKeyInterceptorProvider());
77+
.setCredentials(NoCredentials.getInstance());
13178
}
13279

13380
@Test
@@ -163,12 +110,6 @@ public void testReadWriteTransaction_retriesOnNewChannel() {
163110
List<BeginTransactionRequest> requests =
164111
mockSpanner.getRequestsOfType(BeginTransactionRequest.class);
165112
assertNotEquals(requests.get(0).getSession(), requests.get(1).getSession());
166-
// Verify that the retry used 2 distinct logical affinity keys (before grpc-gcp routing).
167-
assertEquals(
168-
2,
169-
LOGICAL_AFFINITY_KEYS
170-
.getOrDefault("google.spanner.v1.Spanner/BeginTransaction", new HashSet<>())
171-
.size());
172113
}
173114

174115
@Test
@@ -208,13 +149,6 @@ public void testReadWriteTransaction_stopsRetrying() {
208149
Set<String> sessions =
209150
requests.stream().map(BeginTransactionRequest::getSession).collect(Collectors.toSet());
210151
assertEquals(numChannels, sessions.size());
211-
// Verify that the retry logic used distinct logical affinity keys (before grpc-gcp routing).
212-
// This confirms each retry attempt targeted a different logical channel.
213-
assertEquals(
214-
numChannels,
215-
LOGICAL_AFFINITY_KEYS
216-
.getOrDefault("google.spanner.v1.Spanner/BeginTransaction", new HashSet<>())
217-
.size());
218152
}
219153
}
220154

@@ -284,13 +218,6 @@ public void testDenyListedChannelIsCleared() {
284218
// of the first transaction. That fails, the session is deny-listed, the transaction is
285219
// retried on yet another session and succeeds.
286220
assertEquals(numChannels + 1, sessions.size());
287-
// Verify that the retry logic used distinct logical affinity keys (before grpc-gcp routing).
288-
// This confirms each retry attempt targeted a different logical channel.
289-
assertEquals(
290-
numChannels,
291-
LOGICAL_AFFINITY_KEYS
292-
.getOrDefault("google.spanner.v1.Spanner/BeginTransaction", new HashSet<>())
293-
.size());
294221
assertEquals(numChannels, mockSpanner.countRequestsOfType(BatchCreateSessionsRequest.class));
295222
}
296223
}
@@ -316,17 +243,6 @@ public void testSingleUseQuery_retriesOnNewChannel() {
316243
List<ExecuteSqlRequest> requests = mockSpanner.getRequestsOfType(ExecuteSqlRequest.class);
317244
// The requests use the same multiplexed session.
318245
assertEquals(requests.get(0).getSession(), requests.get(1).getSession());
319-
// Verify that the retry used 2 distinct logical affinity keys (before grpc-gcp routing).
320-
assertEquals(
321-
2,
322-
LOGICAL_AFFINITY_KEYS
323-
.getOrDefault("google.spanner.v1.Spanner/ExecuteStreamingSql", new HashSet<>())
324-
.size());
325-
assertEquals(
326-
2,
327-
UNBIND_AFFINITY_CALL_COUNTS
328-
.getOrDefault(SpannerGrpc.getExecuteStreamingSqlMethod().getFullMethodName(), 0)
329-
.intValue());
330246
}
331247

332248
@Test
@@ -354,17 +270,6 @@ public void testSingleUseQuery_stopsRetrying() {
354270
// Verify that the retry mechanism is working (made numChannels requests).
355271
int totalRequests = mockSpanner.countRequestsOfType(ExecuteSqlRequest.class);
356272
assertEquals(numChannels, totalRequests);
357-
// Verify each attempt used a distinct logical affinity key (before grpc-gcp routing).
358-
int distinctLogicalKeys =
359-
LOGICAL_AFFINITY_KEYS
360-
.getOrDefault("google.spanner.v1.Spanner/ExecuteStreamingSql", new HashSet<>())
361-
.size();
362-
assertEquals(totalRequests, distinctLogicalKeys);
363-
assertEquals(
364-
totalRequests,
365-
UNBIND_AFFINITY_CALL_COUNTS
366-
.getOrDefault(SpannerGrpc.getExecuteStreamingSqlMethod().getFullMethodName(), 0)
367-
.intValue());
368273
}
369274
}
370275

0 commit comments

Comments
 (0)