Skip to content

Commit 7b3835c

Browse files
authored
[fix][broker] PIP-468: StreamConsumer / CheckpointConsumer DAG-replay with parent-drain ordering (#25642)
1 parent c800b52 commit 7b3835c

11 files changed

Lines changed: 1290 additions & 15 deletions

File tree

pulsar-broker/src/main/java/org/apache/pulsar/broker/admin/v2/Segments.java

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import javax.ws.rs.DELETE;
2929
import javax.ws.rs.DefaultValue;
3030
import javax.ws.rs.Encoded;
31+
import javax.ws.rs.GET;
3132
import javax.ws.rs.POST;
3233
import javax.ws.rs.PUT;
3334
import javax.ws.rs.Path;
@@ -174,6 +175,59 @@ public void terminateSegment(
174175
});
175176
}
176177

178+
@GET
179+
@Path("/{tenant}/{namespace}/{topic}/{descriptor}/subscription/{subscription}/backlog")
180+
@ApiOperation(value = "Number of unconsumed entries in the segment topic for the "
181+
+ "given subscription. Super-user only.")
182+
@ApiResponses(value = {
183+
@ApiResponse(code = 401, message = "This operation requires super-user access"),
184+
@ApiResponse(code = 403, message = "This operation requires super-user access"),
185+
@ApiResponse(code = 404, message = "Segment topic or subscription not found"),
186+
@ApiResponse(code = 500, message = "Internal server error")})
187+
public void getSubscriptionBacklog(
188+
@Suspended final AsyncResponse asyncResponse,
189+
@ApiParam(value = "Specify the tenant", required = true)
190+
@PathParam("tenant") String tenant,
191+
@ApiParam(value = "Specify the namespace", required = true)
192+
@PathParam("namespace") String namespace,
193+
@ApiParam(value = "Specify the parent topic name", required = true)
194+
@PathParam("topic") @Encoded String encodedTopic,
195+
@ApiParam(value = "Segment descriptor (e.g. 0000-7fff-1)", required = true)
196+
@PathParam("descriptor") String descriptor,
197+
@ApiParam(value = "Subscription name", required = true)
198+
@PathParam("subscription") String subscription,
199+
@ApiParam(value = "Whether leader broker redirected this call to this broker.")
200+
@QueryParam("authoritative") @DefaultValue("false") boolean authoritative) {
201+
validateNamespaceName(tenant, namespace);
202+
TopicName segmentTopic = segmentTopicName(tenant, namespace, encodedTopic, descriptor);
203+
204+
validateSuperUserAccessAsync()
205+
.thenCompose(__ -> validateTopicOwnershipAsync(segmentTopic, authoritative))
206+
.thenCompose(__ -> pulsar().getBrokerService().getTopicIfExists(segmentTopic.toString()))
207+
.thenAccept(optTopic -> {
208+
if (optTopic.isEmpty()) {
209+
// No topic loaded → no subscription cursor → no backlog. Returning
210+
// 0 here would be wrong (caller might mark the segment drained on
211+
// a topic that simply hasn't loaded yet); a 404 forces the caller
212+
// to retry, which matches our drain-poll contract.
213+
throw new RestException(Response.Status.NOT_FOUND,
214+
"Segment topic not loaded: " + segmentTopic);
215+
}
216+
var sub = optTopic.get().getSubscription(subscription);
217+
if (sub == null) {
218+
throw new RestException(Response.Status.NOT_FOUND,
219+
"Subscription not found on segment: " + subscription);
220+
}
221+
asyncResponse.resume(sub.getNumberOfEntriesInBacklog(false));
222+
})
223+
.exceptionally(ex -> {
224+
log.error().attr("clientAppId", clientAppId()).attr("segment", segmentTopic)
225+
.exception(ex).log("Failed to get segment subscription backlog");
226+
resumeAsyncResponseExceptionally(asyncResponse, ex);
227+
return null;
228+
});
229+
}
230+
177231
@DELETE
178232
@Path("/{tenant}/{namespace}/{topic}/{descriptor}")
179233
@ApiOperation(value = "Delete a segment topic. Super-user only.")

pulsar-broker/src/main/java/org/apache/pulsar/broker/service/ServerCnx.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -879,6 +879,8 @@ protected void handleCommandScalableTopicSubscribe(
879879
final String subscription = commandScalableTopicSubscribe.getSubscription();
880880
final String consumerName = commandScalableTopicSubscribe.getConsumerName();
881881
final long consumerId = commandScalableTopicSubscribe.getConsumerId();
882+
final org.apache.pulsar.common.api.proto.ScalableConsumerType consumerType =
883+
commandScalableTopicSubscribe.getConsumerType();
882884

883885
log.debug().attr("topic", topicStr).attr("subscription", subscription)
884886
.attr("consumerName", consumerName).attr("requestId", requestId)
@@ -920,7 +922,7 @@ protected void handleCommandScalableTopicSubscribe(
920922
return;
921923
}
922924
scalableTopicService.registerConsumer(topicName, subscription, consumerName,
923-
consumerId, this)
925+
consumerId, consumerType, this)
924926
.whenCompleteAsync((assignment, ex) -> {
925927
if (ex != null) {
926928
Throwable cause = ex.getCause() != null ? ex.getCause() : ex;

pulsar-broker/src/main/java/org/apache/pulsar/broker/service/scalable/ScalableTopicController.java

Lines changed: 94 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
import org.apache.pulsar.broker.resources.ScalableTopicResources;
3232
import org.apache.pulsar.broker.service.BrokerService;
3333
import org.apache.pulsar.broker.service.TransportCnx;
34+
import org.apache.pulsar.common.api.proto.ScalableConsumerType;
3435
import org.apache.pulsar.common.naming.TopicName;
3536
import org.apache.pulsar.common.scalable.HashRange;
3637
import org.apache.pulsar.common.scalable.SegmentInfo;
@@ -155,7 +156,32 @@ private CompletableFuture<Void> restoreSubscription(String subscription) {
155156
});
156157
}
157158

159+
/**
160+
* Restore-path entry: consumer type isn't persisted in metadata yet, so we don't
161+
* know whether the original subscription was STREAM (needs parent-drain ordering)
162+
* or CHECKPOINT / QUEUE (mustn't have it — CHECKPOINT never drains parents because
163+
* it doesn't create per-segment cursors). Default to <em>no enforcement</em>; on the
164+
* first register-after-restore the controller calls
165+
* {@link SubscriptionCoordinator#installDrainChecker} if the type is STREAM.
166+
*/
158167
private SubscriptionCoordinator createCoordinator(String subscription) {
168+
return createCoordinator(subscription, null);
169+
}
170+
171+
private SubscriptionCoordinator createCoordinator(String subscription,
172+
ScalableConsumerType consumerType) {
173+
// Parent-drain ordering matters only for STREAM consumers (Exclusive per-segment
174+
// subscription with broker-tracked cursors → preserving per-key order across a
175+
// split requires waiting for the parent to drain before handing out children).
176+
// CHECKPOINT consumers track position client-side via Checkpoints and don't even
177+
// create per-segment cursors — their parent never reports as drained, so the
178+
// ordering machinery would block their children indefinitely. QUEUE consumers
179+
// are shared and accept out-of-order delivery by design. Null type (restore
180+
// path) starts without a checker; it's installed lazily on first STREAM
181+
// register.
182+
SegmentDrainChecker checker =
183+
consumerType == ScalableConsumerType.STREAM ? this::isSegmentDrained : null;
184+
159185
// Defensive: PulsarService.getConfig() is null in some unit-test mocks. Fall
160186
// back to the SubscriptionCoordinator's default grace period in that case.
161187
var config = brokerService.getPulsar().getConfig();
@@ -175,7 +201,42 @@ private SubscriptionCoordinator createCoordinator(String subscription) {
175201
currentLayout,
176202
resources,
177203
brokerService.getPulsar().getExecutor(),
178-
gracePeriod);
204+
gracePeriod,
205+
checker,
206+
SubscriptionCoordinator.DEFAULT_DRAIN_INITIAL_DELAY,
207+
SubscriptionCoordinator.DEFAULT_DRAIN_MAX_DELAY);
208+
}
209+
210+
/**
211+
* Drain check used by every {@link SubscriptionCoordinator} on this topic. Asks the
212+
* segment topic's owning broker for the per-subscription backlog via the
213+
* {@code /segments/.../subscription/.../backlog} admin endpoint, which redirects to
214+
* the topic owner — works whether the controller and the segment colocate or not.
215+
*
216+
* <p>Returns {@code false} if the segment topic or subscription is not yet loaded
217+
* (the admin endpoint replies 404). The next poll will succeed once the consumer's
218+
* subscribe lands the topic on its owning broker.
219+
*/
220+
private CompletableFuture<Boolean> isSegmentDrained(SegmentInfo segment, String subscription) {
221+
String segmentTopicName = toSegmentPersistentName(segment);
222+
try {
223+
return brokerService.getPulsar().getAdminClient()
224+
.scalableTopics()
225+
.getSegmentSubscriptionBacklogAsync(segmentTopicName, subscription)
226+
.thenApply(backlog -> backlog != null && backlog <= 0)
227+
.exceptionally(ex -> {
228+
Throwable cause =
229+
org.apache.pulsar.common.util.FutureUtil.unwrapCompletionException(ex);
230+
if (cause instanceof org.apache.pulsar.client.admin.PulsarAdminException.NotFoundException) {
231+
// Topic or subscription not loaded yet — try again on the
232+
// next poll. The consumer's subscribe will materialize it.
233+
return false;
234+
}
235+
throw org.apache.pulsar.common.util.FutureUtil.wrapToCompletionException(cause);
236+
});
237+
} catch (PulsarServerException e) {
238+
return CompletableFuture.failedFuture(e);
239+
}
179240
}
180241

181242
private CompletableFuture<Void> electLeader() {
@@ -309,14 +370,42 @@ public CompletableFuture<SegmentLayout> mergeSegments(long segmentId1, long segm
309370
* <p>If a session with the same {@code consumerName} already exists (for example
310371
* because the consumer is reconnecting within the grace period), the existing
311372
* assignment is reused and no rebalance occurs.
373+
*
374+
* <p>The {@code consumerType} is used at coordinator creation time to decide whether
375+
* to enforce parent-drain ordering on assignments — see
376+
* {@link SubscriptionCoordinator}. The coordinator's setting is fixed at first
377+
* registration (a subscription's type doesn't change in practice); subsequent
378+
* registers with a different type still work but won't change the ordering policy.
379+
*/
380+
/**
381+
* @deprecated Defaults to {@link ScalableConsumerType#STREAM}
382+
* for backward compatibility. New callers should pass the explicit type.
312383
*/
384+
@Deprecated
313385
public CompletableFuture<ConsumerAssignment> registerConsumer(String subscription,
314386
String consumerName,
315387
long consumerId,
316388
TransportCnx cnx) {
389+
return registerConsumer(subscription, consumerName, consumerId,
390+
ScalableConsumerType.STREAM, cnx);
391+
}
392+
393+
public CompletableFuture<ConsumerAssignment> registerConsumer(String subscription,
394+
String consumerName,
395+
long consumerId,
396+
ScalableConsumerType
397+
consumerType,
398+
TransportCnx cnx) {
317399
checkLeader();
318400
SubscriptionCoordinator coordinator = subscriptions.computeIfAbsent(
319-
subscription, this::createCoordinator);
401+
subscription, sub -> createCoordinator(sub, consumerType));
402+
// The coordinator may have been created on the failover-restore path (consumer
403+
// type unknown then; we defaulted to "no parent-drain enforcement"). Now that we
404+
// know the type, upgrade if it's STREAM. installDrainChecker is a no-op if the
405+
// coordinator already has a checker, so safe to call unconditionally.
406+
if (consumerType == ScalableConsumerType.STREAM) {
407+
coordinator.installDrainChecker(this::isSegmentDrained);
408+
}
320409
return coordinator.registerConsumer(consumerName, consumerId, cnx)
321410
.thenApply(assignments -> {
322411
// Look up by name since the key may have been an existing session
@@ -525,6 +614,9 @@ public CompletableFuture<org.apache.pulsar.common.policies.data.ScalableTopicSta
525614

526615
public CompletableFuture<Void> close() {
527616
closed = true;
617+
// Stop each coordinator's drain poller before clearing — otherwise the scheduler
618+
// task keeps running after the controller goes away.
619+
subscriptions.values().forEach(SubscriptionCoordinator::close);
528620
subscriptions.clear();
529621
return leaderElection.asyncClose();
530622
}

pulsar-broker/src/main/java/org/apache/pulsar/broker/service/scalable/ScalableTopicService.java

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import org.apache.pulsar.broker.resources.ScalableTopicMetadata;
2828
import org.apache.pulsar.broker.resources.ScalableTopicResources;
2929
import org.apache.pulsar.broker.service.BrokerService;
30+
import org.apache.pulsar.common.api.proto.ScalableConsumerType;
3031
import org.apache.pulsar.common.naming.TopicDomain;
3132
import org.apache.pulsar.common.naming.TopicName;
3233
import org.apache.pulsar.common.policies.data.ScalableTopicStats;
@@ -230,12 +231,35 @@ public CompletableFuture<Void> deleteScalableTopic(TopicName topic) {
230231
/**
231232
* Register a scalable consumer with the controller leader for {@code topic}.
232233
* Persists a durable session and returns the consumer's segment assignment.
234+
*
235+
* <p>The {@code consumerType} drives broker-side semantics that depend on the
236+
* consumer mode — most importantly whether the {@link SubscriptionCoordinator}
237+
* enforces parent-drain ordering before handing out children of a split. STREAM
238+
* consumers want it (per-key ordering); CHECKPOINT and QUEUE consumers don't
239+
* (they either track position client-side or have shared, already-out-of-order
240+
* delivery semantics).
233241
*/
234242
public CompletableFuture<ConsumerAssignment> registerConsumer(TopicName topic, String subscription,
235243
String consumerName, long consumerId,
244+
ScalableConsumerType
245+
consumerType,
236246
org.apache.pulsar.broker.service.TransportCnx cnx) {
237247
return getOrCreateController(topic)
238-
.thenCompose(controller -> controller.registerConsumer(subscription, consumerName, consumerId, cnx));
248+
.thenCompose(controller ->
249+
controller.registerConsumer(subscription, consumerName, consumerId,
250+
consumerType, cnx));
251+
}
252+
253+
/**
254+
* @deprecated Defaults to {@link ScalableConsumerType#STREAM}
255+
* for backward compatibility. New callers should pass the explicit consumer type.
256+
*/
257+
@Deprecated
258+
public CompletableFuture<ConsumerAssignment> registerConsumer(TopicName topic, String subscription,
259+
String consumerName, long consumerId,
260+
org.apache.pulsar.broker.service.TransportCnx cnx) {
261+
return registerConsumer(topic, subscription, consumerName, consumerId,
262+
ScalableConsumerType.STREAM, cnx);
239263
}
240264

241265
/**
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.apache.pulsar.broker.service.scalable;
20+
21+
import java.util.concurrent.CompletableFuture;
22+
import org.apache.pulsar.common.scalable.SegmentInfo;
23+
24+
/**
25+
* Resolves whether a (sealed) segment has been fully drained by a particular subscription.
26+
*
27+
* <p>Used by {@link SubscriptionCoordinator} to decide when newly-active children of a
28+
* split / merge can be assigned to consumers: an active child is only assignable once
29+
* <em>every</em> parent has been drained for the subscription, so message order with respect
30+
* to the split point is preserved.
31+
*
32+
* <p>Implementations typically read the segment topic's per-subscription backlog (the
33+
* cursor on a sealed topic with {@code msgBacklog == 0} is by definition at the end). For
34+
* subscriptions started with {@code Latest}, every sealed segment's cursor is created at
35+
* the topic's end, so the drain check completes immediately.
36+
*/
37+
@FunctionalInterface
38+
public interface SegmentDrainChecker {
39+
40+
/**
41+
* Returns {@code true} if the segment's cursor for {@code subscription} has reached the
42+
* end of the segment's data, {@code false} otherwise. Errors complete the future
43+
* exceptionally; callers should treat them as "not drained yet" and retry.
44+
*
45+
* @param segment the segment to check (only meaningful for sealed segments — active
46+
* segments still receive new messages, so they're never drained)
47+
* @param subscription the subscription whose cursor we're asking about
48+
*/
49+
CompletableFuture<Boolean> isDrained(SegmentInfo segment, String subscription);
50+
}

0 commit comments

Comments
 (0)