From 931f09cebfc8cd2e72ccdb8e12228689f399c23c Mon Sep 17 00:00:00 2001 From: sunil-solace Date: Wed, 13 May 2026 17:31:23 -0400 Subject: [PATCH 01/13] DATAGO-134580: Recreate JCSMP producer on unsolicited CloseFlow When the broker fans out an unsolicited CloseFlow on a publisher flow (message-spool maintenance, DR failover, "503: Service Unavailable" on GD), JCSMP terminally closes the per-binding XMLMessageProducer. The JCSMP session stays connected, but every subsequent producer.send(...) throws StaleSessionException until the application is restarted - customer-reported in DATAGO-132760. JCSMPOutboundMessageHandler now detects StaleSessionException and ClosedFacilityException in the send-path catch block, arms a volatile producerNeedsRecreation flag, and at the top of the next handleMessage rebuilds the producer (and the TransactedSession when configured) under a double-checked-lock before publishing. Recreation failures are routed through the existing error channel and leave the flag armed so the next inbound message retries. The flag is reset on start() and closeResources() to keep stop/start lifecycles clean. Tests: - Unit (JCSMPOutboundMessageHandlerTest): parameterized stale-recovery across plain and transacted sessions, ClosedFacilityException variant, recreation-failure-then-retry, and lifecycle-driven flag reset. - Broker IT (JCSMPProducerCloseFlowRecoveryIT, new): five scenarios against PubSubPlusExtension - three controls that document broker disruptions which do NOT reproduce the bug (spool-quota toggle on persistent topic, direct topic, queue ingress/egress toggle), the customer-reported reproducer driven via the broker's CLI (hardware message-spool shutdown over docker exec with TTY for confirmation prompts), and a repeated-cycles variant that proves the recreate path resets cleanly across consecutive stale-flow events on the same binding. --- .gitignore | 5 +- .../outbound/JCSMPOutboundMessageHandler.java | 112 ++- .../JCSMPOutboundMessageHandlerTest.java | 240 ++++++ .../config/BrokerConfiguratorBuilder.java | 30 + .../JCSMPProducerCloseFlowRecoveryIT.java | 740 ++++++++++++++++++ 5 files changed, 1117 insertions(+), 10 deletions(-) create mode 100644 solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java diff --git a/.gitignore b/.gitignore index 0e34d93ce..d3b71df03 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,7 @@ **/.flattened-pom.xml # Release files -release.properties \ No newline at end of file +release.properties + +# Local toolchain / IDE files +.java-version \ No newline at end of file diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java index 15dedc33a..e89a844c2 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java @@ -14,11 +14,13 @@ import com.solace.spring.cloud.stream.binder.util.JCSMPSessionProducerManager.CloudStreamEventHandler; import com.solace.spring.cloud.stream.binder.util.StaticMessageHeaderMapAccessor; import com.solace.spring.cloud.stream.binder.util.XMLMessageMapper; +import com.solacesystems.jcsmp.ClosedFacilityException; import com.solacesystems.jcsmp.Destination; import com.solacesystems.jcsmp.JCSMPException; import com.solacesystems.jcsmp.JCSMPFactory; import com.solacesystems.jcsmp.JCSMPSession; import com.solacesystems.jcsmp.JCSMPStreamingPublishCorrelatingEventHandler; +import com.solacesystems.jcsmp.StaleSessionException; import com.solacesystems.jcsmp.Topic; import com.solacesystems.jcsmp.XMLMessage; import com.solacesystems.jcsmp.XMLMessageProducer; @@ -64,6 +66,14 @@ public final class JCSMPOutboundMessageHandler implements MessageHandler, Lifecy private boolean isRunning = false; private ErrorMessageStrategy errorMessageStrategy; + // DATAGO-134580: when the broker sends an unsolicited CloseFlow (message-spool shutdown, + // DR failover, "Service Unavailable" on GD), JCSMP marks the producer terminally closed. + // The session itself stays connected, so the next producer.send(...) throws + // StaleSessionException synchronously. We set this flag from the catch block and lazily + // recreate the producer at the top of the next handleMessage(...) call. + private volatile boolean producerNeedsRecreation = false; + private final Object recreateLock = new Object(); + private static final Logger LOGGER = LoggerFactory.getLogger(JCSMPOutboundMessageHandler.class); public JCSMPOutboundMessageHandler(ProducerDestination destination, @@ -95,6 +105,12 @@ public void handleMessage(@NonNull Message message) throws MessagingException throw handleMessagingException(correlationKey, msg0, new ClosedChannelBindingException(msg1)); } + // DATAGO-134580: if a previous publish marked the producer stale (broker fanned out an + // unsolicited CloseFlow), rebuild it before attempting this publish. A failure here is + // routed through the normal error channel via handleMessagingException, with the + // recreation flag left armed so the next message retries. + recreateProducerIfNeeded(correlationKey); + try { CorrelationData correlationData = message.getHeaders() .get(SolaceBinderHeaders.CONFIRM_CORRELATION, CorrelationData.class); @@ -180,6 +196,23 @@ public void handleMessage(@NonNull Message message) throws MessagingException } } + // DATAGO-134580: when the broker tears down the publisher flow (unsolicited + // CloseFlow on message-spool shutdown, DR failover, etc.), JCSMP marks the + // XMLMessageProducer terminally closed and every subsequent send throws + // StaleSessionException. ClosedFacilityException is the related + // already-closed-locally signal. Both mean the producer reference is dead; + // arm the recreation flag so the next handleMessage(...) rebuilds it. The + // volatile write must happen before the throw below so a concurrent thread + // reading the flag sees the update. + if (e instanceof StaleSessionException || e instanceof ClosedFacilityException) { + if (!producerNeedsRecreation) { + LOGGER.warn("Detected stale JCSMP producer for binding {} (cause: {}); will " + + "recreate on next message ", + properties.getBindingName(), e.getClass().getSimpleName(), id); + } + producerNeedsRecreation = true; + } + throw handleMessagingException(correlationKey, "Unable to send message(s) to destination", e); } finally { if (solaceMeterAccessor != null) { @@ -231,6 +264,8 @@ public void start() { LOGGER.warn("Nothing to do, message handler {} is already running", id); return; } + // Clear any stale recreation flag left over from a prior lifecycle. + producerNeedsRecreation = false; try { Map headerNameMapping = properties.getExtension().getHeaderNameMapping(); @@ -246,15 +281,7 @@ public void start() { } producerManager.get(id); - if (properties.getExtension().isTransacted()) { - LOGGER.info("Creating transacted session ", id); - transactedSession = jcsmpSession.createTransactedSession(); - producer = transactedSession.createProducer(SolaceProvisioningUtil.getProducerFlowProperties(jcsmpSession), - producerEventHandler); - } else { - producer = jcsmpSession.createProducer(SolaceProvisioningUtil.getProducerFlowProperties(jcsmpSession), - producerEventHandler); - } + createProducerInternal(); } catch (Exception e) { String msg = String.format("Unable to get a message producer for session %s", jcsmpSession.getSessionName()); LOGGER.warn(msg, e); @@ -265,6 +292,71 @@ public void start() { isRunning = true; } + /** + * Builds the per-binding {@link XMLMessageProducer} (and the underlying + * {@link TransactedSession} when the binding is transacted) using the same flow + * properties and event handler as the initial {@link #start()} call. Extracted from + * {@code start()} so both the initial-create path and the post-CloseFlow recreate path + * share one implementation - the alternative would be duplicating the + * transacted/non-transacted branch in two places. + */ + private void createProducerInternal() throws JCSMPException { + if (properties.getExtension().isTransacted()) { + LOGGER.info("Creating transacted session ", id); + transactedSession = jcsmpSession.createTransactedSession(); + producer = transactedSession.createProducer(SolaceProvisioningUtil.getProducerFlowProperties(jcsmpSession), + producerEventHandler); + } else { + producer = jcsmpSession.createProducer(SolaceProvisioningUtil.getProducerFlowProperties(jcsmpSession), + producerEventHandler); + } + } + + /** + * DATAGO-134580: if a prior {@code producer.send(...)} surfaced a + * {@link StaleSessionException} or {@link ClosedFacilityException}, the broker has torn + * down our publisher flow (typically via unsolicited CloseFlow) and the local producer + * reference points at a terminally closed instance. Close the dead producer (and the + * dead transacted session, if any) and build fresh ones via + * {@link #createProducerInternal()} so the next publish can succeed. + * + *

Double-checked-locked so concurrent dispatcher threads do not all recreate. The + * lock guards close + create only; once the flag is cleared, subsequent + * {@link #handleMessage(Message)} calls fall through to the publish path without + * acquiring the lock. + * + *

If recreation itself fails (e.g. the broker is still mid-restart from the spool + * shutdown), the flag stays {@code true}, the current in-flight message is surfaced + * via {@link #handleMessagingException} the same way the original failing send was, + * and the next inbound message retries recreation. No internal retry loop, no new + * threads. + */ + private void recreateProducerIfNeeded(ErrorChannelSendingCorrelationKey correlationKey) throws MessagingException { + if (!producerNeedsRecreation) return; + synchronized (recreateLock) { + if (!producerNeedsRecreation) return; + LOGGER.warn("Recreating JCSMP producer for binding {} after stale-flow detection ", + properties.getBindingName(), id); + try { + if (producer != null) producer.close(); + } catch (Exception closeError) { + LOGGER.debug("Failed to close stale producer during recreation ", id, closeError); + } + try { + if (transactedSession != null) transactedSession.close(); + } catch (Exception closeError) { + LOGGER.debug("Failed to close stale transacted session during recreation ", id, closeError); + } + try { + createProducerInternal(); + producerNeedsRecreation = false; + } catch (JCSMPException createError) { + throw handleMessagingException(correlationKey, + "Failed to recreate JCSMP producer after stale-flow detection", createError); + } + } + } + @Override public void stop() { if (!isRunning()) return; @@ -274,6 +366,8 @@ public void stop() { private void closeResources() { LOGGER.info("Stopping producer to {} {} ", configDestinationType, configDestination.getName(), id); + // Clear any pending recreation request so a stop/start cycle starts from a clean state. + producerNeedsRecreation = false; if (producer != null) { LOGGER.info("Closing producer ", id); producer.close(); diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java index aee461d96..c416f5c1e 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java @@ -16,6 +16,7 @@ import com.solace.spring.cloud.stream.binder.util.SolaceMessageConversionException; import com.solace.spring.cloud.stream.binder.util.SolaceMessageHeaderErrorMessageStrategy; import com.solacesystems.jcsmp.BytesMessage; +import com.solacesystems.jcsmp.ClosedFacilityException; import com.solacesystems.jcsmp.Destination; import com.solacesystems.jcsmp.JCSMPException; import com.solacesystems.jcsmp.JCSMPProperties; @@ -23,6 +24,7 @@ import com.solacesystems.jcsmp.JCSMPStreamingPublishCorrelatingEventHandler; import com.solacesystems.jcsmp.ProducerFlowProperties; import com.solacesystems.jcsmp.Queue; +import com.solacesystems.jcsmp.StaleSessionException; import com.solacesystems.jcsmp.Topic; import com.solacesystems.jcsmp.XMLMessage; import com.solacesystems.jcsmp.XMLMessageProducer; @@ -684,6 +686,244 @@ void testGetBindingName() { assertThat(messageHandler.getBindingName()).isNotEmpty().isEqualTo(producerProperties.getBindingName()); } + /** + * DATAGO-134580: Reproduces the bug where an unsolicited {@code CloseFlow} from the broker + * leaves the per-binding {@link XMLMessageProducer} terminally closed. JCSMP throws + * {@link StaleSessionException} on every subsequent send, and today the binding can only + * recover via an app restart. + * + *

Expected (post-fix) behavior: + *

    + *
  1. The send that triggers {@code StaleSessionException} surfaces a {@link MessagingException} + * (current behavior preserved).
  2. + *
  3. The handler remains running.
  4. + *
  5. The next {@code handleMessage} call recreates the producer via + * {@code session.createProducer(...)} and successfully publishes on the new producer.
  6. + *
+ * + *

Cartesian over {@code transacted = {false, true}} so both branches of the + * recreate code path ({@code jcsmpSession.createProducer(...)} for direct producers and + * {@code jcsmpSession.createTransactedSession()} -> {@code transactedSession.createProducer(...)} + * for transacted producers) are exercised. The transacted branch additionally closes + * the dead {@code TransactedSession} and recreates a fresh one. + * + *

On {@code master} (pre-fix) this test fails at step 3: only one producer is ever created + * and the second send re-throws {@code StaleSessionException}. + */ + @CartesianTest(name = "[{index}] transacted={0}") + public void test_producerRecreatedAfterUnsolicitedCloseFlow( + @Values(booleans = {false, true}) boolean transacted, + @Mock XMLMessageProducer producerB, + @Mock TransactedSession transactedSessionB) throws Exception { + producerProperties.getExtension().setTransacted(transacted); + + if (transacted) { + // First createTransactedSession() returns the original mock from init(); the + // second (after stale detection) returns a fresh transactedSessionB. The original + // transactedSession.createProducer(...) keeps the init() lenient stub that returns + // messageProducer; the new transactedSessionB.createProducer(...) returns producerB. + Mockito.when(session.createTransactedSession()) + .thenReturn(transactedSession) + .thenReturn(transactedSessionB); + Mockito.when(transactedSessionB.createProducer(any(ProducerFlowProperties.class), + any(JCSMPStreamingPublishCorrelatingEventHandler.class))).thenReturn(producerB); + } else { + // Non-transacted: two producers in sequence directly from the session. + Mockito.when(session.createProducer( + producerFlowPropertiesCaptor.capture(), pubEventHandlerCaptor.capture())) + .thenReturn(messageProducer) + .thenReturn(producerB); + } + + // Simulate the broker-driven CloseFlow: the next send() on the existing producer throws + // StaleSessionException (wrapping the original transport exception JCSMP recorded async). + StaleSessionException stale = new StaleSessionException( + "Tried to perform operation on a closed XML message producer", + new JCSMPException("Received unsolicited CloseFlow for producer (503:Service Unavailable).")); + Mockito.doThrow(stale).when(messageProducer).send(any(XMLMessage.class), any(Destination.class)); + + messageHandler.start(); + + // 1st publish — the underlying producer is terminally closed; we expect the binding + // to surface the failure to the caller but stay alive. We assert hasCauseInstanceOf + // (not hasRootCauseInstanceOf) because StaleSessionException itself wraps a deeper + // JCSMPException / JCSMPTransportException cause - the root would be that inner one, + // but the diagnostic exception we care about preserving is StaleSessionException. + Message firstMessage = MessageBuilder.withPayload("payload-1").build(); + assertThatThrownBy(() -> messageHandler.handleMessage(firstMessage)) + .isInstanceOf(MessagingException.class) + .hasCauseInstanceOf(StaleSessionException.class); + assertThat(messageHandler.isRunning()) + .as("Handler must remain running so it can recover on the next send") + .isTrue(); + + // 2nd publish — the fix should detect the stale producer, close it, and create a fresh one. + Message secondMessage = MessageBuilder.withPayload("payload-2").build(); + messageHandler.handleMessage(secondMessage); + + // Recreation evidence depends on which producer path the binding uses. + if (transacted) { + // Two transacted sessions ever created (initial + recreated). The old one is + // closed and the new one services the second publish. + Mockito.verify(session, Mockito.times(2)).createTransactedSession(); + Mockito.verify(transactedSession, Mockito.atLeastOnce()).close(); + Mockito.verify(transactedSessionB, Mockito.times(1)) + .createProducer(any(ProducerFlowProperties.class), + any(JCSMPStreamingPublishCorrelatingEventHandler.class)); + } else { + // Non-transacted: two producers ever created (initial + recreated). + Mockito.verify(session, Mockito.times(2)) + .createProducer(any(ProducerFlowProperties.class), any(JCSMPStreamingPublishCorrelatingEventHandler.class)); + } + + // The recreated producer must have received the second publish, and the dead one + // must not have been re-used. The dead producer is also closed during recreation. + Mockito.verify(producerB, Mockito.times(1)).send(any(XMLMessage.class), any(Destination.class)); + Mockito.verify(messageProducer, Mockito.times(1)).send(any(XMLMessage.class), any(Destination.class)); + Mockito.verify(messageProducer, Mockito.atLeastOnce()).close(); + } + + /** + * DATAGO-134580 (companion of {@link #test_producerRecreatedAfterUnsolicitedCloseFlow}): + * the catch block in {@code handleMessage} triggers recreation on BOTH + * {@link StaleSessionException} and {@link ClosedFacilityException}. This proves the OR + * branch is not dead code - a producer surfacing + * {@code ClosedFacilityException("Producer is closed")} on send must also lead to a + * recreated producer on the next message. + */ + @Test + void test_producerRecreatedAfterClosedFacilityException(@Mock XMLMessageProducer producerB) throws Exception { + Mockito.when(session.createProducer( + producerFlowPropertiesCaptor.capture(), pubEventHandlerCaptor.capture())) + .thenReturn(messageProducer) + .thenReturn(producerB); + + Mockito.doThrow(new ClosedFacilityException("Producer is closed")) + .when(messageProducer).send(any(XMLMessage.class), any(Destination.class)); + + messageHandler.start(); + + Message firstMessage = MessageBuilder.withPayload("payload-1").build(); + assertThatThrownBy(() -> messageHandler.handleMessage(firstMessage)) + .isInstanceOf(MessagingException.class) + .hasCauseInstanceOf(ClosedFacilityException.class); + + Message secondMessage = MessageBuilder.withPayload("payload-2").build(); + messageHandler.handleMessage(secondMessage); + + Mockito.verify(session, Mockito.times(2)) + .createProducer(any(ProducerFlowProperties.class), any(JCSMPStreamingPublishCorrelatingEventHandler.class)); + Mockito.verify(producerB, Mockito.times(1)).send(any(XMLMessage.class), any(Destination.class)); + Mockito.verify(messageProducer, Mockito.atLeastOnce()).close(); + } + + /** + * DATAGO-134580: when {@code session.createProducer(...)} itself fails during recreation + * (e.g. the broker is still mid-restart from the message-spool shutdown), the in-flight + * message must be routed through the error channel via {@code handleMessagingException}, + * the {@code producerNeedsRecreation} flag must stay armed, and the next inbound message + * must retry recreation. This guards against a silent infinite-retry-loop or + * lost-error class of regression. + * + *

Sequence: + *

    + *
  1. {@code start()} returns the original {@code messageProducer} (which will throw + * Stale on send).
  2. + *
  3. First {@code handleMessage} -> Stale -> flag armed, MessagingException surfaced.
  4. + *
  5. Second {@code handleMessage} -> recreation attempted -> {@code createProducer} + * throws -> MessagingException surfaced, flag stays armed.
  6. + *
  7. Third {@code handleMessage} -> recreation retried, this time succeeds via + * {@code producerB} -> message publishes cleanly, flag cleared.
  8. + *
+ */ + @Test + void test_producerRecreationFailurePropagatesAndRetriesNext(@Mock XMLMessageProducer producerB) throws Exception { + Mockito.when(session.createProducer( + producerFlowPropertiesCaptor.capture(), pubEventHandlerCaptor.capture())) + .thenReturn(messageProducer) // start() + .thenThrow(new JCSMPException("Broker still reconnecting")) // first recreation attempt + .thenReturn(producerB); // second recreation attempt + + StaleSessionException stale = new StaleSessionException( + "Tried to perform operation on a closed XML message producer", + new JCSMPException("Received unsolicited CloseFlow for producer (503:Service Unavailable).")); + Mockito.doThrow(stale).when(messageProducer).send(any(XMLMessage.class), any(Destination.class)); + + messageHandler.start(); + + // 1st publish — original producer is stale; failure surfaces, flag is armed. + Message firstMessage = MessageBuilder.withPayload("payload-1").build(); + assertThatThrownBy(() -> messageHandler.handleMessage(firstMessage)) + .as("first publish must surface StaleSessionException to the caller") + .isInstanceOf(MessagingException.class) + .hasCauseInstanceOf(StaleSessionException.class); + + // 2nd publish — recreation attempt itself fails (broker still recovering). The in-flight + // message must be routed through the error channel; the flag must remain armed. + Message secondMessage = MessageBuilder.withPayload("payload-2").build(); + assertThatThrownBy(() -> messageHandler.handleMessage(secondMessage)) + .as("recreation failure must propagate as MessagingException for this in-flight message") + .isInstanceOf(MessagingException.class) + .hasMessageContaining("Failed to recreate JCSMP producer") + .hasCauseInstanceOf(JCSMPException.class); + + // 3rd publish — broker has recovered; recreation succeeds; producerB services the send. + Message thirdMessage = MessageBuilder.withPayload("payload-3").build(); + messageHandler.handleMessage(thirdMessage); + + // Three createProducer attempts (initial start + two recreation attempts). + Mockito.verify(session, Mockito.times(3)) + .createProducer(any(ProducerFlowProperties.class), any(JCSMPStreamingPublishCorrelatingEventHandler.class)); + // Only producerB serviced a successful send. + Mockito.verify(producerB, Mockito.times(1)).send(any(XMLMessage.class), any(Destination.class)); + // Handler stayed alive through both failures. + assertThat(messageHandler.isRunning()).isTrue(); + } + + /** + * DATAGO-134580: {@code start()} and {@code closeResources()} both reset the + * {@code producerNeedsRecreation} flag, so a stop/start lifecycle starts from a clean + * state regardless of what happened in the prior lifecycle. Without that reset, a stop + * after a stale-flow event followed by a fresh start would gratuitously recreate the + * brand-new producer on the very first message. + */ + @Test + void test_recreationFlagResetAcrossStopStartCycle(@Mock XMLMessageProducer producerB) throws Exception { + Mockito.when(session.createProducer( + producerFlowPropertiesCaptor.capture(), pubEventHandlerCaptor.capture())) + .thenReturn(messageProducer) // first start() + .thenReturn(producerB); // second start() after stop() + + // First lifecycle: producer fails on send -> flag is armed. + StaleSessionException stale = new StaleSessionException( + "Tried to perform operation on a closed XML message producer", + new JCSMPException("Received unsolicited CloseFlow for producer (503:Service Unavailable).")); + Mockito.doThrow(stale).when(messageProducer).send(any(XMLMessage.class), any(Destination.class)); + + messageHandler.start(); + Message firstMessage = MessageBuilder.withPayload("payload-1").build(); + assertThatThrownBy(() -> messageHandler.handleMessage(firstMessage)) + .isInstanceOf(MessagingException.class) + .hasCauseInstanceOf(StaleSessionException.class); + + // Now restart: closeResources() + start() must both clear the recreation flag. + messageHandler.stop(); + messageHandler.start(); + + // Clear the createProducer invocation history so we can prove the NEXT handleMessage + // does not trigger a recreation (the flag must be cleared by the stop/start cycle). + Mockito.clearInvocations(session); + + // Subsequent publish lands on producerB (the freshly-started producer); the + // recreation code path must NOT fire - no additional createProducer calls. + Message secondMessage = MessageBuilder.withPayload("payload-2").build(); + messageHandler.handleMessage(secondMessage); + + Mockito.verify(session, Mockito.never()) + .createProducer(any(ProducerFlowProperties.class), any(JCSMPStreamingPublishCorrelatingEventHandler.class)); + Mockito.verify(producerB, Mockito.times(1)).send(any(XMLMessage.class), any(Destination.class)); + } + private List getCorrelationKeys() throws JCSMPException { Mockito.verify(messageProducer, Mockito.atLeastOnce()).send(xmlMessageCaptor.capture(), any(Destination.class)); return xmlMessageCaptor.getAllValues() diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/it/util/semp/config/BrokerConfiguratorBuilder.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/it/util/semp/config/BrokerConfiguratorBuilder.java index 95b735879..0844038e8 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/it/util/semp/config/BrokerConfiguratorBuilder.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/it/util/semp/config/BrokerConfiguratorBuilder.java @@ -279,6 +279,36 @@ public void disableVpn(String msgVpnName) { updateVpn(vpn); } + /** + * Zeroes out the guaranteed-delivery message-spool quota for a VPN, which causes the broker + * to send an unsolicited {@code CloseFlow} to every currently-bound publisher and consumer + * flow on that VPN. The session itself is left connected; only the GD flows are torn down. + * + *

Use {@link #restoreMsgSpoolForVpn(String, Long)} (with the previously-captured value) + * to put the spool back. + * + * @param msgVpnName name of the vpn whose spool to disable + */ + public void disableMsgSpoolForVpn(String msgVpnName) { + final ConfigMsgVpn vpn = queryVpn(msgVpnName); + vpn.maxMsgSpoolUsage(0L); + updateVpn(vpn); + } + + /** + * Restores the guaranteed-delivery message-spool quota for a VPN to the supplied value. + * Intended for use after {@link #disableMsgSpoolForVpn(String)} - pass the original quota + * captured from {@link #queryVpn(String)} before the disable. + * + * @param msgVpnName name of the vpn + * @param maxMsgSpoolUsageMb the spool quota in megabytes to set + */ + public void restoreMsgSpoolForVpn(String msgVpnName, Long maxMsgSpoolUsageMb) { + final ConfigMsgVpn vpn = queryVpn(msgVpnName); + vpn.maxMsgSpoolUsage(maxMsgSpoolUsageMb); + updateVpn(vpn); + } + /** * Queries all vpns on a broker * diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java new file mode 100644 index 000000000..e64638094 --- /dev/null +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java @@ -0,0 +1,740 @@ +package com.solace.spring.cloud.stream.binder; + +import com.solace.it.util.semp.config.BrokerConfiguratorBuilder; +import com.solace.it.util.semp.config.BrokerConfiguratorBuilder.BrokerConfigurator; +import com.solace.spring.boot.autoconfigure.SolaceJavaAutoConfiguration; +import com.solace.spring.cloud.stream.binder.properties.SolaceProducerProperties; +import com.solace.spring.cloud.stream.binder.test.junit.extension.SpringCloudStreamExtension; +import com.solace.spring.cloud.stream.binder.test.spring.SpringCloudStreamContext; +import com.solace.spring.cloud.stream.binder.test.util.SimpleJCSMPEventHandler; +import com.solace.spring.cloud.stream.binder.test.util.SolaceTestBinder; +import com.solace.spring.cloud.stream.binder.util.DestinationType; +import com.solace.test.integration.junit.jupiter.extension.PubSubPlusExtension; +import com.solace.test.integration.semp.v2.SempV2Api; +import com.solace.test.integration.semp.v2.config.model.ConfigMsgVpnQueue; +import com.solacesystems.jcsmp.DeliveryMode; +import com.solacesystems.jcsmp.JCSMPException; +import com.solacesystems.jcsmp.JCSMPFactory; +import com.solacesystems.jcsmp.JCSMPProperties; +import com.solacesystems.jcsmp.JCSMPSession; +import com.solacesystems.jcsmp.Queue; +import com.solacesystems.jcsmp.StaleSessionException; +import com.solacesystems.jcsmp.TextMessage; +import com.solacesystems.jcsmp.Topic; +import com.solacesystems.jcsmp.XMLMessageProducer; +import com.github.dockerjava.api.DockerClient; +import com.github.dockerjava.api.command.ExecCreateCmdResponse; +import com.github.dockerjava.api.model.Container; +import org.apache.commons.lang3.RandomStringUtils; +import org.awaitility.Awaitility; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInfo; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.boot.test.context.ConfigDataApplicationContextInitializer; +import org.springframework.cloud.stream.binder.Binding; +import org.springframework.cloud.stream.binder.ExtendedProducerProperties; +import org.springframework.cloud.stream.config.BindingProperties; +import org.springframework.integration.channel.DirectChannel; +import org.springframework.integration.support.MessageBuilder; +import org.springframework.messaging.MessageChannel; +import org.springframework.messaging.MessagingException; +import org.springframework.test.context.junit.jupiter.SpringJUnitConfig; +import org.testcontainers.DockerClientFactory; + +import java.io.ByteArrayInputStream; +import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.util.List; +import java.util.concurrent.TimeUnit; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatCode; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * Broker integration tests characterising producer behaviour around the + * DATAGO-134580 bug: when the + * broker tears down a publisher flow (DR failover, message-spool maintenance), JCSMP marks the + * per-binding {@link XMLMessageProducer} as terminally closed and every subsequent + * {@code send(...)} throws {@link StaleSessionException} until the app is restarted. + * + *

This class contains five integration tests. Three of them are control / negative + * cases that exercise broker-side disruptions which - empirically - do not + * reproduce the bug. They are kept as long-term coverage so that any future broker behaviour + * change (e.g. a future broker version starts ejecting already-bound publisher flows on a + * spool-quota change) shows up immediately as a test failure rather than a silent regression. + * The fourth test is the bug reproducer that pins the exact customer-reported failure + * mode and is what the {@code JCSMPOutboundMessageHandler} fix must turn green. The fifth + * test loops the bug reproducer across multiple consecutive stale-flow cycles, proving the + * recreate-on-stale path resets cleanly between events and does not accumulate state. + * + *

    + *
  1. {@link #test_persistentTopicPublisher_survivesSpoolToggle persistentTopicPublisher_survivesSpoolToggle} + * - Control. Persistent (GD) publishing to a topic + VPN-level spool quota + * cycled through 0 via SEMPv2. Asserts the publisher continues to publish. The broker + * only NACKs new spool writes asynchronously when the quota is 0; it does not eject + * already-bound publisher flows.
  2. + * + *
  3. {@link #test_directTopicPublisher_survivesSpoolToggle directTopicPublisher_survivesSpoolToggle} + * - Control. DIRECT (non-persistent) publishing to a topic via raw JCSMP + + * the same spool toggle. Asserts the publisher continues to publish. Direct messages + * bypass the spool subsystem entirely so the toggle is a no-op for them. The binder + * cannot be used for this case because {@code XMLMessageMapper.java:205} unconditionally + * promotes outbound messages to {@link DeliveryMode#PERSISTENT}.
  4. + * + *
  5. {@link #test_persistentQueuePublisher_survivesQueueIngressEgressToggle persistentQueuePublisher_survivesQueueIngressEgressToggle} + * - Control. Persistent publishing to a Queue destination + the queue's ingress + * and egress disabled and re-enabled via SEMPv2. Asserts the publisher continues to + * publish. Toggling ingress/egress causes the broker to NACK incoming publishes + * asynchronously while disabled but does not fan out unsolicited + * {@code CloseFlow}, so the bound publisher flow stays alive across the cycle.
  6. + * + *
  7. {@link #test_persistentQueuePublisher_recoversAfterMessageSpoolCliShutdown persistentQueuePublisher_recoversAfterMessageSpoolCliShutdown} + * - Bug reproducer. Persistent publishing to a Queue destination + a broker-level + * {@code hardware message-spool shutdown} / {@code no shutdown} cycle driven through + * the broker's CLI via {@code docker exec}. This is the only mechanism that actually + * fans out unsolicited {@code CloseFlow} (503 Service Unavailable) to every bound + * publisher flow without dropping the JCSMP session - matching the customer's + * traceback exactly. The first post-shutdown publish surfaces + * {@link StaleSessionException}; on master the binding stays stuck in that state. After + * the {@code JCSMPOutboundMessageHandler} recreate-on-stale fix lands, the handler + * rebuilds the producer at the top of {@code handleMessage(...)} and the next publish + * succeeds.
  8. + * + *
  9. {@link #test_persistentQueuePublisher_recoversAcrossRepeatedMessageSpoolCliShutdowns persistentQueuePublisher_recoversAcrossRepeatedMessageSpoolCliShutdowns} + * - Repeated bug reproducer. Same disruption as (4) but looped across multiple + * consecutive shutdown/restore cycles on the same binding. Proves the + * {@code producerNeedsRecreation} flag resets correctly after each successful + * recreation and that no state accumulates on the binder. A regression that only reset + * the flag once (e.g. a non-volatile read or a missed write in the success path) would + * reproduce the bug from the second cycle onwards.
  10. + *
+ * + *

All five tests run in {@link ExecutionMode#SAME_THREAD} because each touches shared + * broker state (VPN-level spool quota, queue ingress/egress, broker-level message-spool) + * that would briefly disrupt any concurrently-running tests on the same broker. + */ +@SpringJUnitConfig(classes = SolaceJavaAutoConfiguration.class, + initializers = ConfigDataApplicationContextInitializer.class) +@ExtendWith(PubSubPlusExtension.class) +@ExtendWith(SpringCloudStreamExtension.class) +@Execution(ExecutionMode.SAME_THREAD) +class JCSMPProducerCloseFlowRecoveryIT { + private static final Logger logger = LoggerFactory.getLogger(JCSMPProducerCloseFlowRecoveryIT.class); + + /** + * Control test (1/5) - persistent topic publisher + VPN spool toggle. + * + *

Documents the empirical broker behaviour: cycling {@code maxMsgSpoolUsage} through + * 0 does not tear down already-bound publisher flows on a topic destination, + * so the binding's producer stays healthy and subsequent persistent topic publishes + * succeed. This is the negative control for the bug reproducer below - if this test + * ever starts seeing {@link StaleSessionException}, the broker behaviour around spool + * quota changes has shifted and the bug surface area is wider than expected. + */ + @Test + void test_persistentTopicPublisher_survivesSpoolToggle( + JCSMPSession jcsmpSession, + SempV2Api sempV2Api, + SpringCloudStreamContext context, + TestInfo testInfo) throws Exception { + SolaceTestBinder binder = context.getBinder(); + + String destination = RandomStringUtils.randomAlphanumeric(10); + ExtendedProducerProperties producerProperties = context.createProducerProperties(testInfo); + BindingProperties producerBindingProperties = new BindingProperties(); + producerBindingProperties.setProducer(producerProperties); + DirectChannel moduleOutputChannel = context.createBindableChannel("output", producerBindingProperties); + + Binding producerBinding = binder.bindProducer(destination, moduleOutputChannel, producerProperties); + + String vpnName = (String) jcsmpSession.getProperty(JCSMPProperties.VPN_NAME); + BrokerConfigurator brokerConfig = BrokerConfiguratorBuilder.create(sempV2Api).build(); + Long originalMaxMsgSpoolUsageMb = brokerConfig.vpns().queryVpn(vpnName).getMaxMsgSpoolUsage(); + assertThat(originalMaxMsgSpoolUsageMb) + .as("Captured maxMsgSpoolUsage should be a positive value") + .isNotNull() + .isPositive(); + + boolean spoolRestored = false; + try { + moduleOutputChannel.send(MessageBuilder.withPayload("before-toggle").build()); + + logger.info("Zeroing maxMsgSpoolUsage for VPN '{}'", vpnName); + brokerConfig.vpns().disableMsgSpoolForVpn(vpnName); + awaitVpnMaxMsgSpoolUsage(sempV2Api, vpnName, 0L); + + logger.info("Restoring maxMsgSpoolUsage={} MB for VPN '{}'", originalMaxMsgSpoolUsageMb, vpnName); + brokerConfig.vpns().restoreMsgSpoolForVpn(vpnName, originalMaxMsgSpoolUsageMb); + spoolRestored = true; + awaitVpnMaxMsgSpoolUsage(sempV2Api, vpnName, originalMaxMsgSpoolUsageMb); + + assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload("after-toggle").build())) + .as("Persistent topic publisher should be unaffected by a VPN spool quota toggle") + .doesNotThrowAnyException(); + } finally { + if (!spoolRestored) { + try { + brokerConfig.vpns().restoreMsgSpoolForVpn(vpnName, originalMaxMsgSpoolUsageMb); + } catch (Exception cleanupError) { + logger.warn("Failed to restore maxMsgSpoolUsage for VPN '{}' during cleanup", vpnName, cleanupError); + } + } + producerBinding.unbind(); + } + } + + /** + * Control test (2/5) - DIRECT topic publisher + VPN spool toggle. + * + *

Direct messages bypass the broker's spool subsystem entirely, so a spool quota + * toggle has no effect on a direct publisher. Uses the raw JCSMP session because + * the binder's {@code XMLMessageMapper} unconditionally promotes outbound messages + * to {@link DeliveryMode#PERSISTENT} and there is no producer-property switch for + * direct mode. + */ + @Test + void test_directTopicPublisher_survivesSpoolToggle( + JCSMPSession jcsmpSession, + SempV2Api sempV2Api) throws Exception { + String topicName = RandomStringUtils.randomAlphanumeric(10); + Topic topic = JCSMPFactory.onlyInstance().createTopic(topicName); + XMLMessageProducer producer = jcsmpSession.getMessageProducer(new SimpleJCSMPEventHandler()); + + String vpnName = (String) jcsmpSession.getProperty(JCSMPProperties.VPN_NAME); + BrokerConfigurator brokerConfig = BrokerConfiguratorBuilder.create(sempV2Api).build(); + Long originalMaxMsgSpoolUsageMb = brokerConfig.vpns().queryVpn(vpnName).getMaxMsgSpoolUsage(); + assertThat(originalMaxMsgSpoolUsageMb) + .as("Captured maxMsgSpoolUsage should be a positive value") + .isNotNull() + .isPositive(); + + boolean spoolRestored = false; + try { + TextMessage warmup = JCSMPFactory.onlyInstance().createMessage(TextMessage.class); + warmup.setDeliveryMode(DeliveryMode.DIRECT); + warmup.setText("before-toggle"); + producer.send(warmup, topic); + + logger.info("Zeroing maxMsgSpoolUsage for VPN '{}'", vpnName); + brokerConfig.vpns().disableMsgSpoolForVpn(vpnName); + awaitVpnMaxMsgSpoolUsage(sempV2Api, vpnName, 0L); + + logger.info("Restoring maxMsgSpoolUsage={} MB for VPN '{}'", originalMaxMsgSpoolUsageMb, vpnName); + brokerConfig.vpns().restoreMsgSpoolForVpn(vpnName, originalMaxMsgSpoolUsageMb); + spoolRestored = true; + awaitVpnMaxMsgSpoolUsage(sempV2Api, vpnName, originalMaxMsgSpoolUsageMb); + + TextMessage after = JCSMPFactory.onlyInstance().createMessage(TextMessage.class); + after.setDeliveryMode(DeliveryMode.DIRECT); + after.setText("after-toggle"); + assertThatCode(() -> producer.send(after, topic)) + .as("Direct topic publisher should be unaffected by a VPN spool quota toggle (no spool involvement)") + .doesNotThrowAnyException(); + } finally { + if (!spoolRestored) { + try { + brokerConfig.vpns().restoreMsgSpoolForVpn(vpnName, originalMaxMsgSpoolUsageMb); + } catch (Exception cleanupError) { + logger.warn("Failed to restore maxMsgSpoolUsage for VPN '{}' during cleanup", vpnName, cleanupError); + } + } + producer.close(); + } + } + + /** + * Control test (3/5) - persistent queue publisher + queue ingress/egress toggle. + * + *

Documents the empirical broker behaviour: disabling and re-enabling a queue's + * ingress and egress does not fan out unsolicited {@code CloseFlow} on + * publisher flows bound to that queue. While ingress is disabled the broker NACKs + * incoming publishes asynchronously; once it is re-enabled the publisher flow remains + * bound and publishes resume cleanly. Asserts the publisher continues to publish + * before, during the cycle (post-restoration), and after. + * + *

If this test ever starts surfacing {@link StaleSessionException}, the broker + * behaviour around queue ingress/egress changes has shifted and the bug surface area + * is wider than today. + */ + @Test + void test_persistentQueuePublisher_survivesQueueIngressEgressToggle( + JCSMPSession jcsmpSession, + SempV2Api sempV2Api, + SpringCloudStreamContext context, + TestInfo testInfo) throws Exception { + SolaceTestBinder binder = context.getBinder(); + + String queueName = RandomStringUtils.randomAlphanumeric(20); + ExtendedProducerProperties producerProperties = context.createProducerProperties(testInfo); + producerProperties.getExtension().setDestinationType(DestinationType.QUEUE); + BindingProperties producerBindingProperties = new BindingProperties(); + producerBindingProperties.setProducer(producerProperties); + DirectChannel moduleOutputChannel = context.createBindableChannel("output", producerBindingProperties); + + Binding producerBinding = binder.bindProducer(queueName, moduleOutputChannel, producerProperties); + + String vpnName = (String) jcsmpSession.getProperty(JCSMPProperties.VPN_NAME); + BrokerConfigurator brokerConfig = BrokerConfiguratorBuilder.create(sempV2Api).build(); + + boolean queueRestored = false; + try { + moduleOutputChannel.send(MessageBuilder.withPayload("before-toggle").build()); + + logger.info("Disabling ingress and egress on queue '{}' in VPN '{}'", queueName, vpnName); + brokerConfig.queues().disableIngressOnQueue(vpnName, queueName); + brokerConfig.queues().disableEgressOnQueue(vpnName, queueName); + awaitQueueIngressEgress(sempV2Api, vpnName, queueName, false, false); + + logger.info("Re-enabling ingress and egress on queue '{}' in VPN '{}'", queueName, vpnName); + brokerConfig.queues().reenableIngressOnQueue(vpnName, queueName); + brokerConfig.queues().reenableEgressOnQueue(vpnName, queueName); + queueRestored = true; + awaitQueueIngressEgress(sempV2Api, vpnName, queueName, true, true); + + assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload("after-toggle").build())) + .as("Persistent queue publisher should be unaffected by a queue ingress/egress toggle") + .doesNotThrowAnyException(); + } finally { + if (!queueRestored) { + try { + brokerConfig.queues().reenableIngressOnQueue(vpnName, queueName); + brokerConfig.queues().reenableEgressOnQueue(vpnName, queueName); + } catch (Exception cleanupError) { + logger.warn("Failed to re-enable queue '{}' during cleanup", queueName, cleanupError); + } + } + try { + producerBinding.unbind(); + } finally { + Queue queue = JCSMPFactory.onlyInstance().createQueue(queueName); + try { + jcsmpSession.deprovision(queue, JCSMPSession.FLAG_IGNORE_DOES_NOT_EXIST); + } catch (Exception deprovisionError) { + logger.warn("Failed to deprovision queue '{}' during cleanup", queueName, deprovisionError); + } + } + } + } + + /** + * Bug reproducer (4/5) - persistent queue publisher + broker-level message-spool shutdown. + * + *

This is the precise customer-reported reproduction. The {@code hardware message-spool + * shutdown} / {@code no shutdown} CLI sequence is the only mechanism that fans out an + * unsolicited {@code CloseFlow} (503 Service Unavailable) to every currently-bound + * publisher flow on the broker without dropping the JCSMP session. SEMPv2-only + * approaches (zeroing the spool quota, toggling queue ingress/egress) either NACK + * publishes asynchronously without invalidating the flow, or also drop the session as a + * side-effect - neither matches the customer's actual failure mode. + * + *

The CLI is reached by finding the running PubSub+ test container via the docker + * client that testcontainers already exposes, then {@code docker exec}ing the + * {@code /usr/sw/loads/currentload/bin/cli -A} script with the spool commands piped on + * stdin. We cannot avoid this layer of indirection because {@code PubSubPlusExtension} + * does not expose the container as a parameter, and SEMPv2 has no equivalent action. + * + *

Two assertions: + *

    + *
  1. Bug witness - the first post-shutdown publish surfaces + * {@link MessagingException} with a {@link JCSMPException}-rooted cause. The + * customer's stack trace shows {@link StaleSessionException} wrapping a + * {@code JCSMPTransportException("Received unsolicited CloseFlow ... 503:Service + * Unavailable")}; depending on broker timing and JCSMP buffer state the test + * sometimes surfaces the inner {@code JCSMPTransportException} directly as the + * root cause instead. Both forms are manifestations of the same broker-driven + * flow teardown - we accept either via the common {@link JCSMPException} + * supertype.
  2. + *
  3. Recovery - the next publish completes cleanly only after the + * {@code JCSMPOutboundMessageHandler} recreate-on-stale fix lands.
  4. + *
+ */ + @Test + void test_persistentQueuePublisher_recoversAfterMessageSpoolCliShutdown( + JCSMPSession jcsmpSession, + SempV2Api sempV2Api, + SpringCloudStreamContext context, + TestInfo testInfo) throws Exception { + SolaceTestBinder binder = context.getBinder(); + + String queueName = RandomStringUtils.randomAlphanumeric(20); + ExtendedProducerProperties producerProperties = context.createProducerProperties(testInfo); + producerProperties.getExtension().setDestinationType(DestinationType.QUEUE); + BindingProperties producerBindingProperties = new BindingProperties(); + producerBindingProperties.setProducer(producerProperties); + DirectChannel moduleOutputChannel = context.createBindableChannel("output", producerBindingProperties); + + Binding producerBinding = binder.bindProducer(queueName, moduleOutputChannel, producerProperties); + + String vpnName = (String) jcsmpSession.getProperty(JCSMPProperties.VPN_NAME); + String solaceContainerId = findSolaceContainerId(); + boolean spoolRestored = false; + try { + moduleOutputChannel.send(MessageBuilder.withPayload("before-shutdown").build()); + + // === Phase 1: broker-side disruption via CLI === + // Tear down the broker's spool subsystem so it fans out unsolicited CloseFlow + // to every bound publisher / consumer flow. The JCSMP session stays connected; + // only the GD flows die. + // + // NOTE on command shape: the Solace CLI requires stepping through the config + // sub-modes one line at a time (`hardware`, then `message-spool`) - the dotted + // form `hardware message-spool` is not reliably accepted. The `shutdown` + // command itself is destructive and is guarded by TWO sequential confirmation + // prompts: + // All message spooling will be stopped. + // Do you want to continue (y/n)? <-- first prompt + // Do you want to continue (y/n)? <-- second prompt + // We answer both with `y`. Without those, the exec hangs at the first prompt + // and the shutdown never actually takes effect. `no shutdown` (the re-enable) + // is non-destructive and does not prompt. + logger.info("Shutting down broker message-spool via CLI in container '{}'", solaceContainerId); + runSolaceCliCommands(solaceContainerId, + "enable", + "configure", + "hardware", + "message-spool", + "shutdown", + "y"); + // Wait until the broker's SEMP API is responsive again before driving the + // re-enable CLI command. The shutdown is synchronous from the CLI's perspective, + // but the broker's HTTP management surface may briefly stutter as the spool + // subsystem transitions; awaitBrokerSempResponsive(...) probes that surface + // instead of trusting a fixed sleep window. + awaitBrokerSempResponsive(sempV2Api, vpnName); + + logger.info("Re-enabling broker message-spool via CLI in container '{}'", solaceContainerId); + runSolaceCliCommands(solaceContainerId, + "enable", + "configure", + "hardware", + "message-spool", + "no shutdown"); + spoolRestored = true; + // Same probe again after re-enable - confirms the broker is back to a + // queryable / stable state before we attempt the bug-witness publish. + awaitBrokerSempResponsive(sempV2Api, vpnName); + + // === Phase 2: assertion 1 - bug witness === + // The first post-shutdown publish must surface a JCSMP-rooted MessagingException. + // We accept any JCSMPException root cause (StaleSessionException, + // ClosedFacilityException, JCSMPTransportException) because the broker's CLI-driven + // teardown can surface as either the outer StaleSessionException form (matching + // the customer's stack trace verbatim) or the inner JCSMPTransportException form + // when JCSMP delivers the CloseFlow event before the producer's stale-marker has + // been propagated to the send() caller. Both forms prove the same thing - that + // the broker tore down the publisher flow and the binding's local producer is + // no longer usable. + logger.info("Attempting first post-shutdown publish; expecting a JCSMP-rooted failure"); + Awaitility.await("post-CLI-shutdown publish surfaces a JCSMP-rooted failure") + .atMost(Duration.ofSeconds(20)) + .pollInterval(Duration.ofMillis(500)) + .untilAsserted(() -> assertThatThrownBy(() -> + moduleOutputChannel.send(MessageBuilder.withPayload("witness-" + System.nanoTime()).build())) + .isInstanceOf(MessagingException.class) + .hasRootCauseInstanceOf(JCSMPException.class)); + + // === Phase 3: assertion 2 - recovery === + // Pre-fix (master): handler-local producer is still the dead one, this throws + // StaleSessionException - assertion fails. That failure IS the bug. + // Post-fix: handler detects the stale flag, recreates the producer and + // the publish succeeds. + logger.info("Attempting recovery publish; expecting success only with the fix in place"); + assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload("recovery").build())) + .as("Publish after broker message-spool CLI shutdown must recover; on master it stays stale and re-throws StaleSessionException") + .doesNotThrowAnyException(); + } finally { + // Belt-and-suspenders: if assertion 1 timed out before we re-enabled the spool, + // turn it back on so subsequent tests / the broker container stay healthy. + if (!spoolRestored) { + try { + runSolaceCliCommands(solaceContainerId, + "enable", + "configure", + "hardware", + "message-spool", + "no shutdown"); + } catch (Exception cleanupError) { + logger.warn("Failed to re-enable broker message-spool during cleanup", cleanupError); + } + } + try { + producerBinding.unbind(); + } finally { + Queue queue = JCSMPFactory.onlyInstance().createQueue(queueName); + try { + jcsmpSession.deprovision(queue, JCSMPSession.FLAG_IGNORE_DOES_NOT_EXIST); + } catch (Exception deprovisionError) { + logger.warn("Failed to deprovision queue '{}' during cleanup", queueName, deprovisionError); + } + } + } + } + + /** + * Repeated bug reproducer (5/5) - persistent queue publisher + N consecutive + * broker-level message-spool shutdown / restore cycles on the same binding. + * + *

Test 4 proves that the {@code JCSMPOutboundMessageHandler} fix recovers from a + * single unsolicited {@code CloseFlow}. This test proves the same fix continues to work + * across multiple consecutive stale-flow events on the same binding, without any + * intermediate {@code stop() / start()}. Concretely it asserts the + * {@code producerNeedsRecreation} flag is correctly cleared after each successful + * recreation - if it weren't, the second cycle's bug-witness assertion would never see + * a fresh stale exception (the recreated producer from cycle 1 is still considered + * "the new producer" until something tells the handler otherwise). + * + *

This is what guards against a class of regression where the flag-reset is moved or + * weakened (e.g. reset only on {@code start()}, or reset on the recreation-attempt + * branch but missed on the recreation-success branch). The unit test + * {@code test_recreationFlagResetAcrossStopStartCycle} covers the lifecycle-driven + * reset; this IT covers the per-event reset against a real broker. + * + *

Three cycles is enough to expose state-accumulation bugs while keeping the test + * runtime bounded: each cycle costs ~1 broker spool restart (~5-10s). + */ + @Test + void test_persistentQueuePublisher_recoversAcrossRepeatedMessageSpoolCliShutdowns( + JCSMPSession jcsmpSession, + SempV2Api sempV2Api, + SpringCloudStreamContext context, + TestInfo testInfo) throws Exception { + SolaceTestBinder binder = context.getBinder(); + + String queueName = RandomStringUtils.randomAlphanumeric(20); + ExtendedProducerProperties producerProperties = context.createProducerProperties(testInfo); + producerProperties.getExtension().setDestinationType(DestinationType.QUEUE); + BindingProperties producerBindingProperties = new BindingProperties(); + producerBindingProperties.setProducer(producerProperties); + DirectChannel moduleOutputChannel = context.createBindableChannel("output", producerBindingProperties); + + Binding producerBinding = binder.bindProducer(queueName, moduleOutputChannel, producerProperties); + + String vpnName = (String) jcsmpSession.getProperty(JCSMPProperties.VPN_NAME); + String solaceContainerId = findSolaceContainerId(); + final int cycles = 3; + boolean spoolRestored = true; + try { + moduleOutputChannel.send(MessageBuilder.withPayload("initial-healthy").build()); + + for (int cycle = 1; cycle <= cycles; cycle++) { + logger.info("=== Cycle {}/{}: shutdown -> witness-failure -> recover ===", cycle, cycles); + + // Mark the spool as "down" before issuing the destructive CLI; cleared again + // once the matching `no shutdown` lands. If the witness assertion below times + // out mid-cycle the finally-block sees this flag and re-enables the spool. + spoolRestored = false; + + runSolaceCliCommands(solaceContainerId, + "enable", + "configure", + "hardware", + "message-spool", + "shutdown", + "y"); + awaitBrokerSempResponsive(sempV2Api, vpnName); + + runSolaceCliCommands(solaceContainerId, + "enable", + "configure", + "hardware", + "message-spool", + "no shutdown"); + spoolRestored = true; + awaitBrokerSempResponsive(sempV2Api, vpnName); + + // Bug witness for THIS cycle. The handler is still holding a producer + // (either the original one in cycle 1, or the one recreated by cycle N-1's + // recovery publish in later cycles); the broker just tore it down. The + // next publish must surface a JCSMP-rooted failure - if it doesn't, the + // flag was never re-armed and the handler is no longer noticing + // stale-flow events after the first one. + final int currentCycle = cycle; + Awaitility.await(String.format("cycle %d: post-shutdown publish surfaces JCSMP failure", currentCycle)) + .atMost(Duration.ofSeconds(20)) + .pollInterval(Duration.ofMillis(500)) + .untilAsserted(() -> assertThatThrownBy(() -> + moduleOutputChannel.send(MessageBuilder.withPayload( + "witness-c" + currentCycle + "-" + System.nanoTime()).build())) + .isInstanceOf(MessagingException.class) + .hasRootCauseInstanceOf(JCSMPException.class)); + + // Recovery for THIS cycle. The fix must take effect on every cycle, not + // just the first - any regression that resets the flag only once would + // leave the producer permanently dead from cycle 2 onwards. + assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload( + "recovery-c" + currentCycle).build())) + .as("Cycle %d: publish must recover after broker CLI shutdown; producerNeedsRecreation flag must reset between cycles", currentCycle) + .doesNotThrowAnyException(); + } + + // After N cycles the binding's producer should still be servicing plain publishes + // without re-triggering the recreate path - i.e. the flag is cleanly cleared and + // the handler is in a steady state, not stuck in a perpetual recreate loop. + assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload("final-healthy").build())) + .as("After %d shutdown/recover cycles, the binding's producer must continue to publish normally", cycles) + .doesNotThrowAnyException(); + } finally { + if (!spoolRestored) { + try { + runSolaceCliCommands(solaceContainerId, + "enable", + "configure", + "hardware", + "message-spool", + "no shutdown"); + } catch (Exception cleanupError) { + logger.warn("Failed to re-enable broker message-spool during cleanup", cleanupError); + } + } + try { + producerBinding.unbind(); + } finally { + Queue queue = JCSMPFactory.onlyInstance().createQueue(queueName); + try { + jcsmpSession.deprovision(queue, JCSMPSession.FLAG_IGNORE_DOES_NOT_EXIST); + } catch (Exception deprovisionError) { + logger.warn("Failed to deprovision queue '{}' during cleanup", queueName, deprovisionError); + } + } + } + } + + // === SEMP-driven broker-state poll helpers === + // Each helper Awaits the broker reaching a known state before the test proceeds. + // Polls SEMP every 100ms with a 10s ceiling. ignoreExceptions() lets the poll + // shrug off transient ApiExceptions during state transitions (e.g. while the + // broker is mid-restart from a CLI message-spool shutdown). + + /** + * Polls SEMPv2 until the VPN's {@code maxMsgSpoolUsage} matches the supplied value. + * Used after {@code BrokerConfigurator.disableMsgSpoolForVpn / restoreMsgSpoolForVpn} + * to confirm the broker has committed the change before the next test step runs. + */ + private static void awaitVpnMaxMsgSpoolUsage(SempV2Api sempV2Api, String vpnName, long expectedMb) { + Awaitility.await(String.format("VPN '%s' maxMsgSpoolUsage == %d MB", vpnName, expectedMb)) + .atMost(Duration.ofSeconds(10)) + .pollInterval(Duration.ofMillis(100)) + .ignoreExceptions() + .untilAsserted(() -> assertThat(sempV2Api.config() + .getMsgVpn(vpnName, null, null) + .getData() + .getMaxMsgSpoolUsage()) + .isEqualTo(expectedMb)); + } + + /** + * Polls SEMPv2 until a queue's ingress and egress flags match the supplied values. + * Used after {@code BrokerConfigurator.disableIngressOnQueue / disableEgressOnQueue} + * (and their re-enable counterparts) to confirm both broker-side flags have settled. + */ + private static void awaitQueueIngressEgress(SempV2Api sempV2Api, String vpnName, String queueName, + boolean expectedIngress, boolean expectedEgress) { + Awaitility.await(String.format("queue '%s' ingress=%s, egress=%s", + queueName, expectedIngress, expectedEgress)) + .atMost(Duration.ofSeconds(10)) + .pollInterval(Duration.ofMillis(100)) + .ignoreExceptions() + .untilAsserted(() -> { + ConfigMsgVpnQueue queue = sempV2Api.config() + .getMsgVpnQueue(vpnName, queueName, null, null) + .getData(); + assertThat(queue.isIngressEnabled()).as("ingress").isEqualTo(expectedIngress); + assertThat(queue.isEgressEnabled()).as("egress").isEqualTo(expectedEgress); + }); + } + + /** + * Polls SEMPv2 until the broker is responsive on the supplied VPN. Used as a proxy + * "broker is stable" check around the CLI message-spool shutdown / re-enable cycle + * where there is no direct SEMP-level signal for spool-subsystem state - if the + * monitor API answers a {@code MsgVpn} lookup successfully, the broker is at least + * back to a queryable state. + */ + private static void awaitBrokerSempResponsive(SempV2Api sempV2Api, String vpnName) { + Awaitility.await("broker SEMP API responsive for VPN '" + vpnName + "'") + .atMost(Duration.ofSeconds(15)) + .pollInterval(Duration.ofMillis(100)) + .ignoreExceptions() + .untilAsserted(() -> assertThat(sempV2Api.monitor() + .getMsgVpn(vpnName, null) + .getData()) + .isNotNull()); + } + + // === Docker / CLI helpers (only used by the message-spool CLI shutdown test) === + + /** + * Finds the running PubSub+ test container created by {@link PubSubPlusExtension}. Uses + * the docker-java client that testcontainers exposes (so it works on the same daemon + * testcontainers is using) and filters by image name. The standard Solace PubSub+ + * container image identifier always contains {@code solace-pubsub-standard}. + */ + private static String findSolaceContainerId() { + DockerClient docker = DockerClientFactory.instance().client(); + List containers = docker.listContainersCmd().exec(); + return containers.stream() + .filter(c -> c.getImage() != null && c.getImage().contains("solace-pubsub-standard")) + .findFirst() + .map(Container::getId) + .orElseThrow(() -> new IllegalStateException( + "No running 'solace-pubsub-standard' container found via the docker client. " + + "This test requires the PubSubPlusExtension to have provisioned its container.")); + } + + /** + * Drives the broker container's Solace CLI with the supplied commands. + * + *

Why this needs a TTY: the Solace CLI detects whether stdin is a real + * terminal and only honours confirmation prompts (the "Do you want to continue + * (y/n)?" guard around destructive commands like {@code shutdown}) when it is. When + * stdin is a plain pipe (e.g. {@code printf '...' | cli -A}), the CLI silently + * cancels the destructive command and continues - which is why earlier attempts at + * piping {@code y} answers through a shell pipeline never actually shut the spool + * down. Allocating a pseudo-TTY on the {@code docker exec} (the docker-java + * equivalent of {@code docker exec -it}) makes the CLI process the prompts the + * same way it does in a real terminal session. + * + *

The commands are fed into the TTY's stdin via docker-java's + * {@link com.github.dockerjava.api.command.ExecStartCmd#withStdIn(java.io.InputStream)}. + * Trailing {@code end} / {@code exit} are appended so the CLI cleanly leaves config + * mode and exits instead of waiting on more interactive input. + * + *

The CLI binary path {@code /usr/sw/loads/currentload/bin/cli} is the standard + * location inside the {@code solace-pubsub-standard} image. + */ + private static void runSolaceCliCommands(String containerId, String... cliCommands) throws Exception { + DockerClient docker = DockerClientFactory.instance().client(); + + StringBuilder script = new StringBuilder(); + for (String cmd : cliCommands) { + // Use CRLF so the input is accepted regardless of whether the pty is in + // cooked or raw line discipline. Cooked mode treats \n as Enter on its + // own, but appending \r is harmless and avoids ambiguity. + script.append(cmd).append("\r\n"); + } + // Terminators so the CLI returns instead of waiting for more interactive input. + script.append("end\r\n").append("exit\r\n"); + + ExecCreateCmdResponse exec = docker.execCreateCmd(containerId) + .withTty(true) // pseudo-TTY so the CLI honors confirmation prompts + .withAttachStdin(true) + .withAttachStdout(true) + .withAttachStderr(true) + .withCmd("/usr/sw/loads/currentload/bin/cli", "-A") + .exec(); + + ByteArrayInputStream stdin = new ByteArrayInputStream( + script.toString().getBytes(StandardCharsets.UTF_8)); + + docker.execStartCmd(exec.getId()) + .withTty(true) + .withStdIn(stdin) + .exec(new com.github.dockerjava.api.async.ResultCallback.Adapter<>()) + .awaitCompletion(30, TimeUnit.SECONDS); + } +} \ No newline at end of file From 31880a97ec4db1ffa5f534fb24f0708a214ad4be Mon Sep 17 00:00:00 2001 From: sunil-solace Date: Wed, 13 May 2026 22:53:26 -0400 Subject: [PATCH 02/13] DATAGO-134580: Address PR review feedback Five round-2 fixes from PR #141 review (Copilot): 1. Broaden the recreate trigger in JCSMPOutboundMessageHandler to also arm the producerNeedsRecreation flag on JCSMPTransportException. The broker IT explicitly documents that an unsolicited CloseFlow can surface on send(...) as either StaleSessionException (the typical form) or the raw JCSMPTransportException (when the CloseFlow event reaches send() before JCSMP's stale-marker has propagated); both are now treated as recreation triggers. New unit test test_producerRecreatedAfterJCSMPTransportException covers the new arm. 2. Fix BrokerConfiguratorBuilder.disableMsgSpoolForVpn Javadoc which incorrectly claimed zeroing maxMsgSpoolUsage tears down already-bound publisher / consumer flows. The IT documents the opposite: the broker keeps existing flows alive and only NACKs new GD publishes. Javadoc now matches the empirical behaviour and points at IT test 1. 3. JCSMPProducerCloseFlowRecoveryIT comment-vs-code mismatch: the "TWO sequential confirmation prompts" comment was inaccurate - the `shutdown` command commits after a single `y` (the second prompt visible in the broker's TTY output is decorative; the broker accepts our `y` on the first answerable prompt). Comment realigned to a single confirmation. 4. runSolaceCliCommands now checks the boolean from awaitCompletion(...) and throws an IllegalStateException with the full script and captured stdout/stderr if the docker exec does not complete in time, instead of silently letting downstream assertions fail with misleading timeouts. The Frame callback was extended to buffer the CLI output for inclusion in the diagnostic. 5. CLI script terminator extended from `end\nexit\n` to `end\nexit\nexit\n`. The single `exit` only popped from privileged-exec `#` to user-exec `>` and then sat there waiting for input; cli -A never terminated and awaitCompletion returned false. The second `exit` closes the user-exec session so the cli -A process actually exits and awaitCompletion can observe it. Without this, the new defensive throw in (4) would fire on every CLI invocation and turn the IT into a hard failure. All 411 binder-core unit tests remain green; all 5 broker IT scenarios turn green again with these changes. --- .../outbound/JCSMPOutboundMessageHandler.java | 43 +++++++++---- .../JCSMPOutboundMessageHandlerTest.java | 41 +++++++++++++ .../config/BrokerConfiguratorBuilder.java | 18 ++++-- .../JCSMPProducerCloseFlowRecoveryIT.java | 61 ++++++++++++++----- 4 files changed, 129 insertions(+), 34 deletions(-) diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java index e89a844c2..c8c7168ce 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java @@ -20,6 +20,7 @@ import com.solacesystems.jcsmp.JCSMPFactory; import com.solacesystems.jcsmp.JCSMPSession; import com.solacesystems.jcsmp.JCSMPStreamingPublishCorrelatingEventHandler; +import com.solacesystems.jcsmp.JCSMPTransportException; import com.solacesystems.jcsmp.StaleSessionException; import com.solacesystems.jcsmp.Topic; import com.solacesystems.jcsmp.XMLMessage; @@ -197,14 +198,31 @@ public void handleMessage(@NonNull Message message) throws MessagingException } // DATAGO-134580: when the broker tears down the publisher flow (unsolicited - // CloseFlow on message-spool shutdown, DR failover, etc.), JCSMP marks the - // XMLMessageProducer terminally closed and every subsequent send throws - // StaleSessionException. ClosedFacilityException is the related - // already-closed-locally signal. Both mean the producer reference is dead; - // arm the recreation flag so the next handleMessage(...) rebuilds it. The - // volatile write must happen before the throw below so a concurrent thread + // CloseFlow on message-spool shutdown, DR failover, etc.), JCSMP can surface + // the failure on send(...) in three related forms - all of which mean the + // per-binding producer reference is dead and must be rebuilt before the next + // publish: + // - StaleSessionException ........ the typical form once JCSMP has propagated + // its internal stale-marker to the send caller + // - JCSMPTransportException ...... the raw transport-level form, observed when + // JCSMP delivers the CloseFlow event before + // the stale-marker reaches send() (see + // JCSMPProducerCloseFlowRecoveryIT - the + // bug-witness assertion accepts either form + // via the JCSMPException supertype because + // both have been observed against the broker) + // - ClosedFacilityException ...... the already-closed-locally signal, e.g. + // a redundant send after the producer has + // already been marked closed + // Recreating the producer on a non-CloseFlow JCSMPTransportException is harmless: + // the new producer inherits the still-good JCSMPSession, and if the transport + // fault is at session level the recreate itself will fail and surface through + // the error channel via handleMessagingException - same path as a normal failure. + // The volatile write must happen before the throw below so a concurrent thread // reading the flag sees the update. - if (e instanceof StaleSessionException || e instanceof ClosedFacilityException) { + if (e instanceof StaleSessionException + || e instanceof JCSMPTransportException + || e instanceof ClosedFacilityException) { if (!producerNeedsRecreation) { LOGGER.warn("Detected stale JCSMP producer for binding {} (cause: {}); will " + "recreate on next message ", @@ -314,11 +332,12 @@ private void createProducerInternal() throws JCSMPException { /** * DATAGO-134580: if a prior {@code producer.send(...)} surfaced a - * {@link StaleSessionException} or {@link ClosedFacilityException}, the broker has torn - * down our publisher flow (typically via unsolicited CloseFlow) and the local producer - * reference points at a terminally closed instance. Close the dead producer (and the - * dead transacted session, if any) and build fresh ones via - * {@link #createProducerInternal()} so the next publish can succeed. + * {@link StaleSessionException}, {@link JCSMPTransportException}, or + * {@link ClosedFacilityException}, the broker has torn down our publisher flow + * (typically via unsolicited CloseFlow) and the local producer reference points at a + * terminally closed instance. Close the dead producer (and the dead transacted session, + * if any) and build fresh ones via {@link #createProducerInternal()} so the next + * publish can succeed. * *

Double-checked-locked so concurrent dispatcher threads do not all recreate. The * lock guards close + create only; once the flag is cleared, subsequent diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java index c416f5c1e..3eb235f28 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java @@ -22,6 +22,7 @@ import com.solacesystems.jcsmp.JCSMPProperties; import com.solacesystems.jcsmp.JCSMPSession; import com.solacesystems.jcsmp.JCSMPStreamingPublishCorrelatingEventHandler; +import com.solacesystems.jcsmp.JCSMPTransportException; import com.solacesystems.jcsmp.ProducerFlowProperties; import com.solacesystems.jcsmp.Queue; import com.solacesystems.jcsmp.StaleSessionException; @@ -817,6 +818,46 @@ void test_producerRecreatedAfterClosedFacilityException(@Mock XMLMessageProducer Mockito.verify(messageProducer, Mockito.atLeastOnce()).close(); } + /** + * DATAGO-134580 (companion of {@link #test_producerRecreatedAfterUnsolicitedCloseFlow}): + * the catch block in {@code handleMessage} also recreates on + * {@link JCSMPTransportException}. The broker IT + * ({@code JCSMPProducerCloseFlowRecoveryIT}) documents that when the broker fans out an + * unsolicited CloseFlow, the send-path failure can surface as either the outer + * {@link StaleSessionException} form (covered by + * {@link #test_producerRecreatedAfterUnsolicitedCloseFlow}) or the raw transport-level + * {@link JCSMPTransportException} form, depending on timing between the broker's flow + * teardown and JCSMP's stale-marker propagation to the send caller. Both must lead to + * recreation; without this coverage a regression that drops the transport-exception arm + * would silently regress the bug for that timing variant. + */ + @Test + void test_producerRecreatedAfterJCSMPTransportException(@Mock XMLMessageProducer producerB) throws Exception { + Mockito.when(session.createProducer( + producerFlowPropertiesCaptor.capture(), pubEventHandlerCaptor.capture())) + .thenReturn(messageProducer) + .thenReturn(producerB); + + Mockito.doThrow(new JCSMPTransportException( + "Received unsolicited CloseFlow for producer (503:Service Unavailable).")) + .when(messageProducer).send(any(XMLMessage.class), any(Destination.class)); + + messageHandler.start(); + + Message firstMessage = MessageBuilder.withPayload("payload-1").build(); + assertThatThrownBy(() -> messageHandler.handleMessage(firstMessage)) + .isInstanceOf(MessagingException.class) + .hasCauseInstanceOf(JCSMPTransportException.class); + + Message secondMessage = MessageBuilder.withPayload("payload-2").build(); + messageHandler.handleMessage(secondMessage); + + Mockito.verify(session, Mockito.times(2)) + .createProducer(any(ProducerFlowProperties.class), any(JCSMPStreamingPublishCorrelatingEventHandler.class)); + Mockito.verify(producerB, Mockito.times(1)).send(any(XMLMessage.class), any(Destination.class)); + Mockito.verify(messageProducer, Mockito.atLeastOnce()).close(); + } + /** * DATAGO-134580: when {@code session.createProducer(...)} itself fails during recreation * (e.g. the broker is still mid-restart from the message-spool shutdown), the in-flight diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/it/util/semp/config/BrokerConfiguratorBuilder.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/it/util/semp/config/BrokerConfiguratorBuilder.java index 0844038e8..eb93bc738 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/it/util/semp/config/BrokerConfiguratorBuilder.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/it/util/semp/config/BrokerConfiguratorBuilder.java @@ -280,14 +280,20 @@ public void disableVpn(String msgVpnName) { } /** - * Zeroes out the guaranteed-delivery message-spool quota for a VPN, which causes the broker - * to send an unsolicited {@code CloseFlow} to every currently-bound publisher and consumer - * flow on that VPN. The session itself is left connected; only the GD flows are torn down. + * Zeroes out the guaranteed-delivery message-spool quota for a VPN. * - *

Use {@link #restoreMsgSpoolForVpn(String, Long)} (with the previously-captured value) - * to put the spool back. + *

Empirically (see {@code JCSMPProducerCloseFlowRecoveryIT} test 1) this does + * not tear down already-bound publisher / consumer flows on the VPN: the + * broker keeps the existing flows alive and instead NACKs new GD publishes + * asynchronously while the quota is at zero. This is the negative-control mechanism + * for the DATAGO-134580 reproducer - the actual unsolicited {@code CloseFlow} fan-out + * is driven by a broker-level {@code hardware message-spool shutdown} (CLI), not by + * a VPN-level quota change. * - * @param msgVpnName name of the vpn whose spool to disable + *

Use {@link #restoreMsgSpoolForVpn(String, Long)} (with the previously-captured + * value) to put the spool back. + * + * @param msgVpnName name of the vpn whose spool quota to zero */ public void disableMsgSpoolForVpn(String msgVpnName) { final ConfigMsgVpn vpn = queryVpn(msgVpnName); diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java index e64638094..843e2afa2 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java @@ -384,14 +384,11 @@ void test_persistentQueuePublisher_recoversAfterMessageSpoolCliShutdown( // NOTE on command shape: the Solace CLI requires stepping through the config // sub-modes one line at a time (`hardware`, then `message-spool`) - the dotted // form `hardware message-spool` is not reliably accepted. The `shutdown` - // command itself is destructive and is guarded by TWO sequential confirmation - // prompts: - // All message spooling will be stopped. - // Do you want to continue (y/n)? <-- first prompt - // Do you want to continue (y/n)? <-- second prompt - // We answer both with `y`. Without those, the exec hangs at the first prompt - // and the shutdown never actually takes effect. `no shutdown` (the re-enable) - // is non-destructive and does not prompt. + // command itself is destructive and is guarded by a single confirmation prompt + // ("All message spooling will be stopped. Do you want to continue (y/n)?") + // that we answer with `y`. Without that, the exec hangs at the prompt and the + // shutdown never actually takes effect. `no shutdown` (the re-enable) is + // non-destructive and does not prompt. logger.info("Shutting down broker message-spool via CLI in container '{}'", solaceContainerId); runSolaceCliCommands(solaceContainerId, "enable", @@ -399,7 +396,7 @@ void test_persistentQueuePublisher_recoversAfterMessageSpoolCliShutdown( "hardware", "message-spool", "shutdown", - "y"); + "y"); // answer the "Do you want to continue (y/n)?" confirmation prompt // Wait until the broker's SEMP API is responsive again before driving the // re-enable CLI command. The shutdown is synchronous from the CLI's perspective, // but the broker's HTTP management surface may briefly stutter as the spool @@ -535,7 +532,7 @@ void test_persistentQueuePublisher_recoversAcrossRepeatedMessageSpoolCliShutdown "hardware", "message-spool", "shutdown", - "y"); + "y"); // single "Do you want to continue (y/n)?" confirmation prompt awaitBrokerSempResponsive(sempV2Api, vpnName); runSolaceCliCommands(solaceContainerId, @@ -701,8 +698,18 @@ private static String findSolaceContainerId() { * *

The commands are fed into the TTY's stdin via docker-java's * {@link com.github.dockerjava.api.command.ExecStartCmd#withStdIn(java.io.InputStream)}. - * Trailing {@code end} / {@code exit} are appended so the CLI cleanly leaves config - * mode and exits instead of waiting on more interactive input. + * Trailing {@code end} + two {@code exit}s are appended so the CLI walks the mode stack + * back down and actually terminates the {@code cli -A} process: + *

+	 *   end    : leaves config mode      ((configure/...)# -> #)
+	 *   exit   : leaves privileged exec  (#                  -> >)
+	 *   exit   : closes user-exec session(>                 -> EOF, cli -A exits)
+	 * 
+ * Without the second {@code exit} the CLI sits at the {@code >} prompt waiting for + * more input and {@code awaitCompletion(...)} below would never observe the process + * terminating, which would (a) silently hang for the full timeout and (b) trip the + * defensive throw below on every call - turning a one-shot CLI invocation into a + * 30s hang per call and (post-defensive-check) an outright test failure. * *

The CLI binary path {@code /usr/sw/loads/currentload/bin/cli} is the standard * location inside the {@code solace-pubsub-standard} image. @@ -717,8 +724,10 @@ private static void runSolaceCliCommands(String containerId, String... cliComman // own, but appending \r is harmless and avoids ambiguity. script.append(cmd).append("\r\n"); } - // Terminators so the CLI returns instead of waiting for more interactive input. - script.append("end\r\n").append("exit\r\n"); + // Terminators so the cli -A process actually exits (see method-level Javadoc for + // why two `exit`s are needed). Without the second exit, the CLI hangs at the + // top-level user-exec '>' prompt and awaitCompletion(...) below trips the throw. + script.append("end\r\n").append("exit\r\n").append("exit\r\n"); ExecCreateCmdResponse exec = docker.execCreateCmd(containerId) .withTty(true) // pseudo-TTY so the CLI honors confirmation prompts @@ -731,10 +740,30 @@ private static void runSolaceCliCommands(String containerId, String... cliComman ByteArrayInputStream stdin = new ByteArrayInputStream( script.toString().getBytes(StandardCharsets.UTF_8)); - docker.execStartCmd(exec.getId()) + // Capture the CLI's stdout/stderr so a timeout produces a diagnostic message rather + // than a silent hang followed by a misleading assertion failure downstream. + // docker-java's TTY mode merges stdout+stderr onto a single Frame stream, which is + // what `cli -A` produces anyway. + final StringBuilder capturedOutput = new StringBuilder(); + com.github.dockerjava.api.async.ResultCallback.Adapter callback = + new com.github.dockerjava.api.async.ResultCallback.Adapter<>() { + @Override + public void onNext(com.github.dockerjava.api.model.Frame frame) { + capturedOutput.append(new String(frame.getPayload(), StandardCharsets.UTF_8)); + } + }; + + boolean completed = docker.execStartCmd(exec.getId()) .withTty(true) .withStdIn(stdin) - .exec(new com.github.dockerjava.api.async.ResultCallback.Adapter<>()) + .exec(callback) .awaitCompletion(30, TimeUnit.SECONDS); + + if (!completed) { + throw new IllegalStateException(String.format( + "Solace CLI exec did not complete within 30s in container '%s'. " + + "Script:%n%s%nCaptured output:%n%s", + containerId, script, capturedOutput)); + } } } \ No newline at end of file From caaee52841bf328185f0e5bbad61a0bc02c5dd7a Mon Sep 17 00:00:00 2001 From: sunil-solace Date: Thu, 14 May 2026 16:26:36 -0400 Subject: [PATCH 03/13] DATAGO-134580: Apply review feedback on outbound handler Five changes from mayur-solace's PR #141 review, all in JCSMPOutboundMessageHandler: - M1 / producer.isClosed() defensive check: stale-detection in the send-path catch now also arms the recreation flag when producer.isClosed() returns true. Guards against any future JCSMP exception subclass we don't enumerate explicitly - if JCSMP has marked the producer closed, recreation is always the right move. - M2 / single-line if braces: all single-line if guards in recreateProducerIfNeeded() are now braced for consistency with house style. - M3 / field rename: producerNeedsRecreation -> recreateProducer. Test Javadocs and assertion messages updated to match. - M4 / catch-block ordering: stale-detection moved to the top of the catch in handleMessage(), before the transactional rollback handling. Behaviour is unchanged (the flag is consumed by the next handleMessage, not by anything in this catch) but the structure now reads as "establish facts about the producer first, then handle transactional bookkeeping". - M5 / comment cleanup: stripped the verbose DATAGO-134580 comments and the Javadocs I added on createProducerInternal and recreateProducerIfNeeded. Method names are self-documenting; the ticket reference now lives on a single line above the field declaration, matching the reviewer's suggestion. 411 binder-core unit tests remain green (no behavioural change from M1-M5; only one new code path is M1's isClosed() OR-arm). --- .../outbound/JCSMPOutboundMessageHandler.java | 110 +++++------------- .../JCSMPOutboundMessageHandlerTest.java | 4 +- .../JCSMPProducerCloseFlowRecoveryIT.java | 6 +- 3 files changed, 34 insertions(+), 86 deletions(-) diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java index c8c7168ce..5a497a183 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java @@ -67,12 +67,8 @@ public final class JCSMPOutboundMessageHandler implements MessageHandler, Lifecy private boolean isRunning = false; private ErrorMessageStrategy errorMessageStrategy; - // DATAGO-134580: when the broker sends an unsolicited CloseFlow (message-spool shutdown, - // DR failover, "Service Unavailable" on GD), JCSMP marks the producer terminally closed. - // The session itself stays connected, so the next producer.send(...) throws - // StaleSessionException synchronously. We set this flag from the catch block and lazily - // recreate the producer at the top of the next handleMessage(...) call. - private volatile boolean producerNeedsRecreation = false; + // DATAGO-134580: recreate JCSMP producer on unsolicited termination from Solace broker. + private volatile boolean recreateProducer = false; private final Object recreateLock = new Object(); private static final Logger LOGGER = LoggerFactory.getLogger(JCSMPOutboundMessageHandler.class); @@ -106,10 +102,6 @@ public void handleMessage(@NonNull Message message) throws MessagingException throw handleMessagingException(correlationKey, msg0, new ClosedChannelBindingException(msg1)); } - // DATAGO-134580: if a previous publish marked the producer stale (broker fanned out an - // unsolicited CloseFlow), rebuild it before attempting this publish. A failure here is - // routed through the normal error channel via handleMessagingException, with the - // recreation flag left armed so the next message retries. recreateProducerIfNeeded(correlationKey); try { @@ -180,6 +172,18 @@ public void handleMessage(@NonNull Message message) throws MessagingException producerEventHandler.responseReceivedEx(correlationKey); } } catch (JCSMPException e) { + if (e instanceof StaleSessionException + || e instanceof JCSMPTransportException + || e instanceof ClosedFacilityException + || (producer != null && producer.isClosed())) { + if (!recreateProducer) { + LOGGER.warn("Detected stale JCSMP producer for binding {} (cause: {}); will " + + "recreate on next message ", + properties.getBindingName(), e.getClass().getSimpleName(), id); + } + recreateProducer = true; + } + if (transactedSession != null) { try { if (!(e instanceof RollbackException)) { @@ -197,40 +201,6 @@ public void handleMessage(@NonNull Message message) throws MessagingException } } - // DATAGO-134580: when the broker tears down the publisher flow (unsolicited - // CloseFlow on message-spool shutdown, DR failover, etc.), JCSMP can surface - // the failure on send(...) in three related forms - all of which mean the - // per-binding producer reference is dead and must be rebuilt before the next - // publish: - // - StaleSessionException ........ the typical form once JCSMP has propagated - // its internal stale-marker to the send caller - // - JCSMPTransportException ...... the raw transport-level form, observed when - // JCSMP delivers the CloseFlow event before - // the stale-marker reaches send() (see - // JCSMPProducerCloseFlowRecoveryIT - the - // bug-witness assertion accepts either form - // via the JCSMPException supertype because - // both have been observed against the broker) - // - ClosedFacilityException ...... the already-closed-locally signal, e.g. - // a redundant send after the producer has - // already been marked closed - // Recreating the producer on a non-CloseFlow JCSMPTransportException is harmless: - // the new producer inherits the still-good JCSMPSession, and if the transport - // fault is at session level the recreate itself will fail and surface through - // the error channel via handleMessagingException - same path as a normal failure. - // The volatile write must happen before the throw below so a concurrent thread - // reading the flag sees the update. - if (e instanceof StaleSessionException - || e instanceof JCSMPTransportException - || e instanceof ClosedFacilityException) { - if (!producerNeedsRecreation) { - LOGGER.warn("Detected stale JCSMP producer for binding {} (cause: {}); will " + - "recreate on next message ", - properties.getBindingName(), e.getClass().getSimpleName(), id); - } - producerNeedsRecreation = true; - } - throw handleMessagingException(correlationKey, "Unable to send message(s) to destination", e); } finally { if (solaceMeterAccessor != null) { @@ -282,8 +252,7 @@ public void start() { LOGGER.warn("Nothing to do, message handler {} is already running", id); return; } - // Clear any stale recreation flag left over from a prior lifecycle. - producerNeedsRecreation = false; + recreateProducer = false; try { Map headerNameMapping = properties.getExtension().getHeaderNameMapping(); @@ -310,14 +279,6 @@ public void start() { isRunning = true; } - /** - * Builds the per-binding {@link XMLMessageProducer} (and the underlying - * {@link TransactedSession} when the binding is transacted) using the same flow - * properties and event handler as the initial {@link #start()} call. Extracted from - * {@code start()} so both the initial-create path and the post-CloseFlow recreate path - * share one implementation - the alternative would be duplicating the - * transacted/non-transacted branch in two places. - */ private void createProducerInternal() throws JCSMPException { if (properties.getExtension().isTransacted()) { LOGGER.info("Creating transacted session ", id); @@ -330,45 +291,33 @@ private void createProducerInternal() throws JCSMPException { } } - /** - * DATAGO-134580: if a prior {@code producer.send(...)} surfaced a - * {@link StaleSessionException}, {@link JCSMPTransportException}, or - * {@link ClosedFacilityException}, the broker has torn down our publisher flow - * (typically via unsolicited CloseFlow) and the local producer reference points at a - * terminally closed instance. Close the dead producer (and the dead transacted session, - * if any) and build fresh ones via {@link #createProducerInternal()} so the next - * publish can succeed. - * - *

Double-checked-locked so concurrent dispatcher threads do not all recreate. The - * lock guards close + create only; once the flag is cleared, subsequent - * {@link #handleMessage(Message)} calls fall through to the publish path without - * acquiring the lock. - * - *

If recreation itself fails (e.g. the broker is still mid-restart from the spool - * shutdown), the flag stays {@code true}, the current in-flight message is surfaced - * via {@link #handleMessagingException} the same way the original failing send was, - * and the next inbound message retries recreation. No internal retry loop, no new - * threads. - */ private void recreateProducerIfNeeded(ErrorChannelSendingCorrelationKey correlationKey) throws MessagingException { - if (!producerNeedsRecreation) return; + if (!recreateProducer) { + return; + } synchronized (recreateLock) { - if (!producerNeedsRecreation) return; + if (!recreateProducer) { + return; + } LOGGER.warn("Recreating JCSMP producer for binding {} after stale-flow detection ", properties.getBindingName(), id); try { - if (producer != null) producer.close(); + if (producer != null) { + producer.close(); + } } catch (Exception closeError) { LOGGER.debug("Failed to close stale producer during recreation ", id, closeError); } try { - if (transactedSession != null) transactedSession.close(); + if (transactedSession != null) { + transactedSession.close(); + } } catch (Exception closeError) { LOGGER.debug("Failed to close stale transacted session during recreation ", id, closeError); } try { createProducerInternal(); - producerNeedsRecreation = false; + recreateProducer = false; } catch (JCSMPException createError) { throw handleMessagingException(correlationKey, "Failed to recreate JCSMP producer after stale-flow detection", createError); @@ -385,8 +334,7 @@ public void stop() { private void closeResources() { LOGGER.info("Stopping producer to {} {} ", configDestinationType, configDestination.getName(), id); - // Clear any pending recreation request so a stop/start cycle starts from a clean state. - producerNeedsRecreation = false; + recreateProducer = false; if (producer != null) { LOGGER.info("Closing producer ", id); producer.close(); diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java index 3eb235f28..817d62f07 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java @@ -862,7 +862,7 @@ void test_producerRecreatedAfterJCSMPTransportException(@Mock XMLMessageProducer * DATAGO-134580: when {@code session.createProducer(...)} itself fails during recreation * (e.g. the broker is still mid-restart from the message-spool shutdown), the in-flight * message must be routed through the error channel via {@code handleMessagingException}, - * the {@code producerNeedsRecreation} flag must stay armed, and the next inbound message + * the {@code recreateProducer} flag must stay armed, and the next inbound message * must retry recreation. This guards against a silent infinite-retry-loop or * lost-error class of regression. * @@ -923,7 +923,7 @@ void test_producerRecreationFailurePropagatesAndRetriesNext(@Mock XMLMessageProd /** * DATAGO-134580: {@code start()} and {@code closeResources()} both reset the - * {@code producerNeedsRecreation} flag, so a stop/start lifecycle starts from a clean + * {@code recreateProducer} flag, so a stop/start lifecycle starts from a clean * state regardless of what happened in the prior lifecycle. Without that reset, a stop * after a stale-flow event followed by a fresh start would gratuitously recreate the * brand-new producer on the very first message. diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java index 843e2afa2..65c50f733 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java @@ -108,7 +108,7 @@ *

  • {@link #test_persistentQueuePublisher_recoversAcrossRepeatedMessageSpoolCliShutdowns persistentQueuePublisher_recoversAcrossRepeatedMessageSpoolCliShutdowns} * - Repeated bug reproducer. Same disruption as (4) but looped across multiple * consecutive shutdown/restore cycles on the same binding. Proves the - * {@code producerNeedsRecreation} flag resets correctly after each successful + * {@code recreateProducer} flag resets correctly after each successful * recreation and that no state accumulates on the binder. A regression that only reset * the flag once (e.g. a non-volatile read or a missed write in the success path) would * reproduce the bug from the second cycle onwards.
  • @@ -480,7 +480,7 @@ void test_persistentQueuePublisher_recoversAfterMessageSpoolCliShutdown( * single unsolicited {@code CloseFlow}. This test proves the same fix continues to work * across multiple consecutive stale-flow events on the same binding, without any * intermediate {@code stop() / start()}. Concretely it asserts the - * {@code producerNeedsRecreation} flag is correctly cleared after each successful + * {@code recreateProducer} flag is correctly cleared after each successful * recreation - if it weren't, the second cycle's bug-witness assertion would never see * a fresh stale exception (the recreated producer from cycle 1 is still considered * "the new producer" until something tells the handler otherwise). @@ -565,7 +565,7 @@ void test_persistentQueuePublisher_recoversAcrossRepeatedMessageSpoolCliShutdown // leave the producer permanently dead from cycle 2 onwards. assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload( "recovery-c" + currentCycle).build())) - .as("Cycle %d: publish must recover after broker CLI shutdown; producerNeedsRecreation flag must reset between cycles", currentCycle) + .as("Cycle %d: publish must recover after broker CLI shutdown; recreateProducer flag must reset between cycles", currentCycle) .doesNotThrowAnyException(); } From 134e7ef3afe62b39e8a887b24cc952a11dfdeb79 Mon Sep 17 00:00:00 2001 From: sunil-solace Date: Thu, 14 May 2026 16:52:38 -0400 Subject: [PATCH 04/13] DATAGO-134580: Proactively recreate producer on isClosed() pre-check Extend the recreate-on-stale guard with a proactive producer.isClosed() pre-check at the top of handleMessage(...). The reactive (catch-block) detection stays as-is and continues to cover the race window where the broker tears down the producer between our pre-check and the actual send(...) call, plus any future JCSMP exception subclass we don't enumerate. Customer-visible effect: the first publish after the broker fans out an unsolicited CloseFlow now succeeds rather than being surfaced to the error channel. Previously the reactive-only path always lost that first message - the catch armed the flag and the next message recovered, but the in-flight message that hit the dead producer always failed. Unit test test_producerRecreatedProactivelyWhenIsClosedDetectedBeforeSend mocks a producer that reports isClosed() == true on the first handleMessage, with no exception thrown by send, and verifies: - recreation happens (createProducer called twice: once at start, once proactively) - the fresh producer services the publish - the closed original is never sent through (Mockito.never()) - the closed original is closed during the recreate Integration test updates: the bug-witness assertions in tests 4 and 5 of JCSMPProducerCloseFlowRecoveryIT previously expected the first post-shutdown publish to throw a JCSMP-rooted MessagingException - that was the master-branch failure mode they documented. With the proactive check, the handler intercepts the dead producer before send(...) is ever called, so the first publish now succeeds transparently. The bug- witness phase is replaced with a doesNotThrowAnyException assertion on the first publish, plus a steady-state assertion on a follow-up publish. The new shape is strictly stricter than the old: a regression that removed the proactive check would fail because reactive-only recovery surfaces an exception on that first attempt; a regression that removed both halves would fail outright; a regression on cycle 2+ in test 5 would fail on the second cycle. Test-class Javadoc, per-test headers, and the test list have all been updated to drop the "reproducer"/"bug witness" framing - the bug doesn't manifest as a publish failure anymore, the IT now characterises the recovery contract instead. 411 binder-core unit tests still green (75 in JCSMPOutboundMessageHandlerTest, +1 from previous commit for the new proactive test). --- .../outbound/JCSMPOutboundMessageHandler.java | 4 +- .../JCSMPOutboundMessageHandlerTest.java | 35 ++++ .../JCSMPProducerCloseFlowRecoveryIT.java | 187 +++++++++--------- 3 files changed, 126 insertions(+), 100 deletions(-) diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java index 5a497a183..1ad471449 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java @@ -292,11 +292,11 @@ private void createProducerInternal() throws JCSMPException { } private void recreateProducerIfNeeded(ErrorChannelSendingCorrelationKey correlationKey) throws MessagingException { - if (!recreateProducer) { + if (!recreateProducer && (producer == null || !producer.isClosed())) { return; } synchronized (recreateLock) { - if (!recreateProducer) { + if (!recreateProducer && (producer == null || !producer.isClosed())) { return; } LOGGER.warn("Recreating JCSMP producer for binding {} after stale-flow detection ", diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java index 817d62f07..dd5922c6e 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java @@ -858,6 +858,41 @@ void test_producerRecreatedAfterJCSMPTransportException(@Mock XMLMessageProducer Mockito.verify(messageProducer, Mockito.atLeastOnce()).close(); } + /** + * DATAGO-134580: proactive {@code producer.isClosed()} pre-check. If the broker has + * already fanned out unsolicited CloseFlow to this binding's producer before + * any {@code handleMessage} runs (i.e. the recreate flag is not yet armed because no + * previous send has hit the catch block), the very first inbound message should still + * recreate the producer rather than attempting to send through a known-closed one. + * Without the pre-check the first message after every CloseFlow event would be lost to + * the error channel; with the pre-check that message publishes cleanly. + */ + @Test + void test_producerRecreatedProactivelyWhenIsClosedDetectedBeforeSend(@Mock XMLMessageProducer producerB) throws Exception { + Mockito.when(session.createProducer( + producerFlowPropertiesCaptor.capture(), pubEventHandlerCaptor.capture())) + .thenReturn(messageProducer) + .thenReturn(producerB); + + // Simulate the broker having torn down the flow asynchronously: producer reports + // itself as closed, but no send has yet hit the catch block to arm the flag. + Mockito.when(messageProducer.isClosed()).thenReturn(true); + + messageHandler.start(); + + Message firstMessage = MessageBuilder.withPayload("payload-1").build(); + messageHandler.handleMessage(firstMessage); + + // The first message must have triggered recreation (producer was visibly closed + // before send) and then succeeded against the fresh producer. + Mockito.verify(session, Mockito.times(2)) + .createProducer(any(ProducerFlowProperties.class), any(JCSMPStreamingPublishCorrelatingEventHandler.class)); + Mockito.verify(producerB, Mockito.times(1)).send(any(XMLMessage.class), any(Destination.class)); + // The original (closed) producer must never have been asked to send. + Mockito.verify(messageProducer, Mockito.never()).send(any(XMLMessage.class), any(Destination.class)); + Mockito.verify(messageProducer, Mockito.atLeastOnce()).close(); + } + /** * DATAGO-134580: when {@code session.createProducer(...)} itself fails during recreation * (e.g. the broker is still mid-restart from the message-spool shutdown), the in-flight diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java index 65c50f733..a35ffbc89 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java @@ -13,7 +13,6 @@ import com.solace.test.integration.semp.v2.SempV2Api; import com.solace.test.integration.semp.v2.config.model.ConfigMsgVpnQueue; import com.solacesystems.jcsmp.DeliveryMode; -import com.solacesystems.jcsmp.JCSMPException; import com.solacesystems.jcsmp.JCSMPFactory; import com.solacesystems.jcsmp.JCSMPProperties; import com.solacesystems.jcsmp.JCSMPSession; @@ -53,7 +52,6 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatCode; -import static org.assertj.core.api.Assertions.assertThatThrownBy; /** * Broker integration tests characterising producer behaviour around the @@ -67,10 +65,10 @@ * reproduce the bug. They are kept as long-term coverage so that any future broker behaviour * change (e.g. a future broker version starts ejecting already-bound publisher flows on a * spool-quota change) shows up immediately as a test failure rather than a silent regression. - * The fourth test is the bug reproducer that pins the exact customer-reported failure - * mode and is what the {@code JCSMPOutboundMessageHandler} fix must turn green. The fifth - * test loops the bug reproducer across multiple consecutive stale-flow cycles, proving the - * recreate-on-stale path resets cleanly between events and does not accumulate state. + * The fourth test drives the customer-reported broker disruption and asserts that the binding + * transparently recovers via the {@code JCSMPOutboundMessageHandler} fix. The fifth test + * loops the same disruption across multiple consecutive stale-flow cycles, proving the + * recreate path stays effective across repeated events on the same binding. * *
      *
    1. {@link #test_persistentTopicPublisher_survivesSpoolToggle persistentTopicPublisher_survivesSpoolToggle} @@ -94,24 +92,24 @@ * {@code CloseFlow}, so the bound publisher flow stays alive across the cycle.
    2. * *
    3. {@link #test_persistentQueuePublisher_recoversAfterMessageSpoolCliShutdown persistentQueuePublisher_recoversAfterMessageSpoolCliShutdown} - * - Bug reproducer. Persistent publishing to a Queue destination + a broker-level + * - Recovery test. Persistent publishing to a Queue destination + a broker-level * {@code hardware message-spool shutdown} / {@code no shutdown} cycle driven through * the broker's CLI via {@code docker exec}. This is the only mechanism that actually * fans out unsolicited {@code CloseFlow} (503 Service Unavailable) to every bound * publisher flow without dropping the JCSMP session - matching the customer's - * traceback exactly. The first post-shutdown publish surfaces - * {@link StaleSessionException}; on master the binding stays stuck in that state. After - * the {@code JCSMPOutboundMessageHandler} recreate-on-stale fix lands, the handler - * rebuilds the producer at the top of {@code handleMessage(...)} and the next publish - * succeeds.
    4. + * traceback exactly. On master without the fix the post-shutdown publish surfaces + * {@link StaleSessionException} and the binding stays stuck. With the fix the handler's + * proactive {@code producer.isClosed()} pre-check catches the dead producer at the top + * of {@code handleMessage(...)} and rebuilds it before send, so the publish recovers + * transparently on its first attempt. * *
    5. {@link #test_persistentQueuePublisher_recoversAcrossRepeatedMessageSpoolCliShutdowns persistentQueuePublisher_recoversAcrossRepeatedMessageSpoolCliShutdowns} - * - Repeated bug reproducer. Same disruption as (4) but looped across multiple - * consecutive shutdown/restore cycles on the same binding. Proves the - * {@code recreateProducer} flag resets correctly after each successful - * recreation and that no state accumulates on the binder. A regression that only reset - * the flag once (e.g. a non-volatile read or a missed write in the success path) would - * reproduce the bug from the second cycle onwards.
    6. + * - Repeated recovery test. Same disruption as (4) but looped across multiple + * consecutive shutdown/restore cycles on the same binding. Each cycle the broker tears + * down the current producer (the original in cycle 1, the recreated one in cycle 2, + * and so on) and the proactive check must catch each closure and rebuild. Guards + * against a regression where the recreate path becomes single-use or where state + * accumulates across cycles. *
    * *

    All five tests run in {@link ExecutionMode#SAME_THREAD} because each touches shared @@ -322,15 +320,16 @@ void test_persistentQueuePublisher_survivesQueueIngressEgressToggle( } /** - * Bug reproducer (4/5) - persistent queue publisher + broker-level message-spool shutdown. + * Recovery test (4/5) - persistent queue publisher + broker-level message-spool shutdown. * - *

    This is the precise customer-reported reproduction. The {@code hardware message-spool - * shutdown} / {@code no shutdown} CLI sequence is the only mechanism that fans out an - * unsolicited {@code CloseFlow} (503 Service Unavailable) to every currently-bound - * publisher flow on the broker without dropping the JCSMP session. SEMPv2-only - * approaches (zeroing the spool quota, toggling queue ingress/egress) either NACK - * publishes asynchronously without invalidating the flow, or also drop the session as a - * side-effect - neither matches the customer's actual failure mode. + *

    This drives the precise customer-reported failure scenario for DATAGO-134580. The + * {@code hardware message-spool shutdown} / {@code no shutdown} CLI sequence is the + * only mechanism that fans out an unsolicited {@code CloseFlow} (503 Service + * Unavailable) to every currently-bound publisher flow on the broker without + * dropping the JCSMP session. SEMPv2-only approaches (zeroing the spool quota, + * toggling queue ingress/egress) either NACK publishes asynchronously without + * invalidating the flow, or also drop the session as a side-effect - neither matches + * the customer's actual failure mode. * *

    The CLI is reached by finding the running PubSub+ test container via the docker * client that testcontainers already exposes, then {@code docker exec}ing the @@ -338,19 +337,20 @@ void test_persistentQueuePublisher_survivesQueueIngressEgressToggle( * stdin. We cannot avoid this layer of indirection because {@code PubSubPlusExtension} * does not expose the container as a parameter, and SEMPv2 has no equivalent action. * - *

    Two assertions: + *

    Assertions: *

      - *
    1. Bug witness - the first post-shutdown publish surfaces - * {@link MessagingException} with a {@link JCSMPException}-rooted cause. The - * customer's stack trace shows {@link StaleSessionException} wrapping a - * {@code JCSMPTransportException("Received unsolicited CloseFlow ... 503:Service - * Unavailable")}; depending on broker timing and JCSMP buffer state the test - * sometimes surfaces the inner {@code JCSMPTransportException} directly as the - * root cause instead. Both forms are manifestations of the same broker-driven - * flow teardown - we accept either via the common {@link JCSMPException} - * supertype.
    2. - *
    3. Recovery - the next publish completes cleanly only after the - * {@code JCSMPOutboundMessageHandler} recreate-on-stale fix lands.
    4. + *
    5. Proactive recovery - the first post-shutdown publish must succeed. By + * the time we reach this assertion JCSMP has fanned the unsolicited CloseFlow + * into the producer's actor thread (visible as {@code JCSMPTransportException: + * Received unsolicited CloseFlow ... 503} in test logs) so the binding's + * producer reference is closed. The handler's proactive + * {@code producer.isClosed()} pre-check at the top of {@code handleMessage(...)} + * detects this and rebuilds the producer before {@code send(...)} is ever + * attempted. On master without the fix this assertion fails with a + * {@link StaleSessionException}-rooted {@link MessagingException}.
    6. + *
    7. Steady state - a subsequent publish on the recreated producer also + * succeeds. Guards against a regression that leaves the producer in a + * half-built state.
    8. *
    */ @Test @@ -412,37 +412,36 @@ void test_persistentQueuePublisher_recoversAfterMessageSpoolCliShutdown( "message-spool", "no shutdown"); spoolRestored = true; - // Same probe again after re-enable - confirms the broker is back to a - // queryable / stable state before we attempt the bug-witness publish. + // Probe again after re-enable - confirms the broker is back to a queryable / + // stable state before we attempt the recovery publish. awaitBrokerSempResponsive(sempV2Api, vpnName); - // === Phase 2: assertion 1 - bug witness === - // The first post-shutdown publish must surface a JCSMP-rooted MessagingException. - // We accept any JCSMPException root cause (StaleSessionException, - // ClosedFacilityException, JCSMPTransportException) because the broker's CLI-driven - // teardown can surface as either the outer StaleSessionException form (matching - // the customer's stack trace verbatim) or the inner JCSMPTransportException form - // when JCSMP delivers the CloseFlow event before the producer's stale-marker has - // been propagated to the send() caller. Both forms prove the same thing - that - // the broker tore down the publisher flow and the binding's local producer is - // no longer usable. - logger.info("Attempting first post-shutdown publish; expecting a JCSMP-rooted failure"); - Awaitility.await("post-CLI-shutdown publish surfaces a JCSMP-rooted failure") - .atMost(Duration.ofSeconds(20)) - .pollInterval(Duration.ofMillis(500)) - .untilAsserted(() -> assertThatThrownBy(() -> - moduleOutputChannel.send(MessageBuilder.withPayload("witness-" + System.nanoTime()).build())) - .isInstanceOf(MessagingException.class) - .hasRootCauseInstanceOf(JCSMPException.class)); - - // === Phase 3: assertion 2 - recovery === - // Pre-fix (master): handler-local producer is still the dead one, this throws - // StaleSessionException - assertion fails. That failure IS the bug. - // Post-fix: handler detects the stale flag, recreates the producer and - // the publish succeeds. - logger.info("Attempting recovery publish; expecting success only with the fix in place"); - assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload("recovery").build())) - .as("Publish after broker message-spool CLI shutdown must recover; on master it stays stale and re-throws StaleSessionException") + // === Phase 2: recovery proof === + // The first post-shutdown publish MUST succeed. By this point JCSMP has fanned + // the unsolicited CloseFlow into the producer's actor thread (visible in test + // logs as "JCSMPTransportException: Received unsolicited CloseFlow ... 503"), + // so the binding's producer reference is now closed. The handler's proactive + // producer.isClosed() pre-check at the top of handleMessage() detects this and + // rebuilds the producer before send() is ever attempted - so the publish + // transparently recovers on its first attempt rather than the customer seeing + // a JCSMPException through the error channel. + // + // On master without the fix this assertion fails with a StaleSessionException + // rooted in the customer's exact traceback; with only the reactive (catch-block) + // half of the fix this would also fail because the catch arms the flag but the + // in-flight message itself is still surfaced to the error channel; the proactive + // pre-check is what makes this single assertion green. + logger.info("Attempting first post-shutdown publish; expecting proactive recovery"); + assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload("recovery-1").build())) + .as("First publish after broker CLI message-spool shutdown must recover via proactive isClosed() pre-check") + .doesNotThrowAnyException(); + + // === Phase 3: steady-state proof === + // A subsequent publish on the now-recreated producer must also succeed - guards + // against a regression where the recreate path inadvertently leaves the producer + // in a half-built state. + assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload("recovery-2").build())) + .as("Steady-state publish after recovery must continue to work") .doesNotThrowAnyException(); } finally { // Belt-and-suspenders: if assertion 1 timed out before we re-enabled the spool, @@ -473,23 +472,19 @@ void test_persistentQueuePublisher_recoversAfterMessageSpoolCliShutdown( } /** - * Repeated bug reproducer (5/5) - persistent queue publisher + N consecutive + * Repeated recovery test (5/5) - persistent queue publisher + N consecutive * broker-level message-spool shutdown / restore cycles on the same binding. * *

    Test 4 proves that the {@code JCSMPOutboundMessageHandler} fix recovers from a * single unsolicited {@code CloseFlow}. This test proves the same fix continues to work * across multiple consecutive stale-flow events on the same binding, without any - * intermediate {@code stop() / start()}. Concretely it asserts the - * {@code recreateProducer} flag is correctly cleared after each successful - * recreation - if it weren't, the second cycle's bug-witness assertion would never see - * a fresh stale exception (the recreated producer from cycle 1 is still considered - * "the new producer" until something tells the handler otherwise). - * - *

    This is what guards against a class of regression where the flag-reset is moved or - * weakened (e.g. reset only on {@code start()}, or reset on the recreation-attempt - * branch but missed on the recreation-success branch). The unit test - * {@code test_recreationFlagResetAcrossStopStartCycle} covers the lifecycle-driven - * reset; this IT covers the per-event reset against a real broker. + * intermediate {@code stop() / start()}. Each cycle the broker tears down the current + * producer (the original in cycle 1, the recreated one in cycle 2, and so on); the + * proactive {@code producer.isClosed()} pre-check must catch the closed producer at + * the top of every {@code handleMessage(...)} and rebuild it before send. A regression + * that prevented re-detection on the second-or-later cycle (e.g. an accumulated state + * bug, or the recreate flag becoming sticky) would surface here as the first cycle + * passing but later cycles failing. * *

    Three cycles is enough to expose state-accumulation bugs while keeping the test * runtime bounded: each cycle costs ~1 broker spool restart (~5-10s). @@ -544,28 +539,24 @@ void test_persistentQueuePublisher_recoversAcrossRepeatedMessageSpoolCliShutdown spoolRestored = true; awaitBrokerSempResponsive(sempV2Api, vpnName); - // Bug witness for THIS cycle. The handler is still holding a producer - // (either the original one in cycle 1, or the one recreated by cycle N-1's - // recovery publish in later cycles); the broker just tore it down. The - // next publish must surface a JCSMP-rooted failure - if it doesn't, the - // flag was never re-armed and the handler is no longer noticing - // stale-flow events after the first one. + // Recovery for THIS cycle. The handler is holding a producer (the original + // one in cycle 1, or the one recreated by cycle N-1's recovery publish in + // later cycles); the broker just tore that producer down via CloseFlow. + // The proactive producer.isClosed() pre-check at the top of handleMessage() + // must detect the closed producer and rebuild it BEFORE attempting the send - + // so this publish succeeds on its first attempt. A regression in either the + // proactive detection or the flag-reset-on-recreate would surface here as a + // thrown exception from the first cycle that hits it. final int currentCycle = cycle; - Awaitility.await(String.format("cycle %d: post-shutdown publish surfaces JCSMP failure", currentCycle)) - .atMost(Duration.ofSeconds(20)) - .pollInterval(Duration.ofMillis(500)) - .untilAsserted(() -> assertThatThrownBy(() -> - moduleOutputChannel.send(MessageBuilder.withPayload( - "witness-c" + currentCycle + "-" + System.nanoTime()).build())) - .isInstanceOf(MessagingException.class) - .hasRootCauseInstanceOf(JCSMPException.class)); - - // Recovery for THIS cycle. The fix must take effect on every cycle, not - // just the first - any regression that resets the flag only once would - // leave the producer permanently dead from cycle 2 onwards. assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload( "recovery-c" + currentCycle).build())) - .as("Cycle %d: publish must recover after broker CLI shutdown; recreateProducer flag must reset between cycles", currentCycle) + .as("Cycle %d: first publish after broker CLI shutdown must recover via proactive isClosed() pre-check", currentCycle) + .doesNotThrowAnyException(); + + // Steady-state publish on the now-recreated producer. + assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload( + "steady-c" + currentCycle).build())) + .as("Cycle %d: steady-state publish after recovery must continue to work", currentCycle) .doesNotThrowAnyException(); } From c89fcb4c0e9786e47f7c3226fe727ccbeb74b5af Mon Sep 17 00:00:00 2001 From: sunil-solace Date: Thu, 14 May 2026 17:19:30 -0400 Subject: [PATCH 05/13] DATAGO-134580: Recover error-queue producer from unsolicited CloseFlow The reactive + proactive recreate-on-stale logic added in PR #141 (commits 931f09c..134e7ef) protects each binding's per-binding XMLMessageProducer in JCSMPOutboundMessageHandler. The error-queue republish path in ErrorQueueInfrastructure has the same exposure but on a different producer: it borrows the session-default producer from JCSMPSessionProducerManager via producerManager.get(producerKey) and historically had no recovery logic when the broker tore that producer down via unsolicited CloseFlow. Failure mode without this fix: when the broker fans out CloseFlow (message-spool maintenance, DR failover, "503: Service Unavailable"), the shared session-default producer is marked closed by JCSMP. Every subsequent error-queue republish in ErrorQueueInfrastructure.send() throws StaleSessionException / JCSMPTransportException / ClosedFacilityException; ErrorQueueRepublishCorrelationKey.handleError() catches at the message-retry level and re-attempts up to maxDeliveryAttempts - all attempts re-using the same dead producer reference, all doomed to fail. After max attempts the message is re-queued onto the original consumer queue, the consumer redelivers it, fails again, hits the error-queue path again, fails again. Net effect: failed-consumer messages disappear from the system after a DR failover or spool maintenance event. The fix mirrors the outbound-handler approach: - Proactive: at the top of send(), after producerManager.get(...), check producer.isClosed(). If true, call the new producerManager.forceRecreate() to rebuild the shared producer before send is attempted. - Reactive: wrap producer.send(...) in a try-catch. On StaleSessionException, JCSMPTransportException, ClosedFacilityException, or post-failure producer.isClosed(), call forceRecreate() so the next ErrorQueueRepublishCorrelationKey retry-loop iteration picks up a fresh producer. The original exception still propagates so the retry caller can do its errorQueueDeliveryAttempt++ bookkeeping. The shared producer is reference-counted across the entire session (JCSMPOutboundMessageHandler also registers itself for ref-count purposes even though it uses its own per-binding producer for sends). release() + get() does NOT work as a recovery primitive in production because it only closes the resource when registeredIds.size() <= 1 - in any deployment with at least one outbound binding, the ref-count stays > 1 and release() leaves the dead resource in place. The new forceRecreate() in SharedResourceManager sidesteps the ref-count: it unconditionally closes the current resource and create()s a new one under the existing lock, leaving registrations intact so every already-registered caller picks up the fresh resource on their next get(). Added as a generic method on SharedResourceManager since the recovery contract is independent of the JCSMP specifics. Tests (ErrorQueueInfrastructureTest, new): - test_errorQueueProducerRecreatedProactivelyOnIsClosed: closed producer detected before send -> forceRecreate -> fresh producer services the publish; stale producer never sent through (Mockito.never()). - test_errorQueueProducerRecreatedReactivelyOnStaleSendException: @CartesianTest over Stale / JCSMPTransport / ClosedFacility - verifies all three exception types trigger forceRecreate AND propagate to the retry caller (so handleError can drive its loop). - test_errorQueueProducerNotRecreatedOnUnrelatedJCSMPException: negative control - a non-stale JCSMPException (e.g. malformed message) propagates normally and does NOT churn the shared producer, guarding against an over-broad reactive arm. 417 binder-core unit tests green (was 411 + 6 new from this commit). This branch is layered on DATAGO-134580 (PR #141) so the new SharedResourceManager.forceRecreate() and the ErrorQueueInfrastructure changes can be reviewed alongside the related outbound-handler work. --- .../binder/util/ErrorQueueInfrastructure.java | 30 +++- .../binder/util/SharedResourceManager.java | 25 ++++ .../util/ErrorQueueInfrastructureTest.java | 139 ++++++++++++++++++ 3 files changed, 193 insertions(+), 1 deletion(-) create mode 100644 solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructureTest.java diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructure.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructure.java index 0a23a1bca..28915caf0 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructure.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructure.java @@ -1,9 +1,12 @@ package com.solace.spring.cloud.stream.binder.util; import com.solace.spring.cloud.stream.binder.properties.SolaceConsumerProperties; +import com.solacesystems.jcsmp.ClosedFacilityException; import com.solacesystems.jcsmp.JCSMPException; import com.solacesystems.jcsmp.JCSMPFactory; +import com.solacesystems.jcsmp.JCSMPTransportException; import com.solacesystems.jcsmp.Queue; +import com.solacesystems.jcsmp.StaleSessionException; import com.solacesystems.jcsmp.XMLMessage; import com.solacesystems.jcsmp.XMLMessageProducer; import org.slf4j.Logger; @@ -27,6 +30,8 @@ public ErrorQueueInfrastructure(JCSMPSessionProducerManager producerManager, Str this.consumerProperties = consumerProperties; } + // DATAGO-134580: recreate shared JCSMP producer on unsolicited termination from Solace broker. + public void send(MessageContainer messageContainer, ErrorQueueRepublishCorrelationKey key) throws JCSMPException { XMLMessage xmlMessage = xmlMessageMapper.mapError(messageContainer.getMessage(), consumerProperties); xmlMessage.setCorrelationKey(key); @@ -34,6 +39,11 @@ public void send(MessageContainer messageContainer, ErrorQueueRepublishCorrelati XMLMessageProducer producer; try { producer = producerManager.get(producerKey); + if (producer.isClosed()) { + LOGGER.warn("Detected closed shared JCSMP producer before sending to error queue {}; recreating", + errorQueueName); + producer = producerManager.forceRecreate(); + } } catch (Exception e) { MessagingException wrappedException = new MessagingException( String.format("Failed to get producer to send message %s to queue %s", xmlMessage.getMessageId(), @@ -42,7 +52,25 @@ public void send(MessageContainer messageContainer, ErrorQueueRepublishCorrelati throw wrappedException; } - producer.send(xmlMessage, queue); + try { + producer.send(xmlMessage, queue); + } catch (JCSMPException e) { + if (e instanceof StaleSessionException + || e instanceof JCSMPTransportException + || e instanceof ClosedFacilityException + || producer.isClosed()) { + LOGGER.warn("Detected stale shared JCSMP producer while sending to error queue {} (cause: {}); " + + "recreating for next attempt", + errorQueueName, e.getClass().getSimpleName()); + try { + producerManager.forceRecreate(); + } catch (Exception recreateError) { + LOGGER.warn("Failed to recreate shared JCSMP producer after stale-flow detection", recreateError); + e.addSuppressed(recreateError); + } + } + throw e; + } } public ErrorQueueRepublishCorrelationKey createCorrelationKey(MessageContainer messageContainer, diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/SharedResourceManager.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/SharedResourceManager.java index a6279c081..c0e382393 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/SharedResourceManager.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/SharedResourceManager.java @@ -43,6 +43,31 @@ public T get(String key) throws Exception { return sharedResource; } + /** + * Force-replace the shared resource. Closes the existing instance (if any) and + * {@link #create()}s a new one, regardless of how many callers are currently + * registered. Existing registrations are preserved, so subsequent {@link #get(String)} + * calls from any registered caller return the new resource. Intended for recovery + * paths where a caller has detected the shared resource is no longer usable (e.g. + * the underlying broker tore down the flow via unsolicited CloseFlow). + * + * @return the freshly-created shared resource + * @throws Exception whatever exception may be thrown by {@link #create()} + */ + public T forceRecreate() throws Exception { + synchronized (lock) { + if (sharedResource != null) { + try { + close(); + } catch (Exception e) { + LOGGER.debug("Failed to close stale {} during forceRecreate", type, e); + } + } + sharedResource = create(); + return sharedResource; + } + } + /** * De-register {@code key} from the shared resource. *

    If this is the last {@code key} associated to the shared resource, {@link #close()} the resource. diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructureTest.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructureTest.java new file mode 100644 index 000000000..1764d06cb --- /dev/null +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructureTest.java @@ -0,0 +1,139 @@ +package com.solace.spring.cloud.stream.binder.util; + +import com.solace.spring.cloud.stream.binder.properties.SolaceConsumerProperties; +import com.solacesystems.jcsmp.BytesXMLMessage; +import com.solacesystems.jcsmp.ClosedFacilityException; +import com.solacesystems.jcsmp.Destination; +import com.solacesystems.jcsmp.JCSMPException; +import com.solacesystems.jcsmp.JCSMPFactory; +import com.solacesystems.jcsmp.JCSMPTransportException; +import com.solacesystems.jcsmp.StaleSessionException; +import com.solacesystems.jcsmp.XMLMessage; +import com.solacesystems.jcsmp.XMLMessageProducer; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.junitpioneer.jupiter.cartesian.CartesianTest; +import org.junitpioneer.jupiter.cartesian.CartesianTest.Values; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.junit.jupiter.MockitoExtension; + +import static org.assertj.core.api.Assertions.assertThatCode; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.mockito.ArgumentMatchers.any; + +/** + * Unit tests for the DATAGO-134580 stale-flow recovery added to {@link ErrorQueueInfrastructure}. + * The error-queue republish path borrows the session-default producer from + * {@link JCSMPSessionProducerManager} and historically had no recovery logic when the broker + * fanned out an unsolicited CloseFlow on that producer (reactive recreation in + * {@code JCSMPOutboundMessageHandler} only protects per-binding producers, not the shared + * session-default one). + */ +@ExtendWith(MockitoExtension.class) +class ErrorQueueInfrastructureTest { + private static final String PRODUCER_KEY = "test-producer-key"; + private static final String ERROR_QUEUE_NAME = "test-error-queue"; + + @Mock JCSMPSessionProducerManager producerManager; + @Mock MessageContainer messageContainer; + @Mock ErrorQueueRepublishCorrelationKey correlationKey; + + BytesXMLMessage inputMessage; + SolaceConsumerProperties consumerProperties; + ErrorQueueInfrastructure errorQueueInfrastructure; + + @BeforeEach + void setup() { + inputMessage = JCSMPFactory.onlyInstance().createMessage(BytesXMLMessage.class); + consumerProperties = new SolaceConsumerProperties(); + errorQueueInfrastructure = new ErrorQueueInfrastructure( + producerManager, PRODUCER_KEY, ERROR_QUEUE_NAME, consumerProperties); + Mockito.when(messageContainer.getMessage()).thenReturn(inputMessage); + } + + /** + * DATAGO-134580: proactive {@code producer.isClosed()} pre-check on the error-queue + * republish path. If the broker has already torn down the shared session-default + * producer before this {@code send(...)} runs, the very first error-queue publish + * should still succeed - the manager is asked to recreate the producer before send is + * attempted, and the fresh producer services the publish. + */ + @Test + void test_errorQueueProducerRecreatedProactivelyOnIsClosed( + @Mock XMLMessageProducer staleProducer, + @Mock XMLMessageProducer freshProducer) throws Exception { + Mockito.when(producerManager.get(PRODUCER_KEY)).thenReturn(staleProducer); + Mockito.when(staleProducer.isClosed()).thenReturn(true); + Mockito.when(producerManager.forceRecreate()).thenReturn(freshProducer); + + assertThatCode(() -> errorQueueInfrastructure.send(messageContainer, correlationKey)) + .as("Proactive recreate must allow the publish to succeed on the fresh producer") + .doesNotThrowAnyException(); + + Mockito.verify(producerManager).forceRecreate(); + Mockito.verify(freshProducer).send(any(XMLMessage.class), any(Destination.class)); + Mockito.verify(staleProducer, Mockito.never()).send(any(XMLMessage.class), any(Destination.class)); + } + + /** + * DATAGO-134580: reactive recreation when {@code send(...)} itself throws a + * stale-flow exception. The race window between our proactive {@code isClosed()} + * check and the actual send means the broker can tear the producer down mid-flight; + * in that case the exception must propagate so {@code ErrorQueueRepublishCorrelationKey} + * can retry, and the manager must be force-recreated so the next retry attempt picks up + * a fresh producer rather than re-using the dead one. + * + *

    Parameterized over the three concrete JCSMP exception types we treat as + * stale-flow signals - the recovery contract must apply to all of them. + */ + @CartesianTest(name = "[{index}] exception={0}") + void test_errorQueueProducerRecreatedReactivelyOnStaleSendException( + @Values(strings = {"stale", "transport", "closed-facility"}) String exceptionType, + @Mock XMLMessageProducer staleProducer) throws Exception { + Mockito.when(producerManager.get(PRODUCER_KEY)).thenReturn(staleProducer); + Mockito.when(staleProducer.isClosed()).thenReturn(false); + + JCSMPException sendError = switch (exceptionType) { + case "stale" -> new StaleSessionException( + "Tried to perform operation on a closed XML message producer", + new JCSMPException("Received unsolicited CloseFlow for producer (503:Service Unavailable).")); + case "transport" -> new JCSMPTransportException( + "Received unsolicited CloseFlow for producer (503:Service Unavailable)."); + case "closed-facility" -> new ClosedFacilityException("Producer is closed"); + default -> throw new IllegalArgumentException("unknown exception type: " + exceptionType); + }; + Mockito.doThrow(sendError).when(staleProducer).send(any(XMLMessage.class), any(Destination.class)); + + assertThatThrownBy(() -> errorQueueInfrastructure.send(messageContainer, correlationKey)) + .as("Stale-flow send failure must propagate so the retry caller can re-attempt") + .isInstanceOf(sendError.getClass()); + + // The manager must have been asked to forceRecreate so the next retry by + // ErrorQueueRepublishCorrelationKey gets a fresh producer instead of the dead one. + Mockito.verify(producerManager).forceRecreate(); + } + + /** + * DATAGO-134580: a non-stale {@code JCSMPException} from {@code send(...)} must + * propagate normally and must not trigger a producer recreate. Guards + * against an over-broad reactive arm that would churn the shared producer on + * every transient publish error (e.g. a malformed message). + */ + @Test + void test_errorQueueProducerNotRecreatedOnUnrelatedJCSMPException( + @Mock XMLMessageProducer producer) throws Exception { + Mockito.when(producerManager.get(PRODUCER_KEY)).thenReturn(producer); + Mockito.when(producer.isClosed()).thenReturn(false); + + JCSMPException unrelated = new JCSMPException("Some unrelated publishing error"); + Mockito.doThrow(unrelated).when(producer).send(any(XMLMessage.class), any(Destination.class)); + + assertThatThrownBy(() -> errorQueueInfrastructure.send(messageContainer, correlationKey)) + .isInstanceOf(JCSMPException.class) + .hasMessage("Some unrelated publishing error"); + + Mockito.verify(producerManager, Mockito.never()).forceRecreate(); + } +} \ No newline at end of file From 673217884e66ed94f66af6a7d929a550ab177d16 Mon Sep 17 00:00:00 2001 From: sunil-solace Date: Thu, 14 May 2026 17:44:34 -0400 Subject: [PATCH 06/13] DATAGO-134580: Apply CAS semantics to forceRecreate + generic docs Three PR #142 review items: C1 (Copilot) + C3 (mayur-solace) - race in forceRecreate(). The original unconditional implementation could have two callers both observe the same stale shared resource, both enter forceRecreate(), and have the second caller close a healthy replacement that the first caller just installed. Fix: compare-and-swap. forceRecreate now takes an `expected` argument - the reference the caller observed. Under the lock, the manager recreates only if `sharedResource == expected`; otherwise it returns whatever a concurrent caller already installed without closing or re-creating anything. The caller-visible contract is now: pass what you observed, use what's returned. C2 (mayur-solace) - Javadoc on SharedResourceManager.forceRecreate referenced the broker / CloseFlow concern specifically. Since SharedResourceManager is generic and could host non-broker resources in the future, the docs are rewritten to describe the CAS contract generically without naming the JCSMP/broker context. ErrorQueueInfrastructure.send() updated at both call sites to pass the observed producer reference and use the value returned by forceRecreate (which may be the fresh one we requested, or the already-installed replacement another caller put in place). New unit test testErrorQueueProducerUsesManagerReturnedReferenceAfterForceRecreate simulates the exact race C1 flagged: stale producer observed, manager's CAS returns an already-installed replacement, send must use the replacement rather than the locally-observed stale one. Existing tests updated to pass the observed reference and verify CAS arguments. Also aligned the test method names to drop the test_ underscore form, matching the no-underscore convention used elsewhere in the binder-core test suite (e.g. SolaceErrorMessageHandlerTest). 418 binder-core unit tests green (was 417 + 1 new CAS-race test). --- .../binder/util/ErrorQueueInfrastructure.java | 4 +- .../binder/util/SharedResourceManager.java | 34 +++++++++---- .../util/ErrorQueueInfrastructureTest.java | 51 +++++++++++++++---- 3 files changed, 69 insertions(+), 20 deletions(-) diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructure.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructure.java index 28915caf0..5b1311046 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructure.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructure.java @@ -42,7 +42,7 @@ public void send(MessageContainer messageContainer, ErrorQueueRepublishCorrelati if (producer.isClosed()) { LOGGER.warn("Detected closed shared JCSMP producer before sending to error queue {}; recreating", errorQueueName); - producer = producerManager.forceRecreate(); + producer = producerManager.forceRecreate(producer); } } catch (Exception e) { MessagingException wrappedException = new MessagingException( @@ -63,7 +63,7 @@ public void send(MessageContainer messageContainer, ErrorQueueRepublishCorrelati "recreating for next attempt", errorQueueName, e.getClass().getSimpleName()); try { - producerManager.forceRecreate(); + producerManager.forceRecreate(producer); } catch (Exception recreateError) { LOGGER.warn("Failed to recreate shared JCSMP producer after stale-flow detection", recreateError); e.addSuppressed(recreateError); diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/SharedResourceManager.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/SharedResourceManager.java index c0e382393..b7908b2a7 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/SharedResourceManager.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/SharedResourceManager.java @@ -44,23 +44,39 @@ public T get(String key) throws Exception { } /** - * Force-replace the shared resource. Closes the existing instance (if any) and - * {@link #create()}s a new one, regardless of how many callers are currently - * registered. Existing registrations are preserved, so subsequent {@link #get(String)} - * calls from any registered caller return the new resource. Intended for recovery - * paths where a caller has detected the shared resource is no longer usable (e.g. - * the underlying broker tore down the flow via unsolicited CloseFlow). + * Conditionally replace the shared resource using compare-and-swap semantics. * - * @return the freshly-created shared resource + *

    If the manager still holds the {@code expected} reference, the existing + * resource is closed and a fresh one is {@link #create()}d. If the manager + * already holds a different reference - because a concurrent caller has already + * replaced it - this is a no-op and the currently-installed resource is + * returned. This prevents two callers that observed the same stale resource + * from both recreating: the second caller sees that the resource has already + * changed and uses the replacement rather than closing a potentially in-use + * resource that the first caller installed. + * + *

    Existing registrations are preserved, so subsequent {@link #get(String)} + * calls from any registered caller return the (possibly newly-installed) + * resource. + * + * @param expected the resource reference the caller observed and considers no + * longer usable; pass the value previously returned by + * {@link #get(String)} or by an earlier call to this method + * @return the resource currently installed in the manager - either the + * freshly-created one (if the swap happened) or whatever a concurrent + * caller installed (if it did not) * @throws Exception whatever exception may be thrown by {@link #create()} */ - public T forceRecreate() throws Exception { + public T forceRecreate(T expected) throws Exception { synchronized (lock) { + if (sharedResource != expected) { + return sharedResource; + } if (sharedResource != null) { try { close(); } catch (Exception e) { - LOGGER.debug("Failed to close stale {} during forceRecreate", type, e); + LOGGER.debug("Failed to close current {} during forceRecreate", type, e); } } sharedResource = create(); diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructureTest.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructureTest.java index 1764d06cb..299246722 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructureTest.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructureTest.java @@ -61,18 +61,20 @@ void setup() { * attempted, and the fresh producer services the publish. */ @Test - void test_errorQueueProducerRecreatedProactivelyOnIsClosed( + void testErrorQueueProducerRecreatedProactivelyOnIsClosed( @Mock XMLMessageProducer staleProducer, @Mock XMLMessageProducer freshProducer) throws Exception { Mockito.when(producerManager.get(PRODUCER_KEY)).thenReturn(staleProducer); Mockito.when(staleProducer.isClosed()).thenReturn(true); - Mockito.when(producerManager.forceRecreate()).thenReturn(freshProducer); + Mockito.when(producerManager.forceRecreate(staleProducer)).thenReturn(freshProducer); assertThatCode(() -> errorQueueInfrastructure.send(messageContainer, correlationKey)) .as("Proactive recreate must allow the publish to succeed on the fresh producer") .doesNotThrowAnyException(); - Mockito.verify(producerManager).forceRecreate(); + // CAS contract: caller passes the observed (stale) reference so the manager only + // recreates if it still holds that exact instance. + Mockito.verify(producerManager).forceRecreate(staleProducer); Mockito.verify(freshProducer).send(any(XMLMessage.class), any(Destination.class)); Mockito.verify(staleProducer, Mockito.never()).send(any(XMLMessage.class), any(Destination.class)); } @@ -89,7 +91,7 @@ void test_errorQueueProducerRecreatedProactivelyOnIsClosed( * stale-flow signals - the recovery contract must apply to all of them. */ @CartesianTest(name = "[{index}] exception={0}") - void test_errorQueueProducerRecreatedReactivelyOnStaleSendException( + void testErrorQueueProducerRecreatedReactivelyOnStaleSendException( @Values(strings = {"stale", "transport", "closed-facility"}) String exceptionType, @Mock XMLMessageProducer staleProducer) throws Exception { Mockito.when(producerManager.get(PRODUCER_KEY)).thenReturn(staleProducer); @@ -110,9 +112,10 @@ void test_errorQueueProducerRecreatedReactivelyOnStaleSendException( .as("Stale-flow send failure must propagate so the retry caller can re-attempt") .isInstanceOf(sendError.getClass()); - // The manager must have been asked to forceRecreate so the next retry by - // ErrorQueueRepublishCorrelationKey gets a fresh producer instead of the dead one. - Mockito.verify(producerManager).forceRecreate(); + // The manager must have been asked to forceRecreate (with the observed stale + // producer for CAS semantics) so the next retry by ErrorQueueRepublishCorrelationKey + // gets a fresh producer instead of the dead one. + Mockito.verify(producerManager).forceRecreate(staleProducer); } /** @@ -122,7 +125,7 @@ void test_errorQueueProducerRecreatedReactivelyOnStaleSendException( * every transient publish error (e.g. a malformed message). */ @Test - void test_errorQueueProducerNotRecreatedOnUnrelatedJCSMPException( + void testErrorQueueProducerNotRecreatedOnUnrelatedJCSMPException( @Mock XMLMessageProducer producer) throws Exception { Mockito.when(producerManager.get(PRODUCER_KEY)).thenReturn(producer); Mockito.when(producer.isClosed()).thenReturn(false); @@ -134,6 +137,36 @@ void test_errorQueueProducerNotRecreatedOnUnrelatedJCSMPException( .isInstanceOf(JCSMPException.class) .hasMessage("Some unrelated publishing error"); - Mockito.verify(producerManager, Mockito.never()).forceRecreate(); + Mockito.verify(producerManager, Mockito.never()).forceRecreate(any()); + } + + /** + * DATAGO-134580: CAS contract verification. When two callers both observe the + * same stale producer and both call {@code forceRecreate(stale)}, the manager + * recreates exactly once - the second call returns the already-recreated + * resource without closing it. {@code ErrorQueueInfrastructure.send(...)} must + * use the value returned by {@code forceRecreate} (rather than its own observed + * reference) so it ends up using whatever the manager currently holds, not a + * resource that another caller has since closed and replaced. + */ + @Test + void testErrorQueueProducerUsesManagerReturnedReferenceAfterForceRecreate( + @Mock XMLMessageProducer staleProducer, + @Mock XMLMessageProducer alreadyRecreatedByAnotherCaller) throws Exception { + Mockito.when(producerManager.get(PRODUCER_KEY)).thenReturn(staleProducer); + Mockito.when(staleProducer.isClosed()).thenReturn(true); + // Simulate the CAS no-op outcome: another caller already replaced the stale + // producer, so the manager's CAS does not recreate again - it returns the + // already-installed replacement instead. + Mockito.when(producerManager.forceRecreate(staleProducer)) + .thenReturn(alreadyRecreatedByAnotherCaller); + + assertThatCode(() -> errorQueueInfrastructure.send(messageContainer, correlationKey)) + .as("send must use the manager-returned reference (the already-installed replacement) " + + "and not the locally-observed stale reference") + .doesNotThrowAnyException(); + + Mockito.verify(alreadyRecreatedByAnotherCaller).send(any(XMLMessage.class), any(Destination.class)); + Mockito.verify(staleProducer, Mockito.never()).send(any(XMLMessage.class), any(Destination.class)); } } \ No newline at end of file From cedd2f202b44ec79b1535445aaef87765bf71f94 Mon Sep 17 00:00:00 2001 From: sunil-solace Date: Thu, 14 May 2026 18:08:03 -0400 Subject: [PATCH 07/13] DATAGO-134580: Trim forceRecreate Javadoc to essentials Per PR #142 follow-up: the previous Javadoc (24 lines, two paragraphs of explanation) was verbose for an IDE hover. Reduced to a single sentence describing the CAS contract plus the standard param/return/throws. --- .../binder/util/SharedResourceManager.java | 27 +++++-------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/SharedResourceManager.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/SharedResourceManager.java index b7908b2a7..38fdd509b 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/SharedResourceManager.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/SharedResourceManager.java @@ -44,28 +44,13 @@ public T get(String key) throws Exception { } /** - * Conditionally replace the shared resource using compare-and-swap semantics. + * Compare-and-swap the shared resource. If the manager still holds {@code expected}, + * close it and {@link #create()} a fresh one; otherwise return the currently-installed + * resource without re-creating. * - *

    If the manager still holds the {@code expected} reference, the existing - * resource is closed and a fresh one is {@link #create()}d. If the manager - * already holds a different reference - because a concurrent caller has already - * replaced it - this is a no-op and the currently-installed resource is - * returned. This prevents two callers that observed the same stale resource - * from both recreating: the second caller sees that the resource has already - * changed and uses the replacement rather than closing a potentially in-use - * resource that the first caller installed. - * - *

    Existing registrations are preserved, so subsequent {@link #get(String)} - * calls from any registered caller return the (possibly newly-installed) - * resource. - * - * @param expected the resource reference the caller observed and considers no - * longer usable; pass the value previously returned by - * {@link #get(String)} or by an earlier call to this method - * @return the resource currently installed in the manager - either the - * freshly-created one (if the swap happened) or whatever a concurrent - * caller installed (if it did not) - * @throws Exception whatever exception may be thrown by {@link #create()} + * @param expected the reference the caller observed and considers no longer usable + * @return the resource currently installed in the manager + * @throws Exception whatever {@link #create()} may throw */ public T forceRecreate(T expected) throws Exception { synchronized (lock) { From 50a01c557a3a8675479368bc634adc8d46ac49cb Mon Sep 17 00:00:00 2001 From: sunil-solace Date: Mon, 18 May 2026 23:36:10 -0400 Subject: [PATCH 08/13] DATAGO-134580: Trim verbose test docs and comments Collapse multi-paragraph Javadoc and inline narrative on the DATAGO-134580 test additions to one-line summaries; drop the two unused imports that the trimmed Javadoc had been carrying. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../JCSMPOutboundMessageHandlerTest.java | 139 +------- .../config/BrokerConfiguratorBuilder.java | 26 +- .../JCSMPProducerCloseFlowRecoveryIT.java | 298 ++---------------- 3 files changed, 38 insertions(+), 425 deletions(-) diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java index dd5922c6e..413faec90 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java @@ -687,30 +687,7 @@ void testGetBindingName() { assertThat(messageHandler.getBindingName()).isNotEmpty().isEqualTo(producerProperties.getBindingName()); } - /** - * DATAGO-134580: Reproduces the bug where an unsolicited {@code CloseFlow} from the broker - * leaves the per-binding {@link XMLMessageProducer} terminally closed. JCSMP throws - * {@link StaleSessionException} on every subsequent send, and today the binding can only - * recover via an app restart. - * - *

    Expected (post-fix) behavior: - *

      - *
    1. The send that triggers {@code StaleSessionException} surfaces a {@link MessagingException} - * (current behavior preserved).
    2. - *
    3. The handler remains running.
    4. - *
    5. The next {@code handleMessage} call recreates the producer via - * {@code session.createProducer(...)} and successfully publishes on the new producer.
    6. - *
    - * - *

    Cartesian over {@code transacted = {false, true}} so both branches of the - * recreate code path ({@code jcsmpSession.createProducer(...)} for direct producers and - * {@code jcsmpSession.createTransactedSession()} -> {@code transactedSession.createProducer(...)} - * for transacted producers) are exercised. The transacted branch additionally closes - * the dead {@code TransactedSession} and recreates a fresh one. - * - *

    On {@code master} (pre-fix) this test fails at step 3: only one producer is ever created - * and the second send re-throws {@code StaleSessionException}. - */ + /** DATAGO-134580: stale producer (CloseFlow) is recreated on the next send. */ @CartesianTest(name = "[{index}] transacted={0}") public void test_producerRecreatedAfterUnsolicitedCloseFlow( @Values(booleans = {false, true}) boolean transacted, @@ -719,25 +696,18 @@ public void test_producerRecreatedAfterUnsolicitedCloseFlow( producerProperties.getExtension().setTransacted(transacted); if (transacted) { - // First createTransactedSession() returns the original mock from init(); the - // second (after stale detection) returns a fresh transactedSessionB. The original - // transactedSession.createProducer(...) keeps the init() lenient stub that returns - // messageProducer; the new transactedSessionB.createProducer(...) returns producerB. Mockito.when(session.createTransactedSession()) .thenReturn(transactedSession) .thenReturn(transactedSessionB); Mockito.when(transactedSessionB.createProducer(any(ProducerFlowProperties.class), any(JCSMPStreamingPublishCorrelatingEventHandler.class))).thenReturn(producerB); } else { - // Non-transacted: two producers in sequence directly from the session. Mockito.when(session.createProducer( producerFlowPropertiesCaptor.capture(), pubEventHandlerCaptor.capture())) .thenReturn(messageProducer) .thenReturn(producerB); } - // Simulate the broker-driven CloseFlow: the next send() on the existing producer throws - // StaleSessionException (wrapping the original transport exception JCSMP recorded async). StaleSessionException stale = new StaleSessionException( "Tried to perform operation on a closed XML message producer", new JCSMPException("Received unsolicited CloseFlow for producer (503:Service Unavailable).")); @@ -745,53 +715,32 @@ public void test_producerRecreatedAfterUnsolicitedCloseFlow( messageHandler.start(); - // 1st publish — the underlying producer is terminally closed; we expect the binding - // to surface the failure to the caller but stay alive. We assert hasCauseInstanceOf - // (not hasRootCauseInstanceOf) because StaleSessionException itself wraps a deeper - // JCSMPException / JCSMPTransportException cause - the root would be that inner one, - // but the diagnostic exception we care about preserving is StaleSessionException. Message firstMessage = MessageBuilder.withPayload("payload-1").build(); assertThatThrownBy(() -> messageHandler.handleMessage(firstMessage)) .isInstanceOf(MessagingException.class) .hasCauseInstanceOf(StaleSessionException.class); - assertThat(messageHandler.isRunning()) - .as("Handler must remain running so it can recover on the next send") - .isTrue(); + assertThat(messageHandler.isRunning()).isTrue(); - // 2nd publish — the fix should detect the stale producer, close it, and create a fresh one. Message secondMessage = MessageBuilder.withPayload("payload-2").build(); messageHandler.handleMessage(secondMessage); - // Recreation evidence depends on which producer path the binding uses. if (transacted) { - // Two transacted sessions ever created (initial + recreated). The old one is - // closed and the new one services the second publish. Mockito.verify(session, Mockito.times(2)).createTransactedSession(); Mockito.verify(transactedSession, Mockito.atLeastOnce()).close(); Mockito.verify(transactedSessionB, Mockito.times(1)) .createProducer(any(ProducerFlowProperties.class), any(JCSMPStreamingPublishCorrelatingEventHandler.class)); } else { - // Non-transacted: two producers ever created (initial + recreated). Mockito.verify(session, Mockito.times(2)) .createProducer(any(ProducerFlowProperties.class), any(JCSMPStreamingPublishCorrelatingEventHandler.class)); } - // The recreated producer must have received the second publish, and the dead one - // must not have been re-used. The dead producer is also closed during recreation. Mockito.verify(producerB, Mockito.times(1)).send(any(XMLMessage.class), any(Destination.class)); Mockito.verify(messageProducer, Mockito.times(1)).send(any(XMLMessage.class), any(Destination.class)); Mockito.verify(messageProducer, Mockito.atLeastOnce()).close(); } - /** - * DATAGO-134580 (companion of {@link #test_producerRecreatedAfterUnsolicitedCloseFlow}): - * the catch block in {@code handleMessage} triggers recreation on BOTH - * {@link StaleSessionException} and {@link ClosedFacilityException}. This proves the OR - * branch is not dead code - a producer surfacing - * {@code ClosedFacilityException("Producer is closed")} on send must also lead to a - * recreated producer on the next message. - */ + /** DATAGO-134580: {@link ClosedFacilityException} on send also triggers recreation. */ @Test void test_producerRecreatedAfterClosedFacilityException(@Mock XMLMessageProducer producerB) throws Exception { Mockito.when(session.createProducer( @@ -818,19 +767,7 @@ void test_producerRecreatedAfterClosedFacilityException(@Mock XMLMessageProducer Mockito.verify(messageProducer, Mockito.atLeastOnce()).close(); } - /** - * DATAGO-134580 (companion of {@link #test_producerRecreatedAfterUnsolicitedCloseFlow}): - * the catch block in {@code handleMessage} also recreates on - * {@link JCSMPTransportException}. The broker IT - * ({@code JCSMPProducerCloseFlowRecoveryIT}) documents that when the broker fans out an - * unsolicited CloseFlow, the send-path failure can surface as either the outer - * {@link StaleSessionException} form (covered by - * {@link #test_producerRecreatedAfterUnsolicitedCloseFlow}) or the raw transport-level - * {@link JCSMPTransportException} form, depending on timing between the broker's flow - * teardown and JCSMP's stale-marker propagation to the send caller. Both must lead to - * recreation; without this coverage a regression that drops the transport-exception arm - * would silently regress the bug for that timing variant. - */ + /** DATAGO-134580: {@link JCSMPTransportException} on send also triggers recreation. */ @Test void test_producerRecreatedAfterJCSMPTransportException(@Mock XMLMessageProducer producerB) throws Exception { Mockito.when(session.createProducer( @@ -858,15 +795,7 @@ void test_producerRecreatedAfterJCSMPTransportException(@Mock XMLMessageProducer Mockito.verify(messageProducer, Mockito.atLeastOnce()).close(); } - /** - * DATAGO-134580: proactive {@code producer.isClosed()} pre-check. If the broker has - * already fanned out unsolicited CloseFlow to this binding's producer before - * any {@code handleMessage} runs (i.e. the recreate flag is not yet armed because no - * previous send has hit the catch block), the very first inbound message should still - * recreate the producer rather than attempting to send through a known-closed one. - * Without the pre-check the first message after every CloseFlow event would be lost to - * the error channel; with the pre-check that message publishes cleanly. - */ + /** DATAGO-134580: proactive {@code isClosed()} pre-check rebuilds before the first send. */ @Test void test_producerRecreatedProactivelyWhenIsClosedDetectedBeforeSend(@Mock XMLMessageProducer producerB) throws Exception { Mockito.when(session.createProducer( @@ -874,8 +803,6 @@ void test_producerRecreatedProactivelyWhenIsClosedDetectedBeforeSend(@Mock XMLMe .thenReturn(messageProducer) .thenReturn(producerB); - // Simulate the broker having torn down the flow asynchronously: producer reports - // itself as closed, but no send has yet hit the catch block to arm the flag. Mockito.when(messageProducer.isClosed()).thenReturn(true); messageHandler.start(); @@ -883,42 +810,21 @@ void test_producerRecreatedProactivelyWhenIsClosedDetectedBeforeSend(@Mock XMLMe Message firstMessage = MessageBuilder.withPayload("payload-1").build(); messageHandler.handleMessage(firstMessage); - // The first message must have triggered recreation (producer was visibly closed - // before send) and then succeeded against the fresh producer. Mockito.verify(session, Mockito.times(2)) .createProducer(any(ProducerFlowProperties.class), any(JCSMPStreamingPublishCorrelatingEventHandler.class)); Mockito.verify(producerB, Mockito.times(1)).send(any(XMLMessage.class), any(Destination.class)); - // The original (closed) producer must never have been asked to send. Mockito.verify(messageProducer, Mockito.never()).send(any(XMLMessage.class), any(Destination.class)); Mockito.verify(messageProducer, Mockito.atLeastOnce()).close(); } - /** - * DATAGO-134580: when {@code session.createProducer(...)} itself fails during recreation - * (e.g. the broker is still mid-restart from the message-spool shutdown), the in-flight - * message must be routed through the error channel via {@code handleMessagingException}, - * the {@code recreateProducer} flag must stay armed, and the next inbound message - * must retry recreation. This guards against a silent infinite-retry-loop or - * lost-error class of regression. - * - *

    Sequence: - *

      - *
    1. {@code start()} returns the original {@code messageProducer} (which will throw - * Stale on send).
    2. - *
    3. First {@code handleMessage} -> Stale -> flag armed, MessagingException surfaced.
    4. - *
    5. Second {@code handleMessage} -> recreation attempted -> {@code createProducer} - * throws -> MessagingException surfaced, flag stays armed.
    6. - *
    7. Third {@code handleMessage} -> recreation retried, this time succeeds via - * {@code producerB} -> message publishes cleanly, flag cleared.
    8. - *
    - */ + /** DATAGO-134580: failed recreation surfaces an error and retries on the next message. */ @Test void test_producerRecreationFailurePropagatesAndRetriesNext(@Mock XMLMessageProducer producerB) throws Exception { Mockito.when(session.createProducer( producerFlowPropertiesCaptor.capture(), pubEventHandlerCaptor.capture())) - .thenReturn(messageProducer) // start() - .thenThrow(new JCSMPException("Broker still reconnecting")) // first recreation attempt - .thenReturn(producerB); // second recreation attempt + .thenReturn(messageProducer) + .thenThrow(new JCSMPException("Broker still reconnecting")) + .thenReturn(producerB); StaleSessionException stale = new StaleSessionException( "Tried to perform operation on a closed XML message producer", @@ -927,50 +833,34 @@ void test_producerRecreationFailurePropagatesAndRetriesNext(@Mock XMLMessageProd messageHandler.start(); - // 1st publish — original producer is stale; failure surfaces, flag is armed. Message firstMessage = MessageBuilder.withPayload("payload-1").build(); assertThatThrownBy(() -> messageHandler.handleMessage(firstMessage)) - .as("first publish must surface StaleSessionException to the caller") .isInstanceOf(MessagingException.class) .hasCauseInstanceOf(StaleSessionException.class); - // 2nd publish — recreation attempt itself fails (broker still recovering). The in-flight - // message must be routed through the error channel; the flag must remain armed. Message secondMessage = MessageBuilder.withPayload("payload-2").build(); assertThatThrownBy(() -> messageHandler.handleMessage(secondMessage)) - .as("recreation failure must propagate as MessagingException for this in-flight message") .isInstanceOf(MessagingException.class) .hasMessageContaining("Failed to recreate JCSMP producer") .hasCauseInstanceOf(JCSMPException.class); - // 3rd publish — broker has recovered; recreation succeeds; producerB services the send. Message thirdMessage = MessageBuilder.withPayload("payload-3").build(); messageHandler.handleMessage(thirdMessage); - // Three createProducer attempts (initial start + two recreation attempts). Mockito.verify(session, Mockito.times(3)) .createProducer(any(ProducerFlowProperties.class), any(JCSMPStreamingPublishCorrelatingEventHandler.class)); - // Only producerB serviced a successful send. Mockito.verify(producerB, Mockito.times(1)).send(any(XMLMessage.class), any(Destination.class)); - // Handler stayed alive through both failures. assertThat(messageHandler.isRunning()).isTrue(); } - /** - * DATAGO-134580: {@code start()} and {@code closeResources()} both reset the - * {@code recreateProducer} flag, so a stop/start lifecycle starts from a clean - * state regardless of what happened in the prior lifecycle. Without that reset, a stop - * after a stale-flow event followed by a fresh start would gratuitously recreate the - * brand-new producer on the very first message. - */ + /** DATAGO-134580: stop/start clears the recreate flag so the next send doesn't rebuild. */ @Test void test_recreationFlagResetAcrossStopStartCycle(@Mock XMLMessageProducer producerB) throws Exception { Mockito.when(session.createProducer( producerFlowPropertiesCaptor.capture(), pubEventHandlerCaptor.capture())) - .thenReturn(messageProducer) // first start() - .thenReturn(producerB); // second start() after stop() + .thenReturn(messageProducer) + .thenReturn(producerB); - // First lifecycle: producer fails on send -> flag is armed. StaleSessionException stale = new StaleSessionException( "Tried to perform operation on a closed XML message producer", new JCSMPException("Received unsolicited CloseFlow for producer (503:Service Unavailable).")); @@ -982,16 +872,11 @@ void test_recreationFlagResetAcrossStopStartCycle(@Mock XMLMessageProducer produ .isInstanceOf(MessagingException.class) .hasCauseInstanceOf(StaleSessionException.class); - // Now restart: closeResources() + start() must both clear the recreation flag. messageHandler.stop(); messageHandler.start(); - // Clear the createProducer invocation history so we can prove the NEXT handleMessage - // does not trigger a recreation (the flag must be cleared by the stop/start cycle). Mockito.clearInvocations(session); - // Subsequent publish lands on producerB (the freshly-started producer); the - // recreation code path must NOT fire - no additional createProducer calls. Message secondMessage = MessageBuilder.withPayload("payload-2").build(); messageHandler.handleMessage(secondMessage); diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/it/util/semp/config/BrokerConfiguratorBuilder.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/it/util/semp/config/BrokerConfiguratorBuilder.java index eb93bc738..1324815cb 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/it/util/semp/config/BrokerConfiguratorBuilder.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/it/util/semp/config/BrokerConfiguratorBuilder.java @@ -279,36 +279,14 @@ public void disableVpn(String msgVpnName) { updateVpn(vpn); } - /** - * Zeroes out the guaranteed-delivery message-spool quota for a VPN. - * - *

    Empirically (see {@code JCSMPProducerCloseFlowRecoveryIT} test 1) this does - * not tear down already-bound publisher / consumer flows on the VPN: the - * broker keeps the existing flows alive and instead NACKs new GD publishes - * asynchronously while the quota is at zero. This is the negative-control mechanism - * for the DATAGO-134580 reproducer - the actual unsolicited {@code CloseFlow} fan-out - * is driven by a broker-level {@code hardware message-spool shutdown} (CLI), not by - * a VPN-level quota change. - * - *

    Use {@link #restoreMsgSpoolForVpn(String, Long)} (with the previously-captured - * value) to put the spool back. - * - * @param msgVpnName name of the vpn whose spool quota to zero - */ + /** Zeroes the VPN's GD message-spool quota. */ public void disableMsgSpoolForVpn(String msgVpnName) { final ConfigMsgVpn vpn = queryVpn(msgVpnName); vpn.maxMsgSpoolUsage(0L); updateVpn(vpn); } - /** - * Restores the guaranteed-delivery message-spool quota for a VPN to the supplied value. - * Intended for use after {@link #disableMsgSpoolForVpn(String)} - pass the original quota - * captured from {@link #queryVpn(String)} before the disable. - * - * @param msgVpnName name of the vpn - * @param maxMsgSpoolUsageMb the spool quota in megabytes to set - */ + /** Restores the VPN's GD message-spool quota to the supplied value (MB). */ public void restoreMsgSpoolForVpn(String msgVpnName, Long maxMsgSpoolUsageMb) { final ConfigMsgVpn vpn = queryVpn(msgVpnName); vpn.maxMsgSpoolUsage(maxMsgSpoolUsageMb); diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java index a35ffbc89..f3d1f9a84 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java @@ -17,7 +17,6 @@ import com.solacesystems.jcsmp.JCSMPProperties; import com.solacesystems.jcsmp.JCSMPSession; import com.solacesystems.jcsmp.Queue; -import com.solacesystems.jcsmp.StaleSessionException; import com.solacesystems.jcsmp.TextMessage; import com.solacesystems.jcsmp.Topic; import com.solacesystems.jcsmp.XMLMessageProducer; @@ -40,7 +39,6 @@ import org.springframework.integration.channel.DirectChannel; import org.springframework.integration.support.MessageBuilder; import org.springframework.messaging.MessageChannel; -import org.springframework.messaging.MessagingException; import org.springframework.test.context.junit.jupiter.SpringJUnitConfig; import org.testcontainers.DockerClientFactory; @@ -54,67 +52,10 @@ import static org.assertj.core.api.Assertions.assertThatCode; /** - * Broker integration tests characterising producer behaviour around the - * DATAGO-134580 bug: when the - * broker tears down a publisher flow (DR failover, message-spool maintenance), JCSMP marks the - * per-binding {@link XMLMessageProducer} as terminally closed and every subsequent - * {@code send(...)} throws {@link StaleSessionException} until the app is restarted. - * - *

    This class contains five integration tests. Three of them are control / negative - * cases that exercise broker-side disruptions which - empirically - do not - * reproduce the bug. They are kept as long-term coverage so that any future broker behaviour - * change (e.g. a future broker version starts ejecting already-bound publisher flows on a - * spool-quota change) shows up immediately as a test failure rather than a silent regression. - * The fourth test drives the customer-reported broker disruption and asserts that the binding - * transparently recovers via the {@code JCSMPOutboundMessageHandler} fix. The fifth test - * loops the same disruption across multiple consecutive stale-flow cycles, proving the - * recreate path stays effective across repeated events on the same binding. - * - *

      - *
    1. {@link #test_persistentTopicPublisher_survivesSpoolToggle persistentTopicPublisher_survivesSpoolToggle} - * - Control. Persistent (GD) publishing to a topic + VPN-level spool quota - * cycled through 0 via SEMPv2. Asserts the publisher continues to publish. The broker - * only NACKs new spool writes asynchronously when the quota is 0; it does not eject - * already-bound publisher flows.
    2. - * - *
    3. {@link #test_directTopicPublisher_survivesSpoolToggle directTopicPublisher_survivesSpoolToggle} - * - Control. DIRECT (non-persistent) publishing to a topic via raw JCSMP + - * the same spool toggle. Asserts the publisher continues to publish. Direct messages - * bypass the spool subsystem entirely so the toggle is a no-op for them. The binder - * cannot be used for this case because {@code XMLMessageMapper.java:205} unconditionally - * promotes outbound messages to {@link DeliveryMode#PERSISTENT}.
    4. - * - *
    5. {@link #test_persistentQueuePublisher_survivesQueueIngressEgressToggle persistentQueuePublisher_survivesQueueIngressEgressToggle} - * - Control. Persistent publishing to a Queue destination + the queue's ingress - * and egress disabled and re-enabled via SEMPv2. Asserts the publisher continues to - * publish. Toggling ingress/egress causes the broker to NACK incoming publishes - * asynchronously while disabled but does not fan out unsolicited - * {@code CloseFlow}, so the bound publisher flow stays alive across the cycle.
    6. - * - *
    7. {@link #test_persistentQueuePublisher_recoversAfterMessageSpoolCliShutdown persistentQueuePublisher_recoversAfterMessageSpoolCliShutdown} - * - Recovery test. Persistent publishing to a Queue destination + a broker-level - * {@code hardware message-spool shutdown} / {@code no shutdown} cycle driven through - * the broker's CLI via {@code docker exec}. This is the only mechanism that actually - * fans out unsolicited {@code CloseFlow} (503 Service Unavailable) to every bound - * publisher flow without dropping the JCSMP session - matching the customer's - * traceback exactly. On master without the fix the post-shutdown publish surfaces - * {@link StaleSessionException} and the binding stays stuck. With the fix the handler's - * proactive {@code producer.isClosed()} pre-check catches the dead producer at the top - * of {@code handleMessage(...)} and rebuilds it before send, so the publish recovers - * transparently on its first attempt.
    8. - * - *
    9. {@link #test_persistentQueuePublisher_recoversAcrossRepeatedMessageSpoolCliShutdowns persistentQueuePublisher_recoversAcrossRepeatedMessageSpoolCliShutdowns} - * - Repeated recovery test. Same disruption as (4) but looped across multiple - * consecutive shutdown/restore cycles on the same binding. Each cycle the broker tears - * down the current producer (the original in cycle 1, the recreated one in cycle 2, - * and so on) and the proactive check must catch each closure and rebuild. Guards - * against a regression where the recreate path becomes single-use or where state - * accumulates across cycles.
    10. - *
    - * - *

    All five tests run in {@link ExecutionMode#SAME_THREAD} because each touches shared - * broker state (VPN-level spool quota, queue ingress/egress, broker-level message-spool) - * that would briefly disrupt any concurrently-running tests on the same broker. + * DATAGO-134580 broker ITs: three control cases (spool toggle, direct publisher, queue + * ingress/egress toggle) plus two recovery cases (single + repeated broker-level + * {@code hardware message-spool shutdown}) proving the binding recovers from unsolicited + * {@code CloseFlow}. Runs SAME_THREAD because each test mutates shared broker state. */ @SpringJUnitConfig(classes = SolaceJavaAutoConfiguration.class, initializers = ConfigDataApplicationContextInitializer.class) @@ -124,16 +65,7 @@ class JCSMPProducerCloseFlowRecoveryIT { private static final Logger logger = LoggerFactory.getLogger(JCSMPProducerCloseFlowRecoveryIT.class); - /** - * Control test (1/5) - persistent topic publisher + VPN spool toggle. - * - *

    Documents the empirical broker behaviour: cycling {@code maxMsgSpoolUsage} through - * 0 does not tear down already-bound publisher flows on a topic destination, - * so the binding's producer stays healthy and subsequent persistent topic publishes - * succeed. This is the negative control for the bug reproducer below - if this test - * ever starts seeing {@link StaleSessionException}, the broker behaviour around spool - * quota changes has shifted and the bug surface area is wider than expected. - */ + /** Control: persistent topic publisher survives a VPN spool quota toggle. */ @Test void test_persistentTopicPublisher_survivesSpoolToggle( JCSMPSession jcsmpSession, @@ -186,15 +118,7 @@ void test_persistentTopicPublisher_survivesSpoolToggle( } } - /** - * Control test (2/5) - DIRECT topic publisher + VPN spool toggle. - * - *

    Direct messages bypass the broker's spool subsystem entirely, so a spool quota - * toggle has no effect on a direct publisher. Uses the raw JCSMP session because - * the binder's {@code XMLMessageMapper} unconditionally promotes outbound messages - * to {@link DeliveryMode#PERSISTENT} and there is no producer-property switch for - * direct mode. - */ + /** Control: direct topic publisher (raw JCSMP) is unaffected by spool toggle. */ @Test void test_directTopicPublisher_survivesSpoolToggle( JCSMPSession jcsmpSession, @@ -245,20 +169,7 @@ void test_directTopicPublisher_survivesSpoolToggle( } } - /** - * Control test (3/5) - persistent queue publisher + queue ingress/egress toggle. - * - *

    Documents the empirical broker behaviour: disabling and re-enabling a queue's - * ingress and egress does not fan out unsolicited {@code CloseFlow} on - * publisher flows bound to that queue. While ingress is disabled the broker NACKs - * incoming publishes asynchronously; once it is re-enabled the publisher flow remains - * bound and publishes resume cleanly. Asserts the publisher continues to publish - * before, during the cycle (post-restoration), and after. - * - *

    If this test ever starts surfacing {@link StaleSessionException}, the broker - * behaviour around queue ingress/egress changes has shifted and the bug surface area - * is wider than today. - */ + /** Control: persistent queue publisher survives a queue ingress/egress toggle. */ @Test void test_persistentQueuePublisher_survivesQueueIngressEgressToggle( JCSMPSession jcsmpSession, @@ -320,38 +231,9 @@ void test_persistentQueuePublisher_survivesQueueIngressEgressToggle( } /** - * Recovery test (4/5) - persistent queue publisher + broker-level message-spool shutdown. - * - *

    This drives the precise customer-reported failure scenario for DATAGO-134580. The - * {@code hardware message-spool shutdown} / {@code no shutdown} CLI sequence is the - * only mechanism that fans out an unsolicited {@code CloseFlow} (503 Service - * Unavailable) to every currently-bound publisher flow on the broker without - * dropping the JCSMP session. SEMPv2-only approaches (zeroing the spool quota, - * toggling queue ingress/egress) either NACK publishes asynchronously without - * invalidating the flow, or also drop the session as a side-effect - neither matches - * the customer's actual failure mode. - * - *

    The CLI is reached by finding the running PubSub+ test container via the docker - * client that testcontainers already exposes, then {@code docker exec}ing the - * {@code /usr/sw/loads/currentload/bin/cli -A} script with the spool commands piped on - * stdin. We cannot avoid this layer of indirection because {@code PubSubPlusExtension} - * does not expose the container as a parameter, and SEMPv2 has no equivalent action. - * - *

    Assertions: - *

      - *
    1. Proactive recovery - the first post-shutdown publish must succeed. By - * the time we reach this assertion JCSMP has fanned the unsolicited CloseFlow - * into the producer's actor thread (visible as {@code JCSMPTransportException: - * Received unsolicited CloseFlow ... 503} in test logs) so the binding's - * producer reference is closed. The handler's proactive - * {@code producer.isClosed()} pre-check at the top of {@code handleMessage(...)} - * detects this and rebuilds the producer before {@code send(...)} is ever - * attempted. On master without the fix this assertion fails with a - * {@link StaleSessionException}-rooted {@link MessagingException}.
    2. - *
    3. Steady state - a subsequent publish on the recreated producer also - * succeeds. Guards against a regression that leaves the producer in a - * half-built state.
    4. - *
    + * Recovery: broker-level {@code hardware message-spool shutdown} fans unsolicited + * CloseFlow to the bound producer; the handler's proactive {@code isClosed()} pre-check + * must rebuild and the first post-shutdown publish must succeed. */ @Test void test_persistentQueuePublisher_recoversAfterMessageSpoolCliShutdown( @@ -376,19 +258,7 @@ void test_persistentQueuePublisher_recoversAfterMessageSpoolCliShutdown( try { moduleOutputChannel.send(MessageBuilder.withPayload("before-shutdown").build()); - // === Phase 1: broker-side disruption via CLI === - // Tear down the broker's spool subsystem so it fans out unsolicited CloseFlow - // to every bound publisher / consumer flow. The JCSMP session stays connected; - // only the GD flows die. - // - // NOTE on command shape: the Solace CLI requires stepping through the config - // sub-modes one line at a time (`hardware`, then `message-spool`) - the dotted - // form `hardware message-spool` is not reliably accepted. The `shutdown` - // command itself is destructive and is guarded by a single confirmation prompt - // ("All message spooling will be stopped. Do you want to continue (y/n)?") - // that we answer with `y`. Without that, the exec hangs at the prompt and the - // shutdown never actually takes effect. `no shutdown` (the re-enable) is - // non-destructive and does not prompt. + // CLI sub-modes are entered one line at a time; the `shutdown` prompt requires `y`. logger.info("Shutting down broker message-spool via CLI in container '{}'", solaceContainerId); runSolaceCliCommands(solaceContainerId, "enable", @@ -396,12 +266,7 @@ void test_persistentQueuePublisher_recoversAfterMessageSpoolCliShutdown( "hardware", "message-spool", "shutdown", - "y"); // answer the "Do you want to continue (y/n)?" confirmation prompt - // Wait until the broker's SEMP API is responsive again before driving the - // re-enable CLI command. The shutdown is synchronous from the CLI's perspective, - // but the broker's HTTP management surface may briefly stutter as the spool - // subsystem transitions; awaitBrokerSempResponsive(...) probes that surface - // instead of trusting a fixed sleep window. + "y"); awaitBrokerSempResponsive(sempV2Api, vpnName); logger.info("Re-enabling broker message-spool via CLI in container '{}'", solaceContainerId); @@ -412,40 +277,17 @@ void test_persistentQueuePublisher_recoversAfterMessageSpoolCliShutdown( "message-spool", "no shutdown"); spoolRestored = true; - // Probe again after re-enable - confirms the broker is back to a queryable / - // stable state before we attempt the recovery publish. awaitBrokerSempResponsive(sempV2Api, vpnName); - // === Phase 2: recovery proof === - // The first post-shutdown publish MUST succeed. By this point JCSMP has fanned - // the unsolicited CloseFlow into the producer's actor thread (visible in test - // logs as "JCSMPTransportException: Received unsolicited CloseFlow ... 503"), - // so the binding's producer reference is now closed. The handler's proactive - // producer.isClosed() pre-check at the top of handleMessage() detects this and - // rebuilds the producer before send() is ever attempted - so the publish - // transparently recovers on its first attempt rather than the customer seeing - // a JCSMPException through the error channel. - // - // On master without the fix this assertion fails with a StaleSessionException - // rooted in the customer's exact traceback; with only the reactive (catch-block) - // half of the fix this would also fail because the catch arms the flag but the - // in-flight message itself is still surfaced to the error channel; the proactive - // pre-check is what makes this single assertion green. logger.info("Attempting first post-shutdown publish; expecting proactive recovery"); assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload("recovery-1").build())) .as("First publish after broker CLI message-spool shutdown must recover via proactive isClosed() pre-check") .doesNotThrowAnyException(); - // === Phase 3: steady-state proof === - // A subsequent publish on the now-recreated producer must also succeed - guards - // against a regression where the recreate path inadvertently leaves the producer - // in a half-built state. assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload("recovery-2").build())) .as("Steady-state publish after recovery must continue to work") .doesNotThrowAnyException(); } finally { - // Belt-and-suspenders: if assertion 1 timed out before we re-enabled the spool, - // turn it back on so subsequent tests / the broker container stay healthy. if (!spoolRestored) { try { runSolaceCliCommands(solaceContainerId, @@ -471,24 +313,7 @@ void test_persistentQueuePublisher_recoversAfterMessageSpoolCliShutdown( } } - /** - * Repeated recovery test (5/5) - persistent queue publisher + N consecutive - * broker-level message-spool shutdown / restore cycles on the same binding. - * - *

    Test 4 proves that the {@code JCSMPOutboundMessageHandler} fix recovers from a - * single unsolicited {@code CloseFlow}. This test proves the same fix continues to work - * across multiple consecutive stale-flow events on the same binding, without any - * intermediate {@code stop() / start()}. Each cycle the broker tears down the current - * producer (the original in cycle 1, the recreated one in cycle 2, and so on); the - * proactive {@code producer.isClosed()} pre-check must catch the closed producer at - * the top of every {@code handleMessage(...)} and rebuild it before send. A regression - * that prevented re-detection on the second-or-later cycle (e.g. an accumulated state - * bug, or the recreate flag becoming sticky) would surface here as the first cycle - * passing but later cycles failing. - * - *

    Three cycles is enough to expose state-accumulation bugs while keeping the test - * runtime bounded: each cycle costs ~1 broker spool restart (~5-10s). - */ + /** Recovery: same disruption looped 3x on the same binding to catch state-accumulation regressions. */ @Test void test_persistentQueuePublisher_recoversAcrossRepeatedMessageSpoolCliShutdowns( JCSMPSession jcsmpSession, @@ -515,10 +340,6 @@ void test_persistentQueuePublisher_recoversAcrossRepeatedMessageSpoolCliShutdown for (int cycle = 1; cycle <= cycles; cycle++) { logger.info("=== Cycle {}/{}: shutdown -> witness-failure -> recover ===", cycle, cycles); - - // Mark the spool as "down" before issuing the destructive CLI; cleared again - // once the matching `no shutdown` lands. If the witness assertion below times - // out mid-cycle the finally-block sees this flag and re-enables the spool. spoolRestored = false; runSolaceCliCommands(solaceContainerId, @@ -527,7 +348,7 @@ void test_persistentQueuePublisher_recoversAcrossRepeatedMessageSpoolCliShutdown "hardware", "message-spool", "shutdown", - "y"); // single "Do you want to continue (y/n)?" confirmation prompt + "y"); awaitBrokerSempResponsive(sempV2Api, vpnName); runSolaceCliCommands(solaceContainerId, @@ -539,30 +360,18 @@ void test_persistentQueuePublisher_recoversAcrossRepeatedMessageSpoolCliShutdown spoolRestored = true; awaitBrokerSempResponsive(sempV2Api, vpnName); - // Recovery for THIS cycle. The handler is holding a producer (the original - // one in cycle 1, or the one recreated by cycle N-1's recovery publish in - // later cycles); the broker just tore that producer down via CloseFlow. - // The proactive producer.isClosed() pre-check at the top of handleMessage() - // must detect the closed producer and rebuild it BEFORE attempting the send - - // so this publish succeeds on its first attempt. A regression in either the - // proactive detection or the flag-reset-on-recreate would surface here as a - // thrown exception from the first cycle that hits it. final int currentCycle = cycle; assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload( "recovery-c" + currentCycle).build())) .as("Cycle %d: first publish after broker CLI shutdown must recover via proactive isClosed() pre-check", currentCycle) .doesNotThrowAnyException(); - // Steady-state publish on the now-recreated producer. assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload( "steady-c" + currentCycle).build())) .as("Cycle %d: steady-state publish after recovery must continue to work", currentCycle) .doesNotThrowAnyException(); } - // After N cycles the binding's producer should still be servicing plain publishes - // without re-triggering the recreate path - i.e. the flag is cleanly cleared and - // the handler is in a steady state, not stuck in a perpetual recreate loop. assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload("final-healthy").build())) .as("After %d shutdown/recover cycles, the binding's producer must continue to publish normally", cycles) .doesNotThrowAnyException(); @@ -592,17 +401,9 @@ void test_persistentQueuePublisher_recoversAcrossRepeatedMessageSpoolCliShutdown } } - // === SEMP-driven broker-state poll helpers === - // Each helper Awaits the broker reaching a known state before the test proceeds. - // Polls SEMP every 100ms with a 10s ceiling. ignoreExceptions() lets the poll - // shrug off transient ApiExceptions during state transitions (e.g. while the - // broker is mid-restart from a CLI message-spool shutdown). + // SEMP-driven broker-state poll helpers (100ms poll, 10s ceiling, transient errors ignored). - /** - * Polls SEMPv2 until the VPN's {@code maxMsgSpoolUsage} matches the supplied value. - * Used after {@code BrokerConfigurator.disableMsgSpoolForVpn / restoreMsgSpoolForVpn} - * to confirm the broker has committed the change before the next test step runs. - */ + /** Polls until {@code maxMsgSpoolUsage} matches {@code expectedMb}. */ private static void awaitVpnMaxMsgSpoolUsage(SempV2Api sempV2Api, String vpnName, long expectedMb) { Awaitility.await(String.format("VPN '%s' maxMsgSpoolUsage == %d MB", vpnName, expectedMb)) .atMost(Duration.ofSeconds(10)) @@ -615,11 +416,7 @@ private static void awaitVpnMaxMsgSpoolUsage(SempV2Api sempV2Api, String vpnName .isEqualTo(expectedMb)); } - /** - * Polls SEMPv2 until a queue's ingress and egress flags match the supplied values. - * Used after {@code BrokerConfigurator.disableIngressOnQueue / disableEgressOnQueue} - * (and their re-enable counterparts) to confirm both broker-side flags have settled. - */ + /** Polls until the queue's ingress/egress flags match the supplied values. */ private static void awaitQueueIngressEgress(SempV2Api sempV2Api, String vpnName, String queueName, boolean expectedIngress, boolean expectedEgress) { Awaitility.await(String.format("queue '%s' ingress=%s, egress=%s", @@ -636,13 +433,7 @@ private static void awaitQueueIngressEgress(SempV2Api sempV2Api, String vpnName, }); } - /** - * Polls SEMPv2 until the broker is responsive on the supplied VPN. Used as a proxy - * "broker is stable" check around the CLI message-spool shutdown / re-enable cycle - * where there is no direct SEMP-level signal for spool-subsystem state - if the - * monitor API answers a {@code MsgVpn} lookup successfully, the broker is at least - * back to a queryable state. - */ + /** Polls until the broker SEMP API answers a {@code MsgVpn} lookup again. */ private static void awaitBrokerSempResponsive(SempV2Api sempV2Api, String vpnName) { Awaitility.await("broker SEMP API responsive for VPN '" + vpnName + "'") .atMost(Duration.ofSeconds(15)) @@ -654,14 +445,9 @@ private static void awaitBrokerSempResponsive(SempV2Api sempV2Api, String vpnNam .isNotNull()); } - // === Docker / CLI helpers (only used by the message-spool CLI shutdown test) === + // Docker / CLI helpers (only used by the message-spool CLI shutdown tests). - /** - * Finds the running PubSub+ test container created by {@link PubSubPlusExtension}. Uses - * the docker-java client that testcontainers exposes (so it works on the same daemon - * testcontainers is using) and filters by image name. The standard Solace PubSub+ - * container image identifier always contains {@code solace-pubsub-standard}. - */ + /** Finds the running {@code solace-pubsub-standard} container via the testcontainers docker client. */ private static String findSolaceContainerId() { DockerClient docker = DockerClientFactory.instance().client(); List containers = docker.listContainersCmd().exec(); @@ -675,53 +461,21 @@ private static String findSolaceContainerId() { } /** - * Drives the broker container's Solace CLI with the supplied commands. - * - *

    Why this needs a TTY: the Solace CLI detects whether stdin is a real - * terminal and only honours confirmation prompts (the "Do you want to continue - * (y/n)?" guard around destructive commands like {@code shutdown}) when it is. When - * stdin is a plain pipe (e.g. {@code printf '...' | cli -A}), the CLI silently - * cancels the destructive command and continues - which is why earlier attempts at - * piping {@code y} answers through a shell pipeline never actually shut the spool - * down. Allocating a pseudo-TTY on the {@code docker exec} (the docker-java - * equivalent of {@code docker exec -it}) makes the CLI process the prompts the - * same way it does in a real terminal session. - * - *

    The commands are fed into the TTY's stdin via docker-java's - * {@link com.github.dockerjava.api.command.ExecStartCmd#withStdIn(java.io.InputStream)}. - * Trailing {@code end} + two {@code exit}s are appended so the CLI walks the mode stack - * back down and actually terminates the {@code cli -A} process: - *

    -	 *   end    : leaves config mode      ((configure/...)# -> #)
    -	 *   exit   : leaves privileged exec  (#                  -> >)
    -	 *   exit   : closes user-exec session(>                 -> EOF, cli -A exits)
    -	 * 
    - * Without the second {@code exit} the CLI sits at the {@code >} prompt waiting for - * more input and {@code awaitCompletion(...)} below would never observe the process - * terminating, which would (a) silently hang for the full timeout and (b) trip the - * defensive throw below on every call - turning a one-shot CLI invocation into a - * 30s hang per call and (post-defensive-check) an outright test failure. - * - *

    The CLI binary path {@code /usr/sw/loads/currentload/bin/cli} is the standard - * location inside the {@code solace-pubsub-standard} image. + * Runs the Solace CLI inside the broker container. Requires a pseudo-TTY so confirmation + * prompts (e.g. destructive {@code shutdown}) are honoured; trailing {@code end} + two + * {@code exit}s ensure {@code cli -A} terminates instead of hanging at the prompt. */ private static void runSolaceCliCommands(String containerId, String... cliCommands) throws Exception { DockerClient docker = DockerClientFactory.instance().client(); StringBuilder script = new StringBuilder(); for (String cmd : cliCommands) { - // Use CRLF so the input is accepted regardless of whether the pty is in - // cooked or raw line discipline. Cooked mode treats \n as Enter on its - // own, but appending \r is harmless and avoids ambiguity. script.append(cmd).append("\r\n"); } - // Terminators so the cli -A process actually exits (see method-level Javadoc for - // why two `exit`s are needed). Without the second exit, the CLI hangs at the - // top-level user-exec '>' prompt and awaitCompletion(...) below trips the throw. script.append("end\r\n").append("exit\r\n").append("exit\r\n"); ExecCreateCmdResponse exec = docker.execCreateCmd(containerId) - .withTty(true) // pseudo-TTY so the CLI honors confirmation prompts + .withTty(true) .withAttachStdin(true) .withAttachStdout(true) .withAttachStderr(true) @@ -731,10 +485,6 @@ private static void runSolaceCliCommands(String containerId, String... cliComman ByteArrayInputStream stdin = new ByteArrayInputStream( script.toString().getBytes(StandardCharsets.UTF_8)); - // Capture the CLI's stdout/stderr so a timeout produces a diagnostic message rather - // than a silent hang followed by a misleading assertion failure downstream. - // docker-java's TTY mode merges stdout+stderr onto a single Frame stream, which is - // what `cli -A` produces anyway. final StringBuilder capturedOutput = new StringBuilder(); com.github.dockerjava.api.async.ResultCallback.Adapter callback = new com.github.dockerjava.api.async.ResultCallback.Adapter<>() { From f10cb850315b7f63560de3f70195cfd47f97f14c Mon Sep 17 00:00:00 2001 From: sunil-solace Date: Wed, 20 May 2026 14:04:22 -0400 Subject: [PATCH 09/13] DATAGO-134580: Apply IT review feedback (PR #141) - Rename test methods to repo's testCamelCase convention. - Hoist BrokerConfigurator, vpnName, original spool quota, and cleanup of producer binding / queue deprovision / spool restore into @BeforeEach + @AfterEach so each test focuses on the scenario under test. - Extract createPersistentProducerChannel helper for the shared binder.bindProducer setup. - Drop the two SEMP-polling helpers (awaitVpnMaxMsgSpoolUsage, awaitQueueIngressEgress); SEMPv2 calls are synchronous so the toggles are committed by the time the call returns. Keep awaitBrokerSempResponsive for the CLI shutdown path where the broker management surface itself stutters. - Collapse BrokerConfiguratorBuilder's disable/restore spool pair into setMsgVpnSpool, and add getMsgVpnSpool accessor. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../config/BrokerConfiguratorBuilder.java | 12 +- .../JCSMPProducerCloseFlowRecoveryIT.java | 486 +++++++----------- 2 files changed, 187 insertions(+), 311 deletions(-) diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/it/util/semp/config/BrokerConfiguratorBuilder.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/it/util/semp/config/BrokerConfiguratorBuilder.java index 1324815cb..561b41d94 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/it/util/semp/config/BrokerConfiguratorBuilder.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/it/util/semp/config/BrokerConfiguratorBuilder.java @@ -279,15 +279,13 @@ public void disableVpn(String msgVpnName) { updateVpn(vpn); } - /** Zeroes the VPN's GD message-spool quota. */ - public void disableMsgSpoolForVpn(String msgVpnName) { - final ConfigMsgVpn vpn = queryVpn(msgVpnName); - vpn.maxMsgSpoolUsage(0L); - updateVpn(vpn); + /** Returns the VPN's GD message-spool quota (MB). */ + public Long getMsgVpnSpool(String msgVpnName) { + return queryVpn(msgVpnName).getMaxMsgSpoolUsage(); } - /** Restores the VPN's GD message-spool quota to the supplied value (MB). */ - public void restoreMsgSpoolForVpn(String msgVpnName, Long maxMsgSpoolUsageMb) { + /** Sets the VPN's GD message-spool quota (MB). Pass 0 to disable. */ + public void setMsgVpnSpool(String msgVpnName, Long maxMsgSpoolUsageMb) { final ConfigMsgVpn vpn = queryVpn(msgVpnName); vpn.maxMsgSpoolUsage(maxMsgSpoolUsageMb); updateVpn(vpn); diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java index f3d1f9a84..5ba02aff8 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java @@ -11,7 +11,6 @@ import com.solace.spring.cloud.stream.binder.util.DestinationType; import com.solace.test.integration.junit.jupiter.extension.PubSubPlusExtension; import com.solace.test.integration.semp.v2.SempV2Api; -import com.solace.test.integration.semp.v2.config.model.ConfigMsgVpnQueue; import com.solacesystems.jcsmp.DeliveryMode; import com.solacesystems.jcsmp.JCSMPFactory; import com.solacesystems.jcsmp.JCSMPProperties; @@ -25,6 +24,8 @@ import com.github.dockerjava.api.model.Container; import org.apache.commons.lang3.RandomStringUtils; import org.awaitility.Awaitility; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.TestInfo; import org.junit.jupiter.api.extension.ExtendWith; @@ -65,169 +66,141 @@ class JCSMPProducerCloseFlowRecoveryIT { private static final Logger logger = LoggerFactory.getLogger(JCSMPProducerCloseFlowRecoveryIT.class); - /** Control: persistent topic publisher survives a VPN spool quota toggle. */ - @Test - void test_persistentTopicPublisher_survivesSpoolToggle( - JCSMPSession jcsmpSession, - SempV2Api sempV2Api, - SpringCloudStreamContext context, - TestInfo testInfo) throws Exception { - SolaceTestBinder binder = context.getBinder(); - - String destination = RandomStringUtils.randomAlphanumeric(10); - ExtendedProducerProperties producerProperties = context.createProducerProperties(testInfo); - BindingProperties producerBindingProperties = new BindingProperties(); - producerBindingProperties.setProducer(producerProperties); - DirectChannel moduleOutputChannel = context.createBindableChannel("output", producerBindingProperties); - - Binding producerBinding = binder.bindProducer(destination, moduleOutputChannel, producerProperties); - - String vpnName = (String) jcsmpSession.getProperty(JCSMPProperties.VPN_NAME); - BrokerConfigurator brokerConfig = BrokerConfiguratorBuilder.create(sempV2Api).build(); - Long originalMaxMsgSpoolUsageMb = brokerConfig.vpns().queryVpn(vpnName).getMaxMsgSpoolUsage(); + private JCSMPSession jcsmpSession; + private SempV2Api sempV2Api; + private SpringCloudStreamContext context; + private String vpnName; + private BrokerConfigurator brokerConfig; + private Long originalMaxMsgSpoolUsageMb; + + // Per-test artifacts cleaned up in @AfterEach. + private Binding producerBinding; + private XMLMessageProducer rawProducer; + private String provisionedQueueName; + private boolean spoolModified; + private boolean brokerSpoolNeedsRestore; + private String solaceContainerId; + + @BeforeEach + void setUp(JCSMPSession jcsmpSession, SempV2Api sempV2Api, SpringCloudStreamContext context) { + this.jcsmpSession = jcsmpSession; + this.sempV2Api = sempV2Api; + this.context = context; + this.vpnName = (String) jcsmpSession.getProperty(JCSMPProperties.VPN_NAME); + this.brokerConfig = BrokerConfiguratorBuilder.create(sempV2Api).build(); + this.originalMaxMsgSpoolUsageMb = brokerConfig.vpns().getMsgVpnSpool(vpnName); assertThat(originalMaxMsgSpoolUsageMb) .as("Captured maxMsgSpoolUsage should be a positive value") .isNotNull() .isPositive(); + } - boolean spoolRestored = false; - try { - moduleOutputChannel.send(MessageBuilder.withPayload("before-toggle").build()); - - logger.info("Zeroing maxMsgSpoolUsage for VPN '{}'", vpnName); - brokerConfig.vpns().disableMsgSpoolForVpn(vpnName); - awaitVpnMaxMsgSpoolUsage(sempV2Api, vpnName, 0L); - - logger.info("Restoring maxMsgSpoolUsage={} MB for VPN '{}'", originalMaxMsgSpoolUsageMb, vpnName); - brokerConfig.vpns().restoreMsgSpoolForVpn(vpnName, originalMaxMsgSpoolUsageMb); - spoolRestored = true; - awaitVpnMaxMsgSpoolUsage(sempV2Api, vpnName, originalMaxMsgSpoolUsageMb); - - assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload("after-toggle").build())) - .as("Persistent topic publisher should be unaffected by a VPN spool quota toggle") - .doesNotThrowAnyException(); - } finally { - if (!spoolRestored) { - try { - brokerConfig.vpns().restoreMsgSpoolForVpn(vpnName, originalMaxMsgSpoolUsageMb); - } catch (Exception cleanupError) { - logger.warn("Failed to restore maxMsgSpoolUsage for VPN '{}' during cleanup", vpnName, cleanupError); - } + @AfterEach + void tearDown() { + if (spoolModified) { + try { + brokerConfig.vpns().setMsgVpnSpool(vpnName, originalMaxMsgSpoolUsageMb); + } catch (Exception e) { + logger.warn("Failed to restore maxMsgSpoolUsage for VPN '{}' during cleanup", vpnName, e); + } + } + if (brokerSpoolNeedsRestore) { + try { + runSolaceCliCommands(solaceContainerId, + "enable", + "configure", + "hardware", + "message-spool", + "no shutdown"); + } catch (Exception e) { + logger.warn("Failed to re-enable broker message-spool during cleanup", e); } + } + if (rawProducer != null) { + rawProducer.close(); + } + if (producerBinding != null) { producerBinding.unbind(); } + if (provisionedQueueName != null) { + Queue queue = JCSMPFactory.onlyInstance().createQueue(provisionedQueueName); + try { + jcsmpSession.deprovision(queue, JCSMPSession.FLAG_IGNORE_DOES_NOT_EXIST); + } catch (Exception e) { + logger.warn("Failed to deprovision queue '{}' during cleanup", provisionedQueueName, e); + } + } + } + + /** Control: persistent topic publisher survives a VPN spool quota toggle. */ + @Test + void testPersistentTopicPublisherSurvivesSpoolToggle(TestInfo testInfo) throws Exception { + DirectChannel moduleOutputChannel = createPersistentProducerChannel( + RandomStringUtils.randomAlphanumeric(10), DestinationType.TOPIC, testInfo); + + moduleOutputChannel.send(MessageBuilder.withPayload("before-toggle").build()); + + logger.info("Zeroing maxMsgSpoolUsage for VPN '{}'", vpnName); + spoolModified = true; + brokerConfig.vpns().setMsgVpnSpool(vpnName, 0L); + + logger.info("Restoring maxMsgSpoolUsage={} MB for VPN '{}'", originalMaxMsgSpoolUsageMb, vpnName); + brokerConfig.vpns().setMsgVpnSpool(vpnName, originalMaxMsgSpoolUsageMb); + spoolModified = false; + + assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload("after-toggle").build())) + .as("Persistent topic publisher should be unaffected by a VPN spool quota toggle") + .doesNotThrowAnyException(); } /** Control: direct topic publisher (raw JCSMP) is unaffected by spool toggle. */ @Test - void test_directTopicPublisher_survivesSpoolToggle( - JCSMPSession jcsmpSession, - SempV2Api sempV2Api) throws Exception { + void testDirectTopicPublisherSurvivesSpoolToggle() throws Exception { String topicName = RandomStringUtils.randomAlphanumeric(10); Topic topic = JCSMPFactory.onlyInstance().createTopic(topicName); - XMLMessageProducer producer = jcsmpSession.getMessageProducer(new SimpleJCSMPEventHandler()); - - String vpnName = (String) jcsmpSession.getProperty(JCSMPProperties.VPN_NAME); - BrokerConfigurator brokerConfig = BrokerConfiguratorBuilder.create(sempV2Api).build(); - Long originalMaxMsgSpoolUsageMb = brokerConfig.vpns().queryVpn(vpnName).getMaxMsgSpoolUsage(); - assertThat(originalMaxMsgSpoolUsageMb) - .as("Captured maxMsgSpoolUsage should be a positive value") - .isNotNull() - .isPositive(); - - boolean spoolRestored = false; - try { - TextMessage warmup = JCSMPFactory.onlyInstance().createMessage(TextMessage.class); - warmup.setDeliveryMode(DeliveryMode.DIRECT); - warmup.setText("before-toggle"); - producer.send(warmup, topic); - - logger.info("Zeroing maxMsgSpoolUsage for VPN '{}'", vpnName); - brokerConfig.vpns().disableMsgSpoolForVpn(vpnName); - awaitVpnMaxMsgSpoolUsage(sempV2Api, vpnName, 0L); - - logger.info("Restoring maxMsgSpoolUsage={} MB for VPN '{}'", originalMaxMsgSpoolUsageMb, vpnName); - brokerConfig.vpns().restoreMsgSpoolForVpn(vpnName, originalMaxMsgSpoolUsageMb); - spoolRestored = true; - awaitVpnMaxMsgSpoolUsage(sempV2Api, vpnName, originalMaxMsgSpoolUsageMb); - - TextMessage after = JCSMPFactory.onlyInstance().createMessage(TextMessage.class); - after.setDeliveryMode(DeliveryMode.DIRECT); - after.setText("after-toggle"); - assertThatCode(() -> producer.send(after, topic)) - .as("Direct topic publisher should be unaffected by a VPN spool quota toggle (no spool involvement)") - .doesNotThrowAnyException(); - } finally { - if (!spoolRestored) { - try { - brokerConfig.vpns().restoreMsgSpoolForVpn(vpnName, originalMaxMsgSpoolUsageMb); - } catch (Exception cleanupError) { - logger.warn("Failed to restore maxMsgSpoolUsage for VPN '{}' during cleanup", vpnName, cleanupError); - } - } - producer.close(); - } + rawProducer = jcsmpSession.getMessageProducer(new SimpleJCSMPEventHandler()); + + TextMessage warmup = JCSMPFactory.onlyInstance().createMessage(TextMessage.class); + warmup.setDeliveryMode(DeliveryMode.DIRECT); + warmup.setText("before-toggle"); + rawProducer.send(warmup, topic); + + logger.info("Zeroing maxMsgSpoolUsage for VPN '{}'", vpnName); + spoolModified = true; + brokerConfig.vpns().setMsgVpnSpool(vpnName, 0L); + + logger.info("Restoring maxMsgSpoolUsage={} MB for VPN '{}'", originalMaxMsgSpoolUsageMb, vpnName); + brokerConfig.vpns().setMsgVpnSpool(vpnName, originalMaxMsgSpoolUsageMb); + spoolModified = false; + + TextMessage after = JCSMPFactory.onlyInstance().createMessage(TextMessage.class); + after.setDeliveryMode(DeliveryMode.DIRECT); + after.setText("after-toggle"); + assertThatCode(() -> rawProducer.send(after, topic)) + .as("Direct topic publisher should be unaffected by a VPN spool quota toggle (no spool involvement)") + .doesNotThrowAnyException(); } /** Control: persistent queue publisher survives a queue ingress/egress toggle. */ @Test - void test_persistentQueuePublisher_survivesQueueIngressEgressToggle( - JCSMPSession jcsmpSession, - SempV2Api sempV2Api, - SpringCloudStreamContext context, - TestInfo testInfo) throws Exception { - SolaceTestBinder binder = context.getBinder(); - + void testPersistentQueuePublisherSurvivesQueueIngressEgressToggle(TestInfo testInfo) throws Exception { String queueName = RandomStringUtils.randomAlphanumeric(20); - ExtendedProducerProperties producerProperties = context.createProducerProperties(testInfo); - producerProperties.getExtension().setDestinationType(DestinationType.QUEUE); - BindingProperties producerBindingProperties = new BindingProperties(); - producerBindingProperties.setProducer(producerProperties); - DirectChannel moduleOutputChannel = context.createBindableChannel("output", producerBindingProperties); + provisionedQueueName = queueName; + DirectChannel moduleOutputChannel = createPersistentProducerChannel(queueName, DestinationType.QUEUE, testInfo); - Binding producerBinding = binder.bindProducer(queueName, moduleOutputChannel, producerProperties); + moduleOutputChannel.send(MessageBuilder.withPayload("before-toggle").build()); - String vpnName = (String) jcsmpSession.getProperty(JCSMPProperties.VPN_NAME); - BrokerConfigurator brokerConfig = BrokerConfiguratorBuilder.create(sempV2Api).build(); + logger.info("Disabling ingress and egress on queue '{}' in VPN '{}'", queueName, vpnName); + brokerConfig.queues().disableIngressOnQueue(vpnName, queueName); + brokerConfig.queues().disableEgressOnQueue(vpnName, queueName); - boolean queueRestored = false; - try { - moduleOutputChannel.send(MessageBuilder.withPayload("before-toggle").build()); + logger.info("Re-enabling ingress and egress on queue '{}' in VPN '{}'", queueName, vpnName); + brokerConfig.queues().reenableIngressOnQueue(vpnName, queueName); + brokerConfig.queues().reenableEgressOnQueue(vpnName, queueName); - logger.info("Disabling ingress and egress on queue '{}' in VPN '{}'", queueName, vpnName); - brokerConfig.queues().disableIngressOnQueue(vpnName, queueName); - brokerConfig.queues().disableEgressOnQueue(vpnName, queueName); - awaitQueueIngressEgress(sempV2Api, vpnName, queueName, false, false); - - logger.info("Re-enabling ingress and egress on queue '{}' in VPN '{}'", queueName, vpnName); - brokerConfig.queues().reenableIngressOnQueue(vpnName, queueName); - brokerConfig.queues().reenableEgressOnQueue(vpnName, queueName); - queueRestored = true; - awaitQueueIngressEgress(sempV2Api, vpnName, queueName, true, true); - - assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload("after-toggle").build())) - .as("Persistent queue publisher should be unaffected by a queue ingress/egress toggle") - .doesNotThrowAnyException(); - } finally { - if (!queueRestored) { - try { - brokerConfig.queues().reenableIngressOnQueue(vpnName, queueName); - brokerConfig.queues().reenableEgressOnQueue(vpnName, queueName); - } catch (Exception cleanupError) { - logger.warn("Failed to re-enable queue '{}' during cleanup", queueName, cleanupError); - } - } - try { - producerBinding.unbind(); - } finally { - Queue queue = JCSMPFactory.onlyInstance().createQueue(queueName); - try { - jcsmpSession.deprovision(queue, JCSMPSession.FLAG_IGNORE_DOES_NOT_EXIST); - } catch (Exception deprovisionError) { - logger.warn("Failed to deprovision queue '{}' during cleanup", queueName, deprovisionError); - } - } - } + assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload("after-toggle").build())) + .as("Persistent queue publisher should be unaffected by a queue ingress/egress toggle") + .doesNotThrowAnyException(); } /** @@ -236,30 +209,61 @@ void test_persistentQueuePublisher_survivesQueueIngressEgressToggle( * must rebuild and the first post-shutdown publish must succeed. */ @Test - void test_persistentQueuePublisher_recoversAfterMessageSpoolCliShutdown( - JCSMPSession jcsmpSession, - SempV2Api sempV2Api, - SpringCloudStreamContext context, - TestInfo testInfo) throws Exception { - SolaceTestBinder binder = context.getBinder(); + void testPersistentQueuePublisherRecoversAfterMessageSpoolCliShutdown(TestInfo testInfo) throws Exception { + String queueName = RandomStringUtils.randomAlphanumeric(20); + provisionedQueueName = queueName; + DirectChannel moduleOutputChannel = createPersistentProducerChannel(queueName, DestinationType.QUEUE, testInfo); + solaceContainerId = findSolaceContainerId(); + + moduleOutputChannel.send(MessageBuilder.withPayload("before-shutdown").build()); + + // CLI sub-modes are entered one line at a time; the `shutdown` prompt requires `y`. + logger.info("Shutting down broker message-spool via CLI in container '{}'", solaceContainerId); + brokerSpoolNeedsRestore = true; + runSolaceCliCommands(solaceContainerId, + "enable", + "configure", + "hardware", + "message-spool", + "shutdown", + "y"); + awaitBrokerSempResponsive(sempV2Api, vpnName); + + logger.info("Re-enabling broker message-spool via CLI in container '{}'", solaceContainerId); + runSolaceCliCommands(solaceContainerId, + "enable", + "configure", + "hardware", + "message-spool", + "no shutdown"); + brokerSpoolNeedsRestore = false; + awaitBrokerSempResponsive(sempV2Api, vpnName); + + logger.info("Attempting first post-shutdown publish; expecting proactive recovery"); + assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload("recovery-1").build())) + .as("First publish after broker CLI message-spool shutdown must recover via proactive isClosed() pre-check") + .doesNotThrowAnyException(); + + assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload("recovery-2").build())) + .as("Steady-state publish after recovery must continue to work") + .doesNotThrowAnyException(); + } + /** Recovery: same disruption looped 3x on the same binding to catch state-accumulation regressions. */ + @Test + void testPersistentQueuePublisherRecoversAcrossRepeatedMessageSpoolCliShutdowns(TestInfo testInfo) throws Exception { String queueName = RandomStringUtils.randomAlphanumeric(20); - ExtendedProducerProperties producerProperties = context.createProducerProperties(testInfo); - producerProperties.getExtension().setDestinationType(DestinationType.QUEUE); - BindingProperties producerBindingProperties = new BindingProperties(); - producerBindingProperties.setProducer(producerProperties); - DirectChannel moduleOutputChannel = context.createBindableChannel("output", producerBindingProperties); + provisionedQueueName = queueName; + DirectChannel moduleOutputChannel = createPersistentProducerChannel(queueName, DestinationType.QUEUE, testInfo); + solaceContainerId = findSolaceContainerId(); + final int cycles = 3; - Binding producerBinding = binder.bindProducer(queueName, moduleOutputChannel, producerProperties); + moduleOutputChannel.send(MessageBuilder.withPayload("initial-healthy").build()); - String vpnName = (String) jcsmpSession.getProperty(JCSMPProperties.VPN_NAME); - String solaceContainerId = findSolaceContainerId(); - boolean spoolRestored = false; - try { - moduleOutputChannel.send(MessageBuilder.withPayload("before-shutdown").build()); + for (int cycle = 1; cycle <= cycles; cycle++) { + logger.info("=== Cycle {}/{}: shutdown -> witness-failure -> recover ===", cycle, cycles); + brokerSpoolNeedsRestore = true; - // CLI sub-modes are entered one line at a time; the `shutdown` prompt requires `y`. - logger.info("Shutting down broker message-spool via CLI in container '{}'", solaceContainerId); runSolaceCliCommands(solaceContainerId, "enable", "configure", @@ -269,168 +273,42 @@ void test_persistentQueuePublisher_recoversAfterMessageSpoolCliShutdown( "y"); awaitBrokerSempResponsive(sempV2Api, vpnName); - logger.info("Re-enabling broker message-spool via CLI in container '{}'", solaceContainerId); runSolaceCliCommands(solaceContainerId, "enable", "configure", "hardware", "message-spool", "no shutdown"); - spoolRestored = true; + brokerSpoolNeedsRestore = false; awaitBrokerSempResponsive(sempV2Api, vpnName); - logger.info("Attempting first post-shutdown publish; expecting proactive recovery"); - assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload("recovery-1").build())) - .as("First publish after broker CLI message-spool shutdown must recover via proactive isClosed() pre-check") + final int currentCycle = cycle; + assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload( + "recovery-c" + currentCycle).build())) + .as("Cycle %d: first publish after broker CLI shutdown must recover via proactive isClosed() pre-check", currentCycle) .doesNotThrowAnyException(); - assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload("recovery-2").build())) - .as("Steady-state publish after recovery must continue to work") + assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload( + "steady-c" + currentCycle).build())) + .as("Cycle %d: steady-state publish after recovery must continue to work", currentCycle) .doesNotThrowAnyException(); - } finally { - if (!spoolRestored) { - try { - runSolaceCliCommands(solaceContainerId, - "enable", - "configure", - "hardware", - "message-spool", - "no shutdown"); - } catch (Exception cleanupError) { - logger.warn("Failed to re-enable broker message-spool during cleanup", cleanupError); - } - } - try { - producerBinding.unbind(); - } finally { - Queue queue = JCSMPFactory.onlyInstance().createQueue(queueName); - try { - jcsmpSession.deprovision(queue, JCSMPSession.FLAG_IGNORE_DOES_NOT_EXIST); - } catch (Exception deprovisionError) { - logger.warn("Failed to deprovision queue '{}' during cleanup", queueName, deprovisionError); - } - } } + + assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload("final-healthy").build())) + .as("After %d shutdown/recover cycles, the binding's producer must continue to publish normally", cycles) + .doesNotThrowAnyException(); } - /** Recovery: same disruption looped 3x on the same binding to catch state-accumulation regressions. */ - @Test - void test_persistentQueuePublisher_recoversAcrossRepeatedMessageSpoolCliShutdowns( - JCSMPSession jcsmpSession, - SempV2Api sempV2Api, - SpringCloudStreamContext context, - TestInfo testInfo) throws Exception { + private DirectChannel createPersistentProducerChannel(String destination, DestinationType destinationType, + TestInfo testInfo) throws Exception { SolaceTestBinder binder = context.getBinder(); - - String queueName = RandomStringUtils.randomAlphanumeric(20); ExtendedProducerProperties producerProperties = context.createProducerProperties(testInfo); - producerProperties.getExtension().setDestinationType(DestinationType.QUEUE); + producerProperties.getExtension().setDestinationType(destinationType); BindingProperties producerBindingProperties = new BindingProperties(); producerBindingProperties.setProducer(producerProperties); DirectChannel moduleOutputChannel = context.createBindableChannel("output", producerBindingProperties); - - Binding producerBinding = binder.bindProducer(queueName, moduleOutputChannel, producerProperties); - - String vpnName = (String) jcsmpSession.getProperty(JCSMPProperties.VPN_NAME); - String solaceContainerId = findSolaceContainerId(); - final int cycles = 3; - boolean spoolRestored = true; - try { - moduleOutputChannel.send(MessageBuilder.withPayload("initial-healthy").build()); - - for (int cycle = 1; cycle <= cycles; cycle++) { - logger.info("=== Cycle {}/{}: shutdown -> witness-failure -> recover ===", cycle, cycles); - spoolRestored = false; - - runSolaceCliCommands(solaceContainerId, - "enable", - "configure", - "hardware", - "message-spool", - "shutdown", - "y"); - awaitBrokerSempResponsive(sempV2Api, vpnName); - - runSolaceCliCommands(solaceContainerId, - "enable", - "configure", - "hardware", - "message-spool", - "no shutdown"); - spoolRestored = true; - awaitBrokerSempResponsive(sempV2Api, vpnName); - - final int currentCycle = cycle; - assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload( - "recovery-c" + currentCycle).build())) - .as("Cycle %d: first publish after broker CLI shutdown must recover via proactive isClosed() pre-check", currentCycle) - .doesNotThrowAnyException(); - - assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload( - "steady-c" + currentCycle).build())) - .as("Cycle %d: steady-state publish after recovery must continue to work", currentCycle) - .doesNotThrowAnyException(); - } - - assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload("final-healthy").build())) - .as("After %d shutdown/recover cycles, the binding's producer must continue to publish normally", cycles) - .doesNotThrowAnyException(); - } finally { - if (!spoolRestored) { - try { - runSolaceCliCommands(solaceContainerId, - "enable", - "configure", - "hardware", - "message-spool", - "no shutdown"); - } catch (Exception cleanupError) { - logger.warn("Failed to re-enable broker message-spool during cleanup", cleanupError); - } - } - try { - producerBinding.unbind(); - } finally { - Queue queue = JCSMPFactory.onlyInstance().createQueue(queueName); - try { - jcsmpSession.deprovision(queue, JCSMPSession.FLAG_IGNORE_DOES_NOT_EXIST); - } catch (Exception deprovisionError) { - logger.warn("Failed to deprovision queue '{}' during cleanup", queueName, deprovisionError); - } - } - } - } - - // SEMP-driven broker-state poll helpers (100ms poll, 10s ceiling, transient errors ignored). - - /** Polls until {@code maxMsgSpoolUsage} matches {@code expectedMb}. */ - private static void awaitVpnMaxMsgSpoolUsage(SempV2Api sempV2Api, String vpnName, long expectedMb) { - Awaitility.await(String.format("VPN '%s' maxMsgSpoolUsage == %d MB", vpnName, expectedMb)) - .atMost(Duration.ofSeconds(10)) - .pollInterval(Duration.ofMillis(100)) - .ignoreExceptions() - .untilAsserted(() -> assertThat(sempV2Api.config() - .getMsgVpn(vpnName, null, null) - .getData() - .getMaxMsgSpoolUsage()) - .isEqualTo(expectedMb)); - } - - /** Polls until the queue's ingress/egress flags match the supplied values. */ - private static void awaitQueueIngressEgress(SempV2Api sempV2Api, String vpnName, String queueName, - boolean expectedIngress, boolean expectedEgress) { - Awaitility.await(String.format("queue '%s' ingress=%s, egress=%s", - queueName, expectedIngress, expectedEgress)) - .atMost(Duration.ofSeconds(10)) - .pollInterval(Duration.ofMillis(100)) - .ignoreExceptions() - .untilAsserted(() -> { - ConfigMsgVpnQueue queue = sempV2Api.config() - .getMsgVpnQueue(vpnName, queueName, null, null) - .getData(); - assertThat(queue.isIngressEnabled()).as("ingress").isEqualTo(expectedIngress); - assertThat(queue.isEgressEnabled()).as("egress").isEqualTo(expectedEgress); - }); + producerBinding = binder.bindProducer(destination, moduleOutputChannel, producerProperties); + return moduleOutputChannel; } /** Polls until the broker SEMP API answers a {@code MsgVpn} lookup again. */ @@ -507,4 +385,4 @@ public void onNext(com.github.dockerjava.api.model.Frame frame) { containerId, script, capturedOutput)); } } -} \ No newline at end of file +} From f27284186b780df7a1d78552cc820a7b039ba00f Mon Sep 17 00:00:00 2001 From: sunil-solace Date: Thu, 21 May 2026 10:58:42 -0400 Subject: [PATCH 10/13] DATAGO-134580: Rename new unit tests to testCamelCase (PR #141) Align the six DATAGO-134580 tests in JCSMPOutboundMessageHandlerTest with the repo's dominant testCamelCase convention. Pre-existing test_xxx_yyy methods in the same file are not part of this PR and are left alone. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../outbound/JCSMPOutboundMessageHandlerTest.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java index 413faec90..d72c8390f 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java @@ -689,7 +689,7 @@ void testGetBindingName() { /** DATAGO-134580: stale producer (CloseFlow) is recreated on the next send. */ @CartesianTest(name = "[{index}] transacted={0}") - public void test_producerRecreatedAfterUnsolicitedCloseFlow( + public void testProducerRecreatedAfterUnsolicitedCloseFlow( @Values(booleans = {false, true}) boolean transacted, @Mock XMLMessageProducer producerB, @Mock TransactedSession transactedSessionB) throws Exception { @@ -742,7 +742,7 @@ public void test_producerRecreatedAfterUnsolicitedCloseFlow( /** DATAGO-134580: {@link ClosedFacilityException} on send also triggers recreation. */ @Test - void test_producerRecreatedAfterClosedFacilityException(@Mock XMLMessageProducer producerB) throws Exception { + void testProducerRecreatedAfterClosedFacilityException(@Mock XMLMessageProducer producerB) throws Exception { Mockito.when(session.createProducer( producerFlowPropertiesCaptor.capture(), pubEventHandlerCaptor.capture())) .thenReturn(messageProducer) @@ -769,7 +769,7 @@ void test_producerRecreatedAfterClosedFacilityException(@Mock XMLMessageProducer /** DATAGO-134580: {@link JCSMPTransportException} on send also triggers recreation. */ @Test - void test_producerRecreatedAfterJCSMPTransportException(@Mock XMLMessageProducer producerB) throws Exception { + void testProducerRecreatedAfterJCSMPTransportException(@Mock XMLMessageProducer producerB) throws Exception { Mockito.when(session.createProducer( producerFlowPropertiesCaptor.capture(), pubEventHandlerCaptor.capture())) .thenReturn(messageProducer) @@ -797,7 +797,7 @@ void test_producerRecreatedAfterJCSMPTransportException(@Mock XMLMessageProducer /** DATAGO-134580: proactive {@code isClosed()} pre-check rebuilds before the first send. */ @Test - void test_producerRecreatedProactivelyWhenIsClosedDetectedBeforeSend(@Mock XMLMessageProducer producerB) throws Exception { + void testProducerRecreatedProactivelyWhenIsClosedDetectedBeforeSend(@Mock XMLMessageProducer producerB) throws Exception { Mockito.when(session.createProducer( producerFlowPropertiesCaptor.capture(), pubEventHandlerCaptor.capture())) .thenReturn(messageProducer) @@ -819,7 +819,7 @@ void test_producerRecreatedProactivelyWhenIsClosedDetectedBeforeSend(@Mock XMLMe /** DATAGO-134580: failed recreation surfaces an error and retries on the next message. */ @Test - void test_producerRecreationFailurePropagatesAndRetriesNext(@Mock XMLMessageProducer producerB) throws Exception { + void testProducerRecreationFailurePropagatesAndRetriesNext(@Mock XMLMessageProducer producerB) throws Exception { Mockito.when(session.createProducer( producerFlowPropertiesCaptor.capture(), pubEventHandlerCaptor.capture())) .thenReturn(messageProducer) @@ -855,7 +855,7 @@ void test_producerRecreationFailurePropagatesAndRetriesNext(@Mock XMLMessageProd /** DATAGO-134580: stop/start clears the recreate flag so the next send doesn't rebuild. */ @Test - void test_recreationFlagResetAcrossStopStartCycle(@Mock XMLMessageProducer producerB) throws Exception { + void testRecreationFlagResetAcrossStopStartCycle(@Mock XMLMessageProducer producerB) throws Exception { Mockito.when(session.createProducer( producerFlowPropertiesCaptor.capture(), pubEventHandlerCaptor.capture())) .thenReturn(messageProducer) From 3b7b2e4b03b7421ddec1a525313a69ba42007324 Mon Sep 17 00:00:00 2001 From: sunil-solace Date: Thu, 21 May 2026 18:55:47 -0400 Subject: [PATCH 11/13] DATAGO-134580: Apply PR review feedback and harden CLI-shutdown ITs Outbound handler / error-queue (PR #475 review): - Drop orphan section comment above ErrorQueueInfrastructure.send; the recreate logic in the catch arm is self-documenting. - Rename recreateLock -> lifecycleLock and take it inside start(), stop(), and recreateProducerIfNeeded() so start/stop no longer race the recreate path. - Collapse recreateProducerIfNeeded body to closeResources() + producerManager.get(id) + createProducerInternal(). Keep the recreateProducer flag re-armed on failure so the next-message retry contract still holds. - Demote the hot-path "Detected stale ..." / "Recreating ..." logs from warn to debug in both the outbound handler and the error queue path to avoid flooding user logs on the message path. Integration test robustness: - findSolaceContainerId now matches the broker container by the SMF host port the JCSMP session is connected to, parsed from JCSMPProperties.HOST. Filtering by image name alone was unsafe with Ryuk disabled or with leftover containers across runs, which silently sent CLI commands to the wrong broker and made the recovery test pass vacuously. - After 'no shutdown', wait on session.isCapable(PUB_GUARANTEED) via Awaitility before the recovery publish. SEMP being responsive is not a proxy for JCSMP's cached capability set, which the per-binding createProducer call checks; without this wait the publish races the capability refresh and intermittently fails with "Router does not support guaranteed publisher flows". Co-Authored-By: Claude Opus 4.7 (1M context) --- .../outbound/JCSMPOutboundMessageHandler.java | 85 +++++++++---------- .../binder/util/ErrorQueueInfrastructure.java | 4 +- .../JCSMPProducerCloseFlowRecoveryIT.java | 50 +++++++++-- 3 files changed, 85 insertions(+), 54 deletions(-) diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java index 1ad471449..d16c1765f 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java @@ -69,7 +69,7 @@ public final class JCSMPOutboundMessageHandler implements MessageHandler, Lifecy // DATAGO-134580: recreate JCSMP producer on unsolicited termination from Solace broker. private volatile boolean recreateProducer = false; - private final Object recreateLock = new Object(); + private final Object lifecycleLock = new Object(); private static final Logger LOGGER = LoggerFactory.getLogger(JCSMPOutboundMessageHandler.class); @@ -177,7 +177,7 @@ public void handleMessage(@NonNull Message message) throws MessagingException || e instanceof ClosedFacilityException || (producer != null && producer.isClosed())) { if (!recreateProducer) { - LOGGER.warn("Detected stale JCSMP producer for binding {} (cause: {}); will " + + LOGGER.debug("Detected stale JCSMP producer for binding {} (cause: {}); will " + "recreate on next message ", properties.getBindingName(), e.getClass().getSimpleName(), id); } @@ -248,35 +248,37 @@ private Destination getDynamicDestination(Map headers, ErrorChan @Override public void start() { LOGGER.info("Creating producer to {} {} ", configDestinationType, configDestination.getName(), id); - if (isRunning()) { - LOGGER.warn("Nothing to do, message handler {} is already running", id); - return; - } - recreateProducer = false; + synchronized (lifecycleLock) { + if (isRunning()) { + LOGGER.warn("Nothing to do, message handler {} is already running", id); + return; + } + recreateProducer = false; - try { - Map headerNameMapping = properties.getExtension().getHeaderNameMapping(); - if (headerNameMapping != null && !headerNameMapping.isEmpty()) { - Set uniqueTargetHeaderNames = new HashSet<>(headerNameMapping.values()); - if (uniqueTargetHeaderNames.size() < headerNameMapping.size()) { - IllegalArgumentException exception = new IllegalArgumentException(String.format( - "Two or more headers map to the same header name in headerNameMapping %s ", - properties.getExtension().getHeaderNameMapping(), id)); - LOGGER.warn(exception.getMessage()); - throw exception; + try { + Map headerNameMapping = properties.getExtension().getHeaderNameMapping(); + if (headerNameMapping != null && !headerNameMapping.isEmpty()) { + Set uniqueTargetHeaderNames = new HashSet<>(headerNameMapping.values()); + if (uniqueTargetHeaderNames.size() < headerNameMapping.size()) { + IllegalArgumentException exception = new IllegalArgumentException(String.format( + "Two or more headers map to the same header name in headerNameMapping %s ", + properties.getExtension().getHeaderNameMapping(), id)); + LOGGER.warn(exception.getMessage()); + throw exception; + } } + + producerManager.get(id); + createProducerInternal(); + } catch (Exception e) { + String msg = String.format("Unable to get a message producer for session %s", jcsmpSession.getSessionName()); + LOGGER.warn(msg, e); + closeResources(); + throw new RuntimeException(msg, e); } - producerManager.get(id); - createProducerInternal(); - } catch (Exception e) { - String msg = String.format("Unable to get a message producer for session %s", jcsmpSession.getSessionName()); - LOGGER.warn(msg, e); - closeResources(); - throw new RuntimeException(msg, e); + isRunning = true; } - - isRunning = true; } private void createProducerInternal() throws JCSMPException { @@ -295,30 +297,19 @@ private void recreateProducerIfNeeded(ErrorChannelSendingCorrelationKey correlat if (!recreateProducer && (producer == null || !producer.isClosed())) { return; } - synchronized (recreateLock) { + synchronized (lifecycleLock) { if (!recreateProducer && (producer == null || !producer.isClosed())) { return; } - LOGGER.warn("Recreating JCSMP producer for binding {} after stale-flow detection ", + LOGGER.debug("Recreating JCSMP producer for binding {} after stale-flow detection ", properties.getBindingName(), id); + closeResources(); try { - if (producer != null) { - producer.close(); - } - } catch (Exception closeError) { - LOGGER.debug("Failed to close stale producer during recreation ", id, closeError); - } - try { - if (transactedSession != null) { - transactedSession.close(); - } - } catch (Exception closeError) { - LOGGER.debug("Failed to close stale transacted session during recreation ", id, closeError); - } - try { + producerManager.get(id); createProducerInternal(); recreateProducer = false; - } catch (JCSMPException createError) { + } catch (Exception createError) { + recreateProducer = true; throw handleMessagingException(correlationKey, "Failed to recreate JCSMP producer after stale-flow detection", createError); } @@ -327,9 +318,11 @@ private void recreateProducerIfNeeded(ErrorChannelSendingCorrelationKey correlat @Override public void stop() { - if (!isRunning()) return; - closeResources(); - isRunning = false; + synchronized (lifecycleLock) { + if (!isRunning()) return; + closeResources(); + isRunning = false; + } } private void closeResources() { diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructure.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructure.java index 5b1311046..13da8599b 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructure.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructure.java @@ -30,8 +30,6 @@ public ErrorQueueInfrastructure(JCSMPSessionProducerManager producerManager, Str this.consumerProperties = consumerProperties; } - // DATAGO-134580: recreate shared JCSMP producer on unsolicited termination from Solace broker. - public void send(MessageContainer messageContainer, ErrorQueueRepublishCorrelationKey key) throws JCSMPException { XMLMessage xmlMessage = xmlMessageMapper.mapError(messageContainer.getMessage(), consumerProperties); xmlMessage.setCorrelationKey(key); @@ -59,7 +57,7 @@ public void send(MessageContainer messageContainer, ErrorQueueRepublishCorrelati || e instanceof JCSMPTransportException || e instanceof ClosedFacilityException || producer.isClosed()) { - LOGGER.warn("Detected stale shared JCSMP producer while sending to error queue {} (cause: {}); " + + LOGGER.debug("Detected stale shared JCSMP producer while sending to error queue {} (cause: {}); " + "recreating for next attempt", errorQueueName, e.getClass().getSimpleName()); try { diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java index 5ba02aff8..6474b7689 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder/src/test/java/com/solace/spring/cloud/stream/binder/JCSMPProducerCloseFlowRecoveryIT.java @@ -11,6 +11,7 @@ import com.solace.spring.cloud.stream.binder.util.DestinationType; import com.solace.test.integration.junit.jupiter.extension.PubSubPlusExtension; import com.solace.test.integration.semp.v2.SempV2Api; +import com.solacesystems.jcsmp.CapabilityType; import com.solacesystems.jcsmp.DeliveryMode; import com.solacesystems.jcsmp.JCSMPFactory; import com.solacesystems.jcsmp.JCSMPProperties; @@ -238,6 +239,7 @@ void testPersistentQueuePublisherRecoversAfterMessageSpoolCliShutdown(TestInfo t "no shutdown"); brokerSpoolNeedsRestore = false; awaitBrokerSempResponsive(sempV2Api, vpnName); + awaitJcsmpGuaranteedPublisherCapability(); logger.info("Attempting first post-shutdown publish; expecting proactive recovery"); assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload("recovery-1").build())) @@ -281,6 +283,7 @@ void testPersistentQueuePublisherRecoversAcrossRepeatedMessageSpoolCliShutdowns( "no shutdown"); brokerSpoolNeedsRestore = false; awaitBrokerSempResponsive(sempV2Api, vpnName); + awaitJcsmpGuaranteedPublisherCapability(); final int currentCycle = cycle; assertThatCode(() -> moduleOutputChannel.send(MessageBuilder.withPayload( @@ -311,6 +314,16 @@ private DirectChannel createPersistentProducerChannel(String destination, Destin return moduleOutputChannel; } + /** Polls until JCSMP reports {@link CapabilityType#PUB_GUARANTEED} after broker spool re-enable. */ + private void awaitJcsmpGuaranteedPublisherCapability() { + Awaitility.await("JCSMP session reports PUB_GUARANTEED capability") + .atMost(Duration.ofSeconds(30)) + .pollInterval(Duration.ofMillis(250)) + .untilAsserted(() -> assertThat(jcsmpSession.isCapable(CapabilityType.PUB_GUARANTEED)) + .as("session must re-advertise PUB_GUARANTEED after broker message-spool re-enable") + .isTrue()); + } + /** Polls until the broker SEMP API answers a {@code MsgVpn} lookup again. */ private static void awaitBrokerSempResponsive(SempV2Api sempV2Api, String vpnName) { Awaitility.await("broker SEMP API responsive for VPN '" + vpnName + "'") @@ -325,17 +338,44 @@ private static void awaitBrokerSempResponsive(SempV2Api sempV2Api, String vpnNam // Docker / CLI helpers (only used by the message-spool CLI shutdown tests). - /** Finds the running {@code solace-pubsub-standard} container via the testcontainers docker client. */ - private static String findSolaceContainerId() { + /** Finds the broker container this test's JCSMP session is connected to, matched by SMF host port. */ + private String findSolaceContainerId() { + int smfPort = extractSmfPortFromJcsmpSession(); DockerClient docker = DockerClientFactory.instance().client(); List containers = docker.listContainersCmd().exec(); return containers.stream() .filter(c -> c.getImage() != null && c.getImage().contains("solace-pubsub-standard")) + .filter(c -> { + com.github.dockerjava.api.model.ContainerPort[] ports = c.getPorts(); + if (ports == null) return false; + for (com.github.dockerjava.api.model.ContainerPort port : ports) { + Integer publicPort = port.getPublicPort(); + if (publicPort != null && publicPort == smfPort) return true; + } + return false; + }) .findFirst() .map(Container::getId) - .orElseThrow(() -> new IllegalStateException( - "No running 'solace-pubsub-standard' container found via the docker client. " + - "This test requires the PubSubPlusExtension to have provisioned its container.")); + .orElseThrow(() -> new IllegalStateException(String.format( + "No running 'solace-pubsub-standard' container exposing SMF host port %d found.", + smfPort))); + } + + private int extractSmfPortFromJcsmpSession() { + Object host = jcsmpSession.getProperty(JCSMPProperties.HOST); + if (host == null) { + throw new IllegalStateException("JCSMP HOST property is not set"); + } + String firstHost = host.toString().split(",")[0].trim(); + int colonIdx = firstHost.lastIndexOf(':'); + if (colonIdx < 0 || colonIdx == firstHost.length() - 1) { + throw new IllegalStateException("Cannot parse SMF port from JCSMP HOST: " + host); + } + String portStr = firstHost.substring(colonIdx + 1).replaceAll("[^0-9].*", ""); + if (portStr.isEmpty()) { + throw new IllegalStateException("Cannot parse SMF port from JCSMP HOST: " + host); + } + return Integer.parseInt(portStr); } /** From 496d09083061b5aad7d5b1356f2eec44279ab3c1 Mon Sep 17 00:00:00 2001 From: sunil-solace Date: Fri, 22 May 2026 16:23:20 -0400 Subject: [PATCH 12/13] DATAGO-134580: Fold get/cleanup into createProducerInternal (PR #475) Apply latest Nephery review feedback on the outbound handler: - Move producerManager.get(id) into createProducerInternal so callers no longer have to remember the prelude. - Move the catch (with closeResources + RuntimeException wrap) into createProducerInternal so cleanup occurs whenever creation fails, regardless of which caller invoked it. - Add defensive synchronized(lifecycleLock) inside createProducerInternal and closeResources to keep them safe if future callers don't already hold the lock (intrinsic locks are reentrant, so current call sites are unaffected). - Drop now-unreachable null checks on the producer field in the handleMessage catch arm and recreateProducerIfNeeded pre-checks; once the isRunning gate has passed, producer is guaranteed non-null and is never re-nulled. The null check in closeResources stays because it can be called from start()'s catch where producer is still unset. - Knock-on: start() outer try/catch now only wraps the header check; recreateProducerIfNeeded no longer needs the inline get(id). - One unit test updated: the underlying JCSMPException is now the ROOT cause (wrapped once by createProducerInternal's RuntimeException), so testProducerRecreationFailurePropagatesAndRetriesNext switches hasCauseInstanceOf -> hasRootCauseInstanceOf. All 75 outbound-handler unit tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../outbound/JCSMPOutboundMessageHandler.java | 62 +++++++++++-------- .../JCSMPOutboundMessageHandlerTest.java | 2 +- 2 files changed, 36 insertions(+), 28 deletions(-) diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java index d16c1765f..14fb20fdb 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/main/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandler.java @@ -175,7 +175,7 @@ public void handleMessage(@NonNull Message message) throws MessagingException if (e instanceof StaleSessionException || e instanceof JCSMPTransportException || e instanceof ClosedFacilityException - || (producer != null && producer.isClosed())) { + || producer.isClosed()) { if (!recreateProducer) { LOGGER.debug("Detected stale JCSMP producer for binding {} (cause: {}); will " + "recreate on next message ", @@ -267,45 +267,51 @@ public void start() { throw exception; } } - - producerManager.get(id); - createProducerInternal(); } catch (Exception e) { String msg = String.format("Unable to get a message producer for session %s", jcsmpSession.getSessionName()); LOGGER.warn(msg, e); - closeResources(); throw new RuntimeException(msg, e); } + createProducerInternal(); isRunning = true; } } - private void createProducerInternal() throws JCSMPException { - if (properties.getExtension().isTransacted()) { - LOGGER.info("Creating transacted session ", id); - transactedSession = jcsmpSession.createTransactedSession(); - producer = transactedSession.createProducer(SolaceProvisioningUtil.getProducerFlowProperties(jcsmpSession), - producerEventHandler); - } else { - producer = jcsmpSession.createProducer(SolaceProvisioningUtil.getProducerFlowProperties(jcsmpSession), - producerEventHandler); + private void createProducerInternal() { + synchronized (lifecycleLock) { + try { + producerManager.get(id); + if (properties.getExtension().isTransacted()) { + LOGGER.info("Creating transacted session ", id); + transactedSession = jcsmpSession.createTransactedSession(); + producer = transactedSession.createProducer(SolaceProvisioningUtil.getProducerFlowProperties(jcsmpSession), + producerEventHandler); + } else { + producer = jcsmpSession.createProducer(SolaceProvisioningUtil.getProducerFlowProperties(jcsmpSession), + producerEventHandler); + } + } catch (Exception e) { + String msg = String.format("Unable to get a message producer for session %s", jcsmpSession.getSessionName()); + LOGGER.warn(msg, e); + closeResources(); + throw new RuntimeException(msg, e); + } } } private void recreateProducerIfNeeded(ErrorChannelSendingCorrelationKey correlationKey) throws MessagingException { - if (!recreateProducer && (producer == null || !producer.isClosed())) { + if (!recreateProducer && !producer.isClosed()) { return; } synchronized (lifecycleLock) { - if (!recreateProducer && (producer == null || !producer.isClosed())) { + if (!recreateProducer && !producer.isClosed()) { return; } LOGGER.debug("Recreating JCSMP producer for binding {} after stale-flow detection ", properties.getBindingName(), id); closeResources(); try { - producerManager.get(id); createProducerInternal(); recreateProducer = false; } catch (Exception createError) { @@ -326,17 +332,19 @@ public void stop() { } private void closeResources() { - LOGGER.info("Stopping producer to {} {} ", configDestinationType, configDestination.getName(), id); - recreateProducer = false; - if (producer != null) { - LOGGER.info("Closing producer ", id); - producer.close(); - } - if (transactedSession != null) { - LOGGER.info("Closing transacted session ", id); - transactedSession.close(); + synchronized (lifecycleLock) { + LOGGER.info("Stopping producer to {} {} ", configDestinationType, configDestination.getName(), id); + recreateProducer = false; + if (producer != null) { + LOGGER.info("Closing producer ", id); + producer.close(); + } + if (transactedSession != null) { + LOGGER.info("Closing transacted session ", id); + transactedSession.close(); + } + producerManager.release(id); } - producerManager.release(id); } @Override diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java index d72c8390f..d15f2ce06 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java @@ -842,7 +842,7 @@ void testProducerRecreationFailurePropagatesAndRetriesNext(@Mock XMLMessageProdu assertThatThrownBy(() -> messageHandler.handleMessage(secondMessage)) .isInstanceOf(MessagingException.class) .hasMessageContaining("Failed to recreate JCSMP producer") - .hasCauseInstanceOf(JCSMPException.class); + .hasRootCauseInstanceOf(JCSMPException.class); Message thirdMessage = MessageBuilder.withPayload("payload-3").build(); messageHandler.handleMessage(thirdMessage); From ee61040317e3fe638b2bede6c83288282f93ded8 Mon Sep 17 00:00:00 2001 From: sunil-solace Date: Fri, 22 May 2026 17:00:03 -0400 Subject: [PATCH 13/13] DATAGO-134580: Collapse stale-flow tests + add transacted Cartesians (PR #475) Apply Nephery review feedback and round out test coverage: - Merge testProducerRecreatedAfterClosedFacilityException and testProducerRecreatedAfterJCSMPTransportException into testProducerRecreatedAfterUnsolicitedCloseFlow as a Cartesian parameter (transacted x exceptionType). 4 tests -> 6 instances, same coverage, less duplication. - Drop the class-level Javadoc on ErrorQueueInfrastructureTest; test names are self-documenting. - Add `transacted` as a Cartesian dimension to the three remaining DATAGO-134580 outbound tests that previously only exercised the non-transacted path: * testProducerRecreatedProactivelyWhenIsClosedDetectedBeforeSend * testProducerRecreationFailurePropagatesAndRetriesNext * testRecreationFlagResetAcrossStopStartCycle This catches transacted-specific regressions in the proactive pre-check, the recreate-failure-then-retry path, and the stop/start flag-reset semantic. All 80 JCSMPOutboundMessageHandlerTest cases and 6 ErrorQueueInfrastructureTest cases pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../JCSMPOutboundMessageHandlerTest.java | 185 ++++++++++-------- .../util/ErrorQueueInfrastructureTest.java | 8 - 2 files changed, 98 insertions(+), 95 deletions(-) diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java index d15f2ce06..3c94190f1 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/outbound/JCSMPOutboundMessageHandlerTest.java @@ -687,10 +687,11 @@ void testGetBindingName() { assertThat(messageHandler.getBindingName()).isNotEmpty().isEqualTo(producerProperties.getBindingName()); } - /** DATAGO-134580: stale producer (CloseFlow) is recreated on the next send. */ - @CartesianTest(name = "[{index}] transacted={0}") + /** DATAGO-134580: stale producer is recreated on the next send for each stale-flow exception type. */ + @CartesianTest(name = "[{index}] transacted={0} exception={1}") public void testProducerRecreatedAfterUnsolicitedCloseFlow( @Values(booleans = {false, true}) boolean transacted, + @Values(strings = {"stale", "closed-facility", "transport"}) String exceptionType, @Mock XMLMessageProducer producerB, @Mock TransactedSession transactedSessionB) throws Exception { producerProperties.getExtension().setTransacted(transacted); @@ -708,17 +709,23 @@ public void testProducerRecreatedAfterUnsolicitedCloseFlow( .thenReturn(producerB); } - StaleSessionException stale = new StaleSessionException( - "Tried to perform operation on a closed XML message producer", - new JCSMPException("Received unsolicited CloseFlow for producer (503:Service Unavailable).")); - Mockito.doThrow(stale).when(messageProducer).send(any(XMLMessage.class), any(Destination.class)); + JCSMPException sendError = switch (exceptionType) { + case "stale" -> new StaleSessionException( + "Tried to perform operation on a closed XML message producer", + new JCSMPException("Received unsolicited CloseFlow for producer (503:Service Unavailable).")); + case "closed-facility" -> new ClosedFacilityException("Producer is closed"); + case "transport" -> new JCSMPTransportException( + "Received unsolicited CloseFlow for producer (503:Service Unavailable)."); + default -> throw new IllegalArgumentException("unknown exception type: " + exceptionType); + }; + Mockito.doThrow(sendError).when(messageProducer).send(any(XMLMessage.class), any(Destination.class)); messageHandler.start(); Message firstMessage = MessageBuilder.withPayload("payload-1").build(); assertThatThrownBy(() -> messageHandler.handleMessage(firstMessage)) .isInstanceOf(MessagingException.class) - .hasCauseInstanceOf(StaleSessionException.class); + .hasCauseInstanceOf(sendError.getClass()); assertThat(messageHandler.isRunning()).isTrue(); Message secondMessage = MessageBuilder.withPayload("payload-2").build(); @@ -740,68 +747,26 @@ public void testProducerRecreatedAfterUnsolicitedCloseFlow( Mockito.verify(messageProducer, Mockito.atLeastOnce()).close(); } - /** DATAGO-134580: {@link ClosedFacilityException} on send also triggers recreation. */ - @Test - void testProducerRecreatedAfterClosedFacilityException(@Mock XMLMessageProducer producerB) throws Exception { - Mockito.when(session.createProducer( - producerFlowPropertiesCaptor.capture(), pubEventHandlerCaptor.capture())) - .thenReturn(messageProducer) - .thenReturn(producerB); - - Mockito.doThrow(new ClosedFacilityException("Producer is closed")) - .when(messageProducer).send(any(XMLMessage.class), any(Destination.class)); - - messageHandler.start(); - - Message firstMessage = MessageBuilder.withPayload("payload-1").build(); - assertThatThrownBy(() -> messageHandler.handleMessage(firstMessage)) - .isInstanceOf(MessagingException.class) - .hasCauseInstanceOf(ClosedFacilityException.class); - - Message secondMessage = MessageBuilder.withPayload("payload-2").build(); - messageHandler.handleMessage(secondMessage); - - Mockito.verify(session, Mockito.times(2)) - .createProducer(any(ProducerFlowProperties.class), any(JCSMPStreamingPublishCorrelatingEventHandler.class)); - Mockito.verify(producerB, Mockito.times(1)).send(any(XMLMessage.class), any(Destination.class)); - Mockito.verify(messageProducer, Mockito.atLeastOnce()).close(); - } - - /** DATAGO-134580: {@link JCSMPTransportException} on send also triggers recreation. */ - @Test - void testProducerRecreatedAfterJCSMPTransportException(@Mock XMLMessageProducer producerB) throws Exception { - Mockito.when(session.createProducer( - producerFlowPropertiesCaptor.capture(), pubEventHandlerCaptor.capture())) - .thenReturn(messageProducer) - .thenReturn(producerB); - - Mockito.doThrow(new JCSMPTransportException( - "Received unsolicited CloseFlow for producer (503:Service Unavailable).")) - .when(messageProducer).send(any(XMLMessage.class), any(Destination.class)); - - messageHandler.start(); - - Message firstMessage = MessageBuilder.withPayload("payload-1").build(); - assertThatThrownBy(() -> messageHandler.handleMessage(firstMessage)) - .isInstanceOf(MessagingException.class) - .hasCauseInstanceOf(JCSMPTransportException.class); - - Message secondMessage = MessageBuilder.withPayload("payload-2").build(); - messageHandler.handleMessage(secondMessage); - - Mockito.verify(session, Mockito.times(2)) - .createProducer(any(ProducerFlowProperties.class), any(JCSMPStreamingPublishCorrelatingEventHandler.class)); - Mockito.verify(producerB, Mockito.times(1)).send(any(XMLMessage.class), any(Destination.class)); - Mockito.verify(messageProducer, Mockito.atLeastOnce()).close(); - } - /** DATAGO-134580: proactive {@code isClosed()} pre-check rebuilds before the first send. */ - @Test - void testProducerRecreatedProactivelyWhenIsClosedDetectedBeforeSend(@Mock XMLMessageProducer producerB) throws Exception { - Mockito.when(session.createProducer( - producerFlowPropertiesCaptor.capture(), pubEventHandlerCaptor.capture())) - .thenReturn(messageProducer) - .thenReturn(producerB); + @CartesianTest(name = "[{index}] transacted={0}") + void testProducerRecreatedProactivelyWhenIsClosedDetectedBeforeSend( + @Values(booleans = {false, true}) boolean transacted, + @Mock XMLMessageProducer producerB, + @Mock TransactedSession transactedSessionB) throws Exception { + producerProperties.getExtension().setTransacted(transacted); + + if (transacted) { + Mockito.when(session.createTransactedSession()) + .thenReturn(transactedSession) + .thenReturn(transactedSessionB); + Mockito.when(transactedSessionB.createProducer(any(ProducerFlowProperties.class), + any(JCSMPStreamingPublishCorrelatingEventHandler.class))).thenReturn(producerB); + } else { + Mockito.when(session.createProducer( + producerFlowPropertiesCaptor.capture(), pubEventHandlerCaptor.capture())) + .thenReturn(messageProducer) + .thenReturn(producerB); + } Mockito.when(messageProducer.isClosed()).thenReturn(true); @@ -810,21 +775,43 @@ void testProducerRecreatedProactivelyWhenIsClosedDetectedBeforeSend(@Mock XMLMes Message firstMessage = MessageBuilder.withPayload("payload-1").build(); messageHandler.handleMessage(firstMessage); - Mockito.verify(session, Mockito.times(2)) - .createProducer(any(ProducerFlowProperties.class), any(JCSMPStreamingPublishCorrelatingEventHandler.class)); + if (transacted) { + Mockito.verify(session, Mockito.times(2)).createTransactedSession(); + Mockito.verify(transactedSession, Mockito.atLeastOnce()).close(); + Mockito.verify(transactedSessionB, Mockito.times(1)) + .createProducer(any(ProducerFlowProperties.class), + any(JCSMPStreamingPublishCorrelatingEventHandler.class)); + } else { + Mockito.verify(session, Mockito.times(2)) + .createProducer(any(ProducerFlowProperties.class), any(JCSMPStreamingPublishCorrelatingEventHandler.class)); + } Mockito.verify(producerB, Mockito.times(1)).send(any(XMLMessage.class), any(Destination.class)); Mockito.verify(messageProducer, Mockito.never()).send(any(XMLMessage.class), any(Destination.class)); Mockito.verify(messageProducer, Mockito.atLeastOnce()).close(); } /** DATAGO-134580: failed recreation surfaces an error and retries on the next message. */ - @Test - void testProducerRecreationFailurePropagatesAndRetriesNext(@Mock XMLMessageProducer producerB) throws Exception { - Mockito.when(session.createProducer( - producerFlowPropertiesCaptor.capture(), pubEventHandlerCaptor.capture())) - .thenReturn(messageProducer) - .thenThrow(new JCSMPException("Broker still reconnecting")) - .thenReturn(producerB); + @CartesianTest(name = "[{index}] transacted={0}") + void testProducerRecreationFailurePropagatesAndRetriesNext( + @Values(booleans = {false, true}) boolean transacted, + @Mock XMLMessageProducer producerB, + @Mock TransactedSession transactedSessionB) throws Exception { + producerProperties.getExtension().setTransacted(transacted); + + if (transacted) { + Mockito.when(session.createTransactedSession()) + .thenReturn(transactedSession) + .thenThrow(new JCSMPException("Broker still reconnecting")) + .thenReturn(transactedSessionB); + Mockito.when(transactedSessionB.createProducer(any(ProducerFlowProperties.class), + any(JCSMPStreamingPublishCorrelatingEventHandler.class))).thenReturn(producerB); + } else { + Mockito.when(session.createProducer( + producerFlowPropertiesCaptor.capture(), pubEventHandlerCaptor.capture())) + .thenReturn(messageProducer) + .thenThrow(new JCSMPException("Broker still reconnecting")) + .thenReturn(producerB); + } StaleSessionException stale = new StaleSessionException( "Tried to perform operation on a closed XML message producer", @@ -847,19 +834,39 @@ void testProducerRecreationFailurePropagatesAndRetriesNext(@Mock XMLMessageProdu Message thirdMessage = MessageBuilder.withPayload("payload-3").build(); messageHandler.handleMessage(thirdMessage); - Mockito.verify(session, Mockito.times(3)) - .createProducer(any(ProducerFlowProperties.class), any(JCSMPStreamingPublishCorrelatingEventHandler.class)); + if (transacted) { + Mockito.verify(session, Mockito.times(3)).createTransactedSession(); + Mockito.verify(transactedSessionB, Mockito.times(1)) + .createProducer(any(ProducerFlowProperties.class), + any(JCSMPStreamingPublishCorrelatingEventHandler.class)); + } else { + Mockito.verify(session, Mockito.times(3)) + .createProducer(any(ProducerFlowProperties.class), any(JCSMPStreamingPublishCorrelatingEventHandler.class)); + } Mockito.verify(producerB, Mockito.times(1)).send(any(XMLMessage.class), any(Destination.class)); assertThat(messageHandler.isRunning()).isTrue(); } /** DATAGO-134580: stop/start clears the recreate flag so the next send doesn't rebuild. */ - @Test - void testRecreationFlagResetAcrossStopStartCycle(@Mock XMLMessageProducer producerB) throws Exception { - Mockito.when(session.createProducer( - producerFlowPropertiesCaptor.capture(), pubEventHandlerCaptor.capture())) - .thenReturn(messageProducer) - .thenReturn(producerB); + @CartesianTest(name = "[{index}] transacted={0}") + void testRecreationFlagResetAcrossStopStartCycle( + @Values(booleans = {false, true}) boolean transacted, + @Mock XMLMessageProducer producerB, + @Mock TransactedSession transactedSessionB) throws Exception { + producerProperties.getExtension().setTransacted(transacted); + + if (transacted) { + Mockito.when(session.createTransactedSession()) + .thenReturn(transactedSession) + .thenReturn(transactedSessionB); + Mockito.when(transactedSessionB.createProducer(any(ProducerFlowProperties.class), + any(JCSMPStreamingPublishCorrelatingEventHandler.class))).thenReturn(producerB); + } else { + Mockito.when(session.createProducer( + producerFlowPropertiesCaptor.capture(), pubEventHandlerCaptor.capture())) + .thenReturn(messageProducer) + .thenReturn(producerB); + } StaleSessionException stale = new StaleSessionException( "Tried to perform operation on a closed XML message producer", @@ -880,8 +887,12 @@ void testRecreationFlagResetAcrossStopStartCycle(@Mock XMLMessageProducer produc Message secondMessage = MessageBuilder.withPayload("payload-2").build(); messageHandler.handleMessage(secondMessage); - Mockito.verify(session, Mockito.never()) - .createProducer(any(ProducerFlowProperties.class), any(JCSMPStreamingPublishCorrelatingEventHandler.class)); + if (transacted) { + Mockito.verify(session, Mockito.never()).createTransactedSession(); + } else { + Mockito.verify(session, Mockito.never()) + .createProducer(any(ProducerFlowProperties.class), any(JCSMPStreamingPublishCorrelatingEventHandler.class)); + } Mockito.verify(producerB, Mockito.times(1)).send(any(XMLMessage.class), any(Destination.class)); } diff --git a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructureTest.java b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructureTest.java index 299246722..507d3a8be 100644 --- a/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructureTest.java +++ b/solace-spring-cloud-stream-binder/solace-spring-cloud-stream-binder-core/src/test/java/com/solace/spring/cloud/stream/binder/util/ErrorQueueInfrastructureTest.java @@ -23,14 +23,6 @@ import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.mockito.ArgumentMatchers.any; -/** - * Unit tests for the DATAGO-134580 stale-flow recovery added to {@link ErrorQueueInfrastructure}. - * The error-queue republish path borrows the session-default producer from - * {@link JCSMPSessionProducerManager} and historically had no recovery logic when the broker - * fanned out an unsolicited CloseFlow on that producer (reactive recreation in - * {@code JCSMPOutboundMessageHandler} only protects per-binding producers, not the shared - * session-default one). - */ @ExtendWith(MockitoExtension.class) class ErrorQueueInfrastructureTest { private static final String PRODUCER_KEY = "test-producer-key";