|
43 | 43 | RedpandaVersion, |
44 | 44 | ) |
45 | 45 | from rptest.tests.redpanda_test import RedpandaTest |
46 | | -from rptest.util import expect_exception, expect_http_error |
| 46 | +from rptest.util import expect_exception, expect_http_error, wait_until_result |
47 | 47 | from rptest.utils.si_utils import BucketView |
48 | 48 |
|
49 | 49 | BOOTSTRAP_CONFIG = { |
@@ -71,7 +71,10 @@ def check_restart_clears(admin, redpanda, nodes=None): |
71 | 71 | nodes = redpanda.nodes |
72 | 72 |
|
73 | 73 | status = admin.get_cluster_config_status() |
74 | | - for n in status: |
| 74 | + relevant_ids = {redpanda.node_id(n) for n in nodes} |
| 75 | + relevant = [s for s in status if s["node_id"] in relevant_ids] |
| 76 | + assert len(relevant_ids) == len(relevant) |
| 77 | + for n in relevant: |
75 | 78 | assert n["restart"] is True |
76 | 79 |
|
77 | 80 | first_node = nodes[0] |
@@ -141,6 +144,33 @@ def is_complete(node): |
141 | 144 | ) |
142 | 145 |
|
143 | 146 |
|
def wait_for_active_nodes_version_status_sync(
    admin, redpanda, version, nodes, timeout_sec=10, backoff_sec=0.5
):
    """
    Like wait_for_version_status_sync, but only requires the subset of
    nodes in `nodes` to agree on `version`. Statuses for other nodes (e.g. a
    downed node still listed by the controller) are ignored.

    :param admin: Admin API client used to query config status.
    :param redpanda: RedpandaService, used to map nodes to node ids.
    :param version: the config version every node in `nodes` must report.
    :param nodes: the subset of cluster nodes that must converge; each node
                  is polled individually so every node's own view agrees.
    :param timeout_sec: per-node wait timeout (default 10, as before).
    :param backoff_sec: per-node poll interval (default 0.5, as before).
    """
    active_ids = {redpanda.node_id(n) for n in nodes}

    def is_complete(node):
        # Ask this specific node for its view of cluster config status and
        # keep only the entries for the nodes we care about.
        node_status = admin.get_cluster_config_status(node=node)
        relevant = [s for s in node_status if s["node_id"] in active_ids]
        if len(relevant) != len(active_ids):
            return False
        return {s["config_version"] for s in relevant} == {version}

    for node in nodes:
        # Bind `node` as a default argument so each lambda polls its own node
        # rather than the loop's final value.
        wait_until(
            lambda n=node: is_complete(n),
            timeout_sec=timeout_sec,
            backoff_sec=backoff_sec,
            err_msg=(
                f"Config status on node {redpanda.node_id(node)} did not "
                f"converge on {version} for active nodes {sorted(active_ids)}"
            ),
        )
| 172 | + |
| 173 | + |
144 | 174 | class ClusterConfigBootstrapTest(RedpandaTest): |
145 | 175 | def __init__(self, *args, **kwargs): |
146 | 176 | super().__init__(*args, extra_rp_conf={}, **kwargs) |
@@ -2599,6 +2629,337 @@ def assert_restart_status(expect: bool): |
2599 | 2629 | assert n["restart"] is False |
2600 | 2630 |
|
2601 | 2631 |
|
class ClusterConfigMultiNodeBootstrapTest(RedpandaTest):
    """
    Tests that cluster configuration (especially needs_restart=yes
    properties) is applied correctly during node bootstrap in multi-node
    scenarios: a node rejoining after downtime, a node joining a cluster for
    the first time, and a cluster restored via whole-cluster recovery.
    """

    def __init__(self, test_context):
        # SISettings enables tiered storage, which the cloud-topics tests
        # below depend on.
        super().__init__(
            test_context, num_brokers=3, si_settings=SISettings(test_context)
        )
        self.admin = Admin(self.redpanda)
        self.rpk = RpkTool(self.redpanda)

    def setUp(self):
        # Skip starting redpanda, so that each test can explicitly start
        # it with its own override_cfg_params / node subsets.
        pass

    def _local_replica_stms(self, node, topic_name: str, partition: int):
        """
        Return the set of stm names registered for `topic_name`/`partition` on
        `node`'s local replica, or None if the replica has not materialized yet.
        Suitable for passing to wait_until_result (None -> keep retrying).
        """
        node_id = self.redpanda.node_id(node)
        state = self.admin.get_partition_state(
            "kafka", topic_name, partition, node=node
        )
        # Find the entry describing this node's own replica; other replicas'
        # raft_state belongs to peer nodes.
        for r in state.get("replicas", []):
            if r.get("raft_state", {}).get("node_id") == node_id:
                return {s["name"] for s in r["raft_state"].get("stms", [])}
        return None

    @cluster(num_nodes=3)
    def test_node_delayed_restart(self):
        """
        A node which has gone down should see the most up to date cluster config immediately in the bootstrap process, instead of needing to restart again.
        """

        def assert_restart_status_on_nodes(expect: bool, relevant_nodes):
            # Check the restart flag only for `relevant_nodes`; statuses for
            # other nodes (e.g. a stopped node) are ignored.
            relevant_ids = {self.redpanda.node_id(n) for n in relevant_nodes}
            status = self.admin.get_cluster_config_status()
            relevant = [s for s in status if s["node_id"] in relevant_ids]
            assert len(relevant_ids) == len(relevant)
            for n in relevant:
                assert n["restart"] is expect, (
                    f"Expected restart status {n['restart']} to be {expect}"
                )

        active_nodes = self.redpanda.nodes[0:2]
        down_node = self.redpanda.nodes[2]
        all_nodes = self.redpanda.nodes
        self.redpanda.start(all_nodes)

        # Wait for config status to populate for all three brokers.
        wait_until(
            lambda: len(self.admin.get_cluster_config_status()) == 3,
            timeout_sec=30,
            backoff_sec=1,
        )

        assert_restart_status_on_nodes(False, all_nodes)

        # Bring one of the nodes down.
        self.redpanda.stop_node(down_node)

        # An arbitrary restart-requiring setting with a non-default value
        new_setting = (CLOUD_TOPICS_CONFIG_STR, True)
        patch_result = self.admin.patch_cluster_config(upsert=dict([new_setting]))
        new_version = patch_result["config_version"]
        # Only the two live nodes can converge; the downed node's stale
        # status entry is ignored.
        wait_for_active_nodes_version_status_sync(
            self.admin, self.redpanda, new_version, nodes=active_nodes
        )
        assert_restart_status_on_nodes(True, active_nodes)

        # Restart existing nodes to get them into a clean state
        check_restart_clears(self.admin, self.redpanda, nodes=active_nodes)

        config = {
            TopicSpec.PROPERTY_STORAGE_MODE: TopicSpec.STORAGE_MODE_CLOUD,
        }
        topic_name = "tapioca"
        self.rpk.create_topic(
            topic=topic_name,
            partitions=1,
            replicas=3,
            config=config,
        )
        topic_desc = self.rpk.describe_topic_configs(topic_name)
        assert (
            topic_desc[TopicSpec.PROPERTY_STORAGE_MODE][0]
            == TopicSpec.STORAGE_MODE_CLOUD
        )

        # Start the node back up.
        self.redpanda.start_node(down_node)

        # Verify ctp_stm is registered on down_node's local replica of the
        # cloud topic. This proves bootstrap applied cloud_topics_enabled=true
        # (a needs_restart=yes property) before partition_manager constructed
        # the partition; otherwise ctp_stm would be missing on this node
        # until another restart.
        down_node_id = self.redpanda.node_id(down_node)
        stm_names = wait_until_result(
            lambda: self._local_replica_stms(down_node, topic_name, 0),
            timeout_sec=30,
            backoff_sec=1,
            err_msg=f"{topic_name} replica never materialized on restarted "
            f"node {down_node_id}",
        )
        assert "ctp_stm" in stm_names, (
            f"ctp_stm missing on restarted node {down_node_id}; got stms {stm_names}. Bootstrap did not apply cloud_topics_enabled before partition_manager built {topic_name}."
        )

        # The rejoined node picked up the config at bootstrap, so no node
        # should still be flagged as needing a restart.
        status = self.admin.get_cluster_config_status()
        for n in status:
            assert n["restart"] is False

    @cluster(num_nodes=3)
    def test_cloud_topic_on_joining_node(self):
        """
        A node joining a cluster for the first time should pick up
        cloud_topics_enabled (a needs_restart=yes property) from the
        register_with_cluster join snapshot, so that partition_manager
        registers ctp_stm when the cloud topic's partition is constructed.
        """
        seed_nodes = self.redpanda.nodes[0:2]
        joiner_node = self.redpanda.nodes[2]

        # Bring up a 2-node cluster first.
        self.redpanda.start(seed_nodes)
        wait_until(
            lambda: len(self.admin.get_cluster_config_status()) == 2,
            timeout_sec=30,
            backoff_sec=1,
        )

        # Enable cloud_topics_enabled and restart both seeds so the value
        # is in the active config on whichever seed ends up serving the
        # joiner's register_with_cluster RPC and validating the cloud topic
        # create.
        new_setting = (CLOUD_TOPICS_CONFIG_STR, True)
        patch_result = self.admin.patch_cluster_config(upsert=dict([new_setting]))
        new_version = patch_result["config_version"]
        wait_for_active_nodes_version_status_sync(
            self.admin, self.redpanda, new_version, nodes=seed_nodes
        )
        self.redpanda.restart_nodes(seed_nodes)
        seed_ids = {self.redpanda.node_id(n) for n in seed_nodes}
        wait_until(
            lambda: all(
                s["restart"] is False
                for s in self.admin.get_cluster_config_status()
                if s["node_id"] in seed_ids
            ),
            timeout_sec=30,
            backoff_sec=1,
            err_msg="seed restart flag did not clear after seed restart",
        )

        # Join the third node for the first time.
        self.redpanda.start_node(joiner_node)
        wait_until(
            lambda: len(self.admin.get_cluster_config_status()) == 3,
            timeout_sec=30,
            backoff_sec=1,
        )

        # Create a cloud topic with rf=3 so the joiner hosts a replica.
        topic_name = "tapioca_joiner"
        self.rpk.create_topic(
            topic=topic_name,
            partitions=1,
            replicas=3,
            config={
                TopicSpec.PROPERTY_STORAGE_MODE: TopicSpec.STORAGE_MODE_CLOUD,
            },
        )

        # ctp_stm must be registered on the joiner's replica. If the
        # joiner's bootstrap left cloud_topics_enabled in pending instead
        # of active, partition_manager would build the partition without
        # ctp_stm and we'd silently lose cloud-topics functionality on
        # this node until another restart.
        joiner_id = self.redpanda.node_id(joiner_node)
        stm_names = wait_until_result(
            lambda: self._local_replica_stms(joiner_node, topic_name, 0),
            timeout_sec=30,
            backoff_sec=1,
            err_msg=f"{topic_name} replica never materialized on joiner "
            f"node {joiner_id}",
        )
        assert "ctp_stm" in stm_names, (
            f"ctp_stm missing on joiner node {joiner_id}; got stms {stm_names}. "
            f"register_with_cluster snapshot did not apply cloud_topics_enabled "
            f"before partition_manager built {topic_name}."
        )

        status = self.admin.get_cluster_config_status()
        for n in status:
            assert n["restart"] is False, (
                f"Unexpected restart=true after fresh join: {status}"
            )

    @cluster(num_nodes=3)
    def test_cluster_recovery_needs_restart_property(self):
        """
        After cluster recovery applies a needs_restart=yes property, the
        active value should remain at the default until nodes restart.
        After a restart, the recovered value should be in active because
        bootstrap reads the local cache (which apply_delta -> store_delta
        wrote during recovery).
        """
        # Faster cluster metadata upload so the source backup is captured
        # quickly. enable_cluster_metadata_upload_loop is true by default.
        self.redpanda.add_extra_rp_conf(
            {
                "controller_snapshot_max_age_sec": 1,
                "cloud_storage_cluster_metadata_upload_interval_ms": 1000,
            }
        )

        all_nodes = self.redpanda.nodes
        self.redpanda.start(all_nodes)
        wait_until(
            lambda: len(self.admin.get_cluster_config_status()) == 3,
            timeout_sec=30,
            backoff_sec=1,
        )

        PROPERTY_NAME = "storage_compaction_key_map_memory_limit_percent"
        PROPERTY_DEFAULT = 12
        NEW_PROPERTY_VALUE = 6
        # storage_compaction_key_map_memory_limit_percent is needs_restart=yes with default 12.
        new_setting = (
            PROPERTY_NAME,
            NEW_PROPERTY_VALUE,
        )
        patch_result = self.admin.patch_cluster_config(upsert=dict([new_setting]))
        new_version = patch_result["config_version"]
        wait_for_active_nodes_version_status_sync(
            self.admin, self.redpanda, new_version, nodes=all_nodes
        )

        # Let the metadata upload loop capture the post-patch state.
        time.sleep(5)

        # Wipe and bring up a fresh cluster.
        self.redpanda.stop()
        for n in all_nodes:
            self.redpanda.remove_local_data(n)
        self.redpanda.restart_nodes(all_nodes)
        self.admin.await_stable_leader(
            "controller",
            partition=0,
            namespace="redpanda",
            timeout_s=60,
            backoff_s=2,
        )

        # Use suppress_pending=True so we read the active value only, not
        # the pending-aware view that rpk cluster_config_get returns by
        # default. We want to verify that the recovered value lands in
        # pending without changing active until restart.
        for n in all_nodes:
            v = self.admin.get_cluster_config(
                node=n, key=PROPERTY_NAME, suppress_pending=True
            )[PROPERTY_NAME]
            assert v == PROPERTY_DEFAULT, (
                f"Expected active {PROPERTY_NAME}={v} to be default value "
                f"{PROPERTY_DEFAULT=} on {n.name} pre-recovery"
            )

        # Run cluster recovery.
        self.admin.initialize_cluster_recovery()

        def cluster_recovery_complete():
            # Recovery is done once the controller reports the recovery
            # state machine is inactive again.
            return (
                "inactive" in self.admin.get_cluster_recovery_status().json()["state"]
            )

        wait_until(cluster_recovery_complete, timeout_sec=60, backoff_sec=1)

        status = self.admin.get_cluster_config_status()
        for n in status:
            assert n["restart"] is True, (
                f"Expected restart=true after recovery for needs_restart "
                f"property, got status {status}"
            )

        # After recovery, the needs_restart=yes property is in pending.
        # Active stays at the pre-recovery value; the pending-aware view
        # already reflects the recovered value.
        for n in all_nodes:
            active = self.admin.get_cluster_config(
                node=n, key=PROPERTY_NAME, suppress_pending=True
            )[PROPERTY_NAME]
            assert active == PROPERTY_DEFAULT, (
                f"Expected active {PROPERTY_NAME}={active} to still be "
                f"default {PROPERTY_DEFAULT=} on {n.name} after recovery "
                f"(needs_restart=yes properties land in pending, not active)"
            )
            pending = self.admin.get_cluster_config(node=n, key=PROPERTY_NAME)[
                PROPERTY_NAME
            ]
            assert pending == NEW_PROPERTY_VALUE, (
                f"Expected pending-aware view of {PROPERTY_NAME}={pending} "
                f"to reflect the recovered value {NEW_PROPERTY_VALUE=} on "
                f"{n.name}"
            )

        self.redpanda.restart_nodes(all_nodes)
        self.admin.await_stable_leader(
            "controller",
            partition=0,
            namespace="redpanda",
            timeout_s=60,
            backoff_s=2,
        )

        # After restart, hydrate_cluster_config -> load_cache ->
        # preload_local writes the recovered value into active.
        for n in all_nodes:
            v = self.admin.get_cluster_config(
                node=n, key=PROPERTY_NAME, suppress_pending=True
            )[PROPERTY_NAME]
            assert v == NEW_PROPERTY_VALUE, (
                f"Expected active {PROPERTY_NAME}={v} to be "
                f"{NEW_PROPERTY_VALUE=} on {n.name} after recovery + restart"
            )
        status = self.admin.get_cluster_config_status()
        for n in status:
            assert n["restart"] is False, (
                f"Unexpected restart=true after post-recovery restart: {status}"
            )
| 2962 | + |
2602 | 2963 | class ClusterConfigLegacyDefaultTest(RedpandaTest, ClusterConfigHelpersMixin): |
2603 | 2964 | """ |
2604 | 2965 | Test config::legacy_default feature, that defaults for features can be |
|
0 commit comments