microsoft
diff --git a/‎CHANGELOG.md‎
Lines changed: 1 addition & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎CMakeLists.txt‎
Lines changed: 6 additions & 0 deletions b/‎CMakeLists.txt‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎doc/host_config_schema/cchost_config.json‎
Lines changed: 6 additions & 1 deletion b/‎doc/host_config_schema/cchost_config.json‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎doc/operations/ledger_snapshot.rst‎
Lines changed: 16 additions & 1 deletion b/‎doc/operations/ledger_snapshot.rst‎
Lines changed: 16 additions & 1 deletion
diff --git a/‎include/ccf/node/startup_config.h‎
Lines changed: 1 addition & 0 deletions b/‎include/ccf/node/startup_config.h‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/common/configuration.h‎
Lines changed: 4 additions & 1 deletion b/‎src/common/configuration.h‎
Lines changed: 4 additions & 1 deletion
@@ -24,6 +24,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 ### Added
 
+- Added `files_cleanup.max_committed_ledger_chunks` configuration option to limit the number of committed ledger chunk files retained in the main ledger directory. When the number of committed chunks exceeds this value, the oldest chunks (by sequence number) are automatically deleted, but only after verifying that an identical copy (by SHA-256 digest) exists in at least one `ledger.read_only_directories` entry. Committed ledger chunks that contain entries at or beyond the sequence number of the newest committed snapshot are never deleted, ensuring a complete ledger history from that snapshot for disaster recovery. At least one read-only ledger directory must be configured when this option is set; the node will refuse to start otherwise.
 - Added `files_cleanup.max_snapshots` configuration option to limit the number of committed snapshot files retained on disk. When the number of committed snapshots exceeds this value, the oldest snapshots (by sequence number) are automatically deleted. The value must be at least 1 if set.
 - Added `files_cleanup.interval` configuration option (default `"30s"`) to periodically scan for and delete old committed files (snapshots and ledger chunks) exceeding the configured retention limits (`max_snapshots` and `max_committed_ledger_chunks`). This ensures backup nodes (which receive snapshots via `backup_fetch`) also prune old snapshots. Only effective when at least one of these limits is set.
 - Added `POST /node/snapshot:create`, gated by the `SnapshotCreate` RPC interface operator feature, to create a snapshot via an operator endpoint rather than a governance action.
 
@@ -643,6 +643,11 @@ if(BUILD_TESTS)
       ${CMAKE_CURRENT_SOURCE_DIR}/src/host/test/ledger.cpp
     )
 
+    add_unit_test(
+      files_cleanup_test
+      ${CMAKE_CURRENT_SOURCE_DIR}/src/host/test/files_cleanup_test.cpp
+    )
+
     add_unit_test(
       raft_test
       ${CMAKE_CURRENT_SOURCE_DIR}/src/consensus/aft/test/main.cpp
@@ -1231,6 +1236,7 @@ if(BUILD_TESTS)
       --historical-testdata
       ${CMAKE_SOURCE_DIR}/tests/testdata
   )
+  set_tests_properties(schema_test PROPERTIES TIMEOUT 900)
 
   add_e2e_test(
     NAME snp_platform_tests
 
@@ -555,10 +555,15 @@
           "description": "Maximum number of committed snapshot files to retain. When the number of committed snapshots exceeds this value, the oldest snapshots are deleted. Must be at least 1 if set. If null or unset, no automated snapshot garbage collection is performed.",
           "minimum": 1
         },
+        "max_committed_ledger_chunks": {
+          "type": ["integer", "null"],
+          "default": null,
+          "description": "Maximum number of committed ledger chunk files to retain in the main ledger directory. When the number of committed chunks exceeds this value, the oldest chunks are deleted, but only after verifying that an identical copy (by SHA-256 digest) exists in at least one read-only ledger directory. Chunks whose entries extend to or beyond the sequence number of the newest committed snapshot are never deleted, ensuring a complete ledger history from that snapshot for disaster recovery. Requires at least one ledger.read_only_directories entry; the node will refuse to start otherwise. If null or unset, no automated ledger chunk garbage collection is performed."
+        },
         "interval": {
           "type": "string",
           "default": "30s",
-          "description": "Time interval at which to scan the snapshot directory and delete old committed snapshots in excess of max_snapshots. This periodic cleanup executes regardless of the node's status (primary or backup)."
+          "description": "Time interval at which to scan and delete old committed files (snapshots and ledger chunks) that exceed the configured retention limits. This periodic cleanup executes regardless of the node's status (primary or backup)."
         }
       },
       "description": "This section includes configuration for periodic cleanup of old files (snapshots, ledger chunks)",
 
@@ -27,6 +27,8 @@ Ledger files that still contain some uncommitted entries are named ``ledger_<sta
 
 .. warning:: Removing `uncommitted` ledger files from the ``ledger.directory`` directory may cause a node to crash. It is however safe to move `committed` ledger files to another directory, accessible to a CCF node via the ``ledger.read_only_directories`` configuration entry.
 
+.. note:: The ``files_cleanup.max_committed_ledger_chunks`` configuration entry can be used to limit the number of committed ledger chunk files retained in the main ledger directory. When the number of committed chunks exceeds this value, the oldest chunks (by sequence number) are automatically deleted, but only after verifying that an identical copy (by SHA-256 digest) exists in at least one ``ledger.read_only_directories`` entry. At least one read-only ledger directory must be configured when this option is set; the node will refuse to start otherwise. Committed ledger chunks that contain entries at or beyond the sequence number of the newest committed snapshot are never deleted, regardless of the retention limit - this guarantees that a complete ledger history exists from the newest snapshot onwards, which is required for disaster recovery. Ledger chunk cleanup runs as part of the same periodic cleanup cycle as snapshot cleanup (see :ref:`operations/ledger_snapshot:Periodic File Cleanup`).
+
 It is important to note that while all entries stored in ledger files ending in ``.committed`` are committed, not all committed entries are stored in such a file at any given time. A number of them are typically in the in-progress files, waiting to be flushed to a ``.committed`` file once the size threshold (``ledger.chunk_size``) is met.
 
 The listing below is an example of what a ledger directory may look like:
@@ -178,7 +180,7 @@ Committed snapshot files are named ``snapshot_<seqno>_<evidence_seqno>.committed
 
 Uncommitted snapshot files, i.e. those whose evidence has not yet been committed, are named ``snapshot_<seqno>_<evidence_seqno>``. These files will be ignored by CCF when joining or recovering a service as no evidence can attest to their validity.
 
-.. note:: The ``files_cleanup.max_snapshots`` configuration entry can be used to limit the number of committed snapshot files retained on disk. When the number of committed snapshots exceeds this value, the oldest snapshots (by sequence number) are automatically deleted. This is useful to control the local persistent storage footprint of a node. The value must be at least 1 if set.
+.. note:: The ``files_cleanup.max_snapshots`` configuration entry can be used to limit the number of committed snapshot files retained on disk. When the number of committed snapshots exceeds this value, the oldest snapshots (by sequence number) are automatically deleted. This is useful to control the local persistent storage footprint of a node. The value must be at least 1 if set. Snapshot cleanup runs as part of the same periodic cleanup cycle as ledger chunk cleanup (see :ref:`operations/ledger_snapshot:Periodic File Cleanup`).
 
 Join or Recover From Snapshot
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -301,3 +303,16 @@ Invariants
 3. Snapshots are always generated for the ``seqno`` of a signature transaction (but not all signature transactions trigger the generation of a snapshot).
 
 4. When a snapshot is generated, it must coincide with the end of a ledger file. Since a node can join using solely a snapshot, the first ledger file on that node will start just after the ``seqno`` of the snapshot. By 2., all nodes must have the same ledger files, so the generation of that snapshot on the primary must trigger the creation of a new ledger file starting at the next ``seqno`` to ensure the primary's ledger files are consistent with the joining node's files.
+
+Periodic File Cleanup
+---------------------
+
+Both snapshot and committed ledger chunk retention are managed by a single periodic cleanup cycle, controlled by the ``files_cleanup`` configuration section. The cleanup interval is set by ``files_cleanup.interval`` (default: ``30s``). On each cycle, the node checks whether committed snapshots or committed ledger chunks exceed their configured retention limits (``files_cleanup.max_snapshots`` and ``files_cleanup.max_committed_ledger_chunks`` respectively) and deletes the oldest files that qualify for removal.
+
+Snapshots qualify for removal if their number is in excess of the limit, starting from the ones with the lowest sequence numbers.
+
+Ledger chunks qualify for removal if their number is in excess of the limit, and if two other conditions apply. First, there must be at least one identical file in a read-only ledger directory (file contents are compared by SHA-256 digest). Second, as a safety measure, ledger chunks whose entries extend to or beyond the sequence number of the newest committed snapshot never qualify. This ensures that a complete ledger history is always available from the newest snapshot onwards, which is required for disaster recovery.
+
+If no committed snapshots exist, no ledger chunks are protected by this rule, but the existing backup-verification requirement still applies.
+
+Only one cleanup cycle can run at a time. If a cleanup task is still in progress when the next timer fires, the new cycle is skipped and a failure-level log message is emitted. This prevents overlapping cleanup operations, which could be wasteful, cause contention on the filesystem and produce spurious failures in the log. Under normal conditions each cleanup cycle completes well within the configured interval, so skipped cycles indicate that the interval may be too short or the node has an unusually large number of files to process.
@@ -120,6 +120,7 @@ namespace ccf
     struct FilesCleanup
     {
       std::optional<size_t> max_snapshots = std::nullopt;
+      std::optional<size_t> max_committed_ledger_chunks = std::nullopt;
       ccf::ds::TimeString interval = {"30s"};
 
       bool operator==(const FilesCleanup&) const = default;
 
@@ -117,7 +117,10 @@ namespace ccf
   DECLARE_JSON_TYPE_WITH_OPTIONAL_FIELDS(CCFConfig::FilesCleanup);
   DECLARE_JSON_REQUIRED_FIELDS(CCFConfig::FilesCleanup);
   DECLARE_JSON_OPTIONAL_FIELDS(
-    CCFConfig::FilesCleanup, max_snapshots, interval);
+    CCFConfig::FilesCleanup,
+    max_snapshots,
+    max_committed_ledger_chunks,
+    interval);
 
   DECLARE_JSON_TYPE_WITH_OPTIONAL_FIELDS(CCFConfig);
   DECLARE_JSON_REQUIRED_FIELDS(CCFConfig, network);
Original file line number	Diff line number	Diff line change
`@@ -120,6 +120,7 @@ namespace ccf`
`120`	`120`	`struct FilesCleanup`
`121`	`121`	`{`
`122`	`122`	`std::optional<size_t> max_snapshots = std::nullopt;`
	`123`	`+ std::optional<size_t> max_committed_ledger_chunks = std::nullopt;`
`123`	`124`	`ccf::ds::TimeString interval = {"30s"};`
`124`	`125`
`125`	`126`	`bool operator==(const FilesCleanup&) const = default;`