Skip to content

Commit dfad263

Browse files
committed
pool: stop automatic scrubber on repository read-only event
Motivation: The external pool check can indicate that the pool repository should be operated in read-only mode. Such an event can be triggered when the underlying filesystem runs an internal rebuild due to a disk failure in a RAID set. To reduce the load on the disk subsystem, it makes sense to disable the automatic scrub process. The admin is still able to force checksum validation. Modification: Update ChecksumScanner to listen for repository fault events and stop the scrubber if it is running. Result: Less load on the pool's I/O subsystem during disk rebuilds. ``` 18 Mar 2026 14:16:20 (pool_write) [] Read-only file system 18 Mar 2026 14:16:20 (pool_write) [] Pool mode changed to disabled(store,stage,p2p-client): Pool read-only: I/O test failed, READ_ONLY Error 18 Mar 2026 14:16:20 (PoolManager) [pool_write PoolManagerPoolUp] Pool pool_write changed from mode enabled to disabled(store,stage,p2p-client). 18 Mar 2026 14:16:20 (pool_write) [] Scrubber is stopping due to repository fault event: I/O test failed, READ_ONLY Error ``` Acked-by: Dmitry Litvintsev Target: master Require-book: no Require-notes: yes
1 parent c670b57 commit dfad263

1 file changed

Lines changed: 16 additions & 1 deletion

File tree

modules/dcache/src/main/java/org/dcache/pool/classic/ChecksumScanner.java

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@
3838
import java.util.concurrent.TimeUnit;
3939
import org.dcache.alarms.AlarmMarkerFactory;
4040
import org.dcache.alarms.PredefinedAlarm;
41+
import org.dcache.pool.FaultAction;
42+
import org.dcache.pool.FaultEvent;
43+
import org.dcache.pool.FaultListener;
4144
import org.dcache.pool.repository.ReplicaDescriptor;
4245
import org.dcache.pool.repository.ReplicaState;
4346
import org.dcache.pool.repository.Repository;
@@ -47,7 +50,7 @@
4750
import org.slf4j.LoggerFactory;
4851

4952
public class ChecksumScanner
50-
implements CellCommandListener, CellLifeCycleAware, CellSetupProvider, CellInfoProvider {
53+
implements CellCommandListener, CellLifeCycleAware, CellSetupProvider, CellInfoProvider, FaultListener {
5154

5255
private static final Logger LOGGER =
5356
LoggerFactory.getLogger(ChecksumScanner.class);
@@ -93,6 +96,7 @@ private void stopScrubber() {
9396

9497
/**
 * Stores the given repository and registers this scanner on it as a fault listener.
 *
 * @param repository the repository to keep a reference to; it is dereferenced
 *                   immediately, so it must not be {@code null}
 */
public void setRepository(Repository repository) {
9598
_repository = repository;
99+
// Register this scanner so the repository can deliver fault events to faultOccurred().
_repository.addFaultListener(this);
96100
}
97101

98102
public void setChecksumModule(ChecksumModuleV1 csm) {
@@ -107,6 +111,17 @@ public void setPoolName(String poolName) {
107111
this.poolName = poolName;
108112
}
109113

114+
/**
 * Reacts to a repository fault event. When the event's action is anything
 * other than {@link FaultAction#LOG} and the scrubber is currently active,
 * a warning containing the event message is logged and the scrubber is stopped.
 *
 * @param event the fault event delivered by the repository
 */
@Override
115+
public void faultOccurred(FaultEvent event) {
116+
// If the fault is not a log-only event, stop the scrubber to avoid
117+
// further stressing the underlying storage.
118+
// NOTE(review): the original comment said the scrubber is restarted when the pool recovers; no restart logic is visible in this handler - confirm where that happens.
119+
if (event.getAction() != FaultAction.LOG && _scrubber.isActive()) {
120+
LOGGER.warn("Scrubber is stopping due to repository fault event: {}", event.getMessage());
121+
stopScrubber();
122+
}
123+
}
124+
110125
private class FullScan extends Singleton {
111126

112127
private volatile int _totalCount;

0 commit comments

Comments
 (0)