From a9102dd3d824738772b7f15c3446a0ba34c67840 Mon Sep 17 00:00:00 2001 From: Maksim Sukharev Date: Fri, 22 May 2026 17:55:18 +0200 Subject: [PATCH 1/4] fix(AmazonS3#getDirectoryMetaData): map root path to filecache key normalizePath('') returns '.' for S3 object keys, but the filecache stores the storage root under the key ''. getDirectoryMetaData() was calling getCache()->get('.') which looks up by md5('.'), never matching the root entry stored at md5(''). The cache miss caused getDirectoryMetaData to return synthetic data with time() as mtime/storage_mtime and uniqid() as etag on every call. The scanner then saw a storage_mtime mismatch and wrote the fabricated timestamps back to the cache on every occ files:scan run, regardless of whether any S3 content had changed. Introduce getCachePath() to translate the normalized root path '.' back to '' before any filecache lookup. Signed-off-by: Maksim Sukharev Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../lib/Lib/Storage/AmazonS3.php | 7 ++++- .../tests/Storage/Amazons3Test.php | 27 +++++++++++++++++-- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/apps/files_external/lib/Lib/Storage/AmazonS3.php b/apps/files_external/lib/Lib/Storage/AmazonS3.php index a45019b98f9de..224e2dbfe6873 100644 --- a/apps/files_external/lib/Lib/Storage/AmazonS3.php +++ b/apps/files_external/lib/Lib/Storage/AmazonS3.php @@ -70,6 +70,11 @@ private function isRoot(string $path): bool { return $path === '.'; } + private function getCachePath(string $path): string { + // normalizePath() converts '' to '.' for S3 object keys, but filecache stores the root as '' + return $this->isRoot($path) ? '' : $path; + } + private function cleanKey(string $path): string { if ($this->isRoot($path)) { return '/'; @@ -667,7 +672,7 @@ private function getDirectoryMetaData(string $path): ?array { if ($this->versioningEnabled() && !$this->doesDirectoryExist($path)) { return null; } - $cacheEntry = $this->getCache()->get($path); + $cacheEntry = $this->getCache()->get($this->getCachePath($path)); if ($cacheEntry instanceof CacheEntry) { return $cacheEntry->getData(); } else { diff --git a/apps/files_external/tests/Storage/Amazons3Test.php b/apps/files_external/tests/Storage/Amazons3Test.php index 90e36d5fdbd74..1a7b598c4cbef 100644 --- a/apps/files_external/tests/Storage/Amazons3Test.php +++ b/apps/files_external/tests/Storage/Amazons3Test.php @@ -8,6 +8,7 @@ */ namespace OCA\Files_External\Tests\Storage; +use OC\Files\Cache\Scanner; use OCA\Files_External\Lib\Storage\AmazonS3; /** @@ -38,7 +39,29 @@ protected function tearDown(): void { parent::tearDown(); } - public function testStat(): void { - $this->markTestSkipped('S3 doesn\'t update the parents folder mtime'); + /** + * Regression test for the '.' vs '' root path mismatch in getDirectoryMetaData. + * + * normalizePath('') returns '.' for S3 object keys, but the filecache stores the + * storage root under the key ''. Before the fix, getCache()->get('.') returned false, + * causing getDirectoryMetaData to return a fabricated time() on every call, which + * made getCacheEntry always see a changed storage_mtime and fire propagateChange. + */ + public function testStatRootPreservesStorageMtimeFromCache(): void { + $this->instance->getScanner()->scan('', Scanner::SCAN_SHALLOW); + + $cachedRoot = $this->instance->getCache()->get(''); + $this->assertNotFalse($cachedRoot, 'Root entry must exist in cache after scan'); + + $cachedStorageMtime = $cachedRoot['storage_mtime']; + + $stat = $this->instance->stat(''); + $this->assertNotFalse($stat, 'stat(\'\') must return data'); + $this->assertEquals( + $cachedStorageMtime, + $stat['storage_mtime'], + 'stat(\'\') must return storage_mtime from the cache entry, not a fabricated time()' + ); } + } From 57c47d51bbe1c2c1aaf2aae71ccef5b08c3dc5f1 Mon Sep 17 00:00:00 2001 From: Maksim Sukharev Date: Fri, 22 May 2026 17:56:09 +0200 Subject: [PATCH 2/4] fix(AmazonS3#getMetaData): preserve storage_mtime after parent clobbers it Common::getMetaData (Common.php:667) always sets storage_mtime = mtime before returning. For S3 virtual directories this is wrong: mtime can be bumped by child propagation while storage_mtime should remain the actual last S3 change. When the scanner later calls getMetaData() it compares data['storage_mtime'] against cacheData['storage_mtime']. If they differ it writes the value back, triggering View::getCacheEntry to fire propagateChange on every read even when nothing on S3 changed. Override getMetaData() to restore storage_mtime from the live cache entry (or, for non-root directories not yet in the cache, from the S3 directory marker LastModified header) after the parent call. Signed-off-by: Maksim Sukharev Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../lib/Lib/Storage/AmazonS3.php | 20 ++++++++++++++ .../tests/Storage/Amazons3Test.php | 27 +++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/apps/files_external/lib/Lib/Storage/AmazonS3.php b/apps/files_external/lib/Lib/Storage/AmazonS3.php index 224e2dbfe6873..3d3b1be4e8210 100644 --- a/apps/files_external/lib/Lib/Storage/AmazonS3.php +++ b/apps/files_external/lib/Lib/Storage/AmazonS3.php @@ -331,6 +331,26 @@ public function stat(string $path): array|false { return $stat; } + #[\Override] + public function getMetaData(string $path): ?array { + $data = parent::getMetaData($path); + if ($data !== null && $data['mimetype'] === FileInfo::MIMETYPE_FOLDER) { + // Common::getMetaData sets storage_mtime = mtime, but for S3 virtual directories + // mtime may have been updated by mtime propagation while storage_mtime should + // reflect the actual last storage change. Without this override the scanner sees + // data['storage_mtime'] != cacheData['storage_mtime'] and re-writes the cache, + // causing View::getCacheEntry to trigger propagateChange on every read. + $path = $this->normalizePath($path); + $cacheEntry = $this->getCache()->get($this->getCachePath($path)); + if ($cacheEntry instanceof CacheEntry) { + $data['storage_mtime'] = $cacheEntry->getStorageMTime(); + } elseif (!$this->isRoot($path) && $directoryMarker = $this->headObject($path . '/')) { + $data['storage_mtime'] = strtotime($directoryMarker['LastModified']); + } + } + return $data; + } + #[\Override] public function is_dir(string $path): bool { $path = $this->normalizePath($path); diff --git a/apps/files_external/tests/Storage/Amazons3Test.php b/apps/files_external/tests/Storage/Amazons3Test.php index 1a7b598c4cbef..7a1c86aabf556 100644 --- a/apps/files_external/tests/Storage/Amazons3Test.php +++ b/apps/files_external/tests/Storage/Amazons3Test.php @@ -64,4 +64,31 @@ public function testStatRootPreservesStorageMtimeFromCache(): void { ); } + /** + * Regression test: Common::getMetaData sets storage_mtime = mtime, but for S3 virtual + * directories mtime may have been bumped by propagation while storage_mtime should stay + * stable. The override restores storage_mtime from the cache entry so the scanner does + * not see a spurious mismatch and re-write the cache on every scan. + */ + public function testGetMetaDataDirectoryPreservesStorageMtimeSeparateFromMtime(): void { + $this->instance->getScanner()->scan('', Scanner::SCAN_SHALLOW); + + $cachedRoot = $this->instance->getCache()->get(''); + $this->assertNotFalse($cachedRoot, 'Root entry must exist in cache after scan'); + + // Simulate propagation bumping mtime without touching storage_mtime + $originalStorageMtime = $cachedRoot['storage_mtime']; + $this->instance->getCache()->update($cachedRoot->getId(), [ + 'mtime' => $originalStorageMtime + 9999, + ]); + + $meta = $this->instance->getMetaData(''); + $this->assertNotNull($meta, 'getMetaData(\'\') must return data'); + $this->assertEquals( + $originalStorageMtime, + $meta['storage_mtime'], + 'getMetaData must return storage_mtime from cache, not the propagated mtime' + ); + } + } From 82c63a399f3cb1c927192b6a49a79b5565c81cca Mon Sep 17 00:00:00 2001 From: Maksim Sukharev Date: Fri, 22 May 2026 17:56:22 +0200 Subject: [PATCH 3/4] fix(View#getCacheEntry): skip propagation when storage metadata is unchanged Storage backends like AmazonS3 whose hasUpdated() always returns true (S3 has no cheap global change-detection mechanism) caused propagateChange() to be called on every watcher->update() after a folder was browsed, even when the underlying storage_mtime and etag were identical to the cached values. Before the fix, getCacheEntry() would call watcher->update() and then unconditionally call propagateChange() whenever the watcher reported a change. For S3 directories this meant every read bumped the parent mtime chain. After the fix, getCacheEntry() snapshots the cache entry before watcher->update() and compares it with the entry after. propagateChange() is only invoked when at least one metadata field (mtime, storage_mtime, size, or etag) actually changed. Signed-off-by: Maksim Sukharev Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- lib/private/Files/View.php | 10 +++++++- tests/lib/Files/ViewTest.php | 50 ++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/lib/private/Files/View.php b/lib/private/Files/View.php index da3900bff4a66..95c6cf602905d 100644 --- a/lib/private/Files/View.php +++ b/lib/private/Files/View.php @@ -1410,9 +1410,17 @@ private function getCacheEntry($storage, $internalPath, $relativePath) { $data = $cache->get($internalPath); } elseif (!Scanner::isPartialFile($internalPath) && $watcher->needsUpdate($internalPath, $data)) { $this->lockFile($relativePath, ILockingProvider::LOCK_SHARED); + $cacheDataBefore = $data instanceof CacheEntry ? $data->getData() : false; $watcher->update($internalPath, $data); - $storage->getPropagator()->propagateChange($internalPath, time()); $data = $cache->get($internalPath); + $cacheDataAfter = $data instanceof CacheEntry ? $data->getData() : false; + + // Only propagate mtime change to parent folders if the scanner actually changed the cached metadata, + // to avoid updating folder mtimes on every read for backends that conservatively report directories as updated (e.g. S3) + if ($cacheDataAfter !== $cacheDataBefore) { + $storage->getPropagator()->propagateChange($internalPath, time()); + $data = $cache->get($internalPath); + } $this->unlockFile($relativePath, ILockingProvider::LOCK_SHARED); } } catch (LockedException $e) { diff --git a/tests/lib/Files/ViewTest.php b/tests/lib/Files/ViewTest.php index feb7800c12845..0b8a5dcb1a288 100644 --- a/tests/lib/Files/ViewTest.php +++ b/tests/lib/Files/ViewTest.php @@ -80,6 +80,16 @@ public function instanceOfStorage(string $class): bool { } } +/** + * Storage that always reports hasUpdated() = true for any path, simulating the + * behavior of Amazon S3 external storage (which has no real directory objects). + */ +class TemporaryAlwaysUpdated extends Temporary { + public function hasUpdated(string $path, int $time): bool { + return true; + } +} + class TestEventHandler { public function umount() { } @@ -456,6 +466,46 @@ public function testWatcher(): void { $this->assertEquals(3, $cachedData['size']); } + /** + * Regression test for View::getCacheEntry unconditionally calling propagateChange + * when the watcher reports needsUpdate = true, regardless of whether the scanner + * found any actual storage change. + * + * Backends like Amazon S3 always return hasUpdated() = true for directory paths + * because S3 has no real directory objects. Before the fix, every getFileInfo() + * call on a folder fired propagateChange(path, time()), stamping all ancestor + * folders in the filecache with the current request timestamp. + * + * After the fix, propagateChange is only called when watcher->update() actually + * changes the cached metadata for the path. + */ + public function testWatcherDoesNotPropagateWhenStorageMtimeUnchanged(): void { + $storage = $this->getTestStorage(true, TemporaryAlwaysUpdated::class); + Filesystem::mount($storage, [], '/'); + $storage->getWatcher()->setPolicy(Watcher::CHECK_ALWAYS); + + $rootView = new View(''); + + // Note the root mtime right after the initial scan. + $rootMtimeBefore = $storage->getCache()->get('')['mtime']; + + // Access a subfolder. The watcher will fire (hasUpdated always returns true), + // but the scanner leaves the cached metadata for 'folder' unchanged. + // getCacheEntry must therefore NOT call propagateChange('folder', time()), + // which would update the root entry's mtime to the current timestamp. + $rootView->getFileInfo('folder'); + + // Read the root mtime directly from the cache to avoid triggering another watcher cycle. + $rootMtimeAfter = $storage->getCache()->get('')['mtime']; + + $this->assertEquals( + $rootMtimeBefore, + $rootMtimeAfter, + 'Root folder mtime must not be updated when the watcher fires but cached metadata has not changed' + ); + } + + public function testCopyBetweenStorageNoCross(): void { $storage1 = $this->getTestStorage(true, TemporaryNoCross::class); $storage2 = $this->getTestStorage(true, TemporaryNoCross::class); From 156c0277f88b2109ff864e6bec3282260c8ef490 Mon Sep 17 00:00:00 2001 From: Maksim Sukharev Date: Fri, 22 May 2026 18:08:50 +0200 Subject: [PATCH 4/4] fix(AmazonS3): handle missing LastModified and ETag in S3 responses Add null-safety checks to handle S3 responses that don't include LastModified and ETag fields. This prevents 'Undefined array key' warnings and deprecation notices when processing directory metadata or incomplete S3 responses. - objectToMetaData(): Check if LastModified/ETag exist before accessing - getMetaData(): Check if LastModified exists before using in strtotime() Fixes test failures in testStat where hasUpdated('/', time) would fail when encountering S3 objects without complete metadata. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: Maksim Sukharev --- apps/files_external/lib/Lib/Storage/AmazonS3.php | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/apps/files_external/lib/Lib/Storage/AmazonS3.php b/apps/files_external/lib/Lib/Storage/AmazonS3.php index 3d3b1be4e8210..2699385af4cbf 100644 --- a/apps/files_external/lib/Lib/Storage/AmazonS3.php +++ b/apps/files_external/lib/Lib/Storage/AmazonS3.php @@ -345,7 +345,9 @@ public function getMetaData(string $path): ?array { if ($cacheEntry instanceof CacheEntry) { $data['storage_mtime'] = $cacheEntry->getStorageMTime(); } elseif (!$this->isRoot($path) && $directoryMarker = $this->headObject($path . '/')) { - $data['storage_mtime'] = strtotime($directoryMarker['LastModified']); + if (isset($directoryMarker['LastModified'])) { + $data['storage_mtime'] = strtotime($directoryMarker['LastModified']); + } } } return $data; @@ -674,12 +676,14 @@ public function getDirectoryContent(string $directory): \Traversable { } private function objectToMetaData(array $object): array { + $mtime = isset($object['LastModified']) ? strtotime($object['LastModified']) : time(); + $etag = isset($object['ETag']) ? trim($object['ETag'], '"') : ''; return [ 'name' => basename($object['Key']), 'mimetype' => $this->mimeDetector->detectPath($object['Key']), - 'mtime' => strtotime($object['LastModified']), - 'storage_mtime' => strtotime($object['LastModified']), - 'etag' => trim($object['ETag'], '"'), + 'mtime' => $mtime, + 'storage_mtime' => $mtime, + 'etag' => $etag, 'permissions' => Constants::PERMISSION_ALL - Constants::PERMISSION_CREATE, 'size' => (int)($object['Size'] ?? $object['ContentLength']), ];