From ca1b4cf476bda92799e0a01d8fcdb5415f6b9ac6 Mon Sep 17 00:00:00 2001 From: say Date: Mon, 11 May 2026 23:24:55 -0700 Subject: [PATCH 1/2] fix(filelocker): remove stale .stop file when Lock times out MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When Lock() creates a .stop file to signal release to the current holder and then times out without acquiring the lock, the .stop file was previously only removed by a successful Unlock() — which is never called for a failed acquisition. The leftover file would persist on disk until the next successful Unlock() of the same upload ID. On filesystems where flock() can return ErrBusy spuriously (notably SMB/CIFS), this caused failed acquisitions to leave permanent stale state that broke all subsequent retries. Clean up the .stop file directly in the ctx.Done() branch. Fixes #1372. --- pkg/filelocker/filelocker.go | 6 ++++- pkg/filelocker/filelocker_test.go | 38 +++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/pkg/filelocker/filelocker.go b/pkg/filelocker/filelocker.go index 13e9005e8..2263d5281 100644 --- a/pkg/filelocker/filelocker.go +++ b/pkg/filelocker/filelocker.go @@ -132,7 +132,11 @@ func (lock fileUploadLock) Lock(ctx context.Context, requestRelease func()) erro select { case <-ctx.Done(): - // Context expired, so we return a timeout + // Context expired, so we return a timeout. Since the lock was never + // successfully acquired, Unlock() will not be called and the .stop + // file would otherwise remain on disk indefinitely. Remove it here + // so future acquisition attempts are not affected by stale state. + _ = os.Remove(lock.requestReleaseFile) return handler.ErrLockTimeout case <-time.After(lock.acquirerPollInterval): // Continue with the next attempt after a short delay diff --git a/pkg/filelocker/filelocker_test.go b/pkg/filelocker/filelocker_test.go index f561d65a5..47ce7d89d 100644 --- a/pkg/filelocker/filelocker_test.go +++ b/pkg/filelocker/filelocker_test.go @@ -60,6 +60,44 @@ func TestFileLocker_Timeout(t *testing.T) { assertEmptyDirectory(dir, a) } +func TestFileLocker_Timeout_StopFileCleanedUp(t *testing.T) { + // Regression test: when Lock() times out without ever successfully + // acquiring the lock, the .stop file it created to signal the holder + // must be cleaned up. Otherwise the .stop file remains on disk + // indefinitely, which is especially problematic on filesystems where + // flock() can return ErrBusy spuriously (e.g. SMB/CIFS) — subsequent + // retries observe stale state and fail deterministically. + a := assert.New(t) + + dir, err := os.MkdirTemp("", "tusd-file-locker") + a.NoError(err) + + locker := New(dir) + locker.AcquirerPollInterval = 10 * time.Millisecond + + lock1, err := locker.NewLock("one") + a.NoError(err) + a.NoError(lock1.Lock(context.Background(), func() {})) + + ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond) + defer cancel() + + lock2, err := locker.NewLock("one") + a.NoError(err) + err = lock2.Lock(ctx, func() { + panic("must not be called") + }) + a.Equal(err, handler.ErrLockTimeout) + + // The .stop file created by lock2's contended acquisition must not + // linger on disk after the acquisition failed. + _, err = os.Stat(dir + "/one.stop") + a.True(os.IsNotExist(err), "stop file should have been removed after ErrLockTimeout, got err=%v", err) + + a.NoError(lock1.Unlock()) + assertEmptyDirectory(dir, a) +} + func TestFileLocker_RequestUnlock(t *testing.T) { a := assert.New(t) From 95c461e199dac9af80632b1ba83c071ea5b29ebf Mon Sep 17 00:00:00 2001 From: Sai Asish Y Date: Tue, 12 May 2026 16:35:19 -0700 Subject: [PATCH 2/2] fix(filelocker): close .stop file handle before removal for Windows --- pkg/filelocker/filelocker.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/filelocker/filelocker.go b/pkg/filelocker/filelocker.go index 2263d5281..944571178 100644 --- a/pkg/filelocker/filelocker.go +++ b/pkg/filelocker/filelocker.go @@ -124,11 +124,13 @@ func (lock fileUploadLock) Lock(ctx context.Context, requestRelease func()) erro // If we are here, the lock is already held by another entity. // We create the .stop file to signal the lock holder to release the lock. + // The handle is closed right away: the holder only checks the file's + // existence, and an open handle would block removal on Windows. file, err := os.Create(lock.requestReleaseFile) if err != nil { return err } - defer file.Close() + file.Close() select { case <-ctx.Done():