Skip to content

Commit 26d7096

Browse files
authored
Merge pull request #529 from docker/fix/posixage-flock-stale-recovery-race
fix(posixage/flock): close ghost-lock race via inode verify + heartbeat
2 parents 82c7740 + 5048a3d commit 26d7096

7 files changed

Lines changed: 453 additions & 99 deletions

File tree

store/posixage/internal/flock/flock.go

Lines changed: 239 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -17,24 +17,77 @@ package flock
1717
import (
1818
"context"
1919
"errors"
20+
"io/fs"
2021
"os"
2122
"sync"
2223
"time"
2324

2425
"github.com/cenkalti/backoff/v5"
26+
27+
"github.com/docker/secrets-engine/x/logging"
2528
)
2629

30+
// noopLogger discards every record. It is the [logging.Logger]
31+
// implementation used when no logger is provided in the context passed
32+
// to [TryLock] or [TryRLock], so the package never writes to the
33+
// caller's stderr unsolicited.
34+
type noopLogger struct{}
35+
36+
func (noopLogger) Printf(string, ...any) {}
37+
func (noopLogger) Warnf(string, ...any) {}
38+
func (noopLogger) Errorf(string, ...any) {}
39+
40+
// loggerFromCtx returns the [logging.Logger] stored on ctx by
41+
// [logging.WithLogger], or a [noopLogger] when none is set. The library
42+
// must never log to a default sink — silence is the safe default for a
43+
// dependency.
44+
func loggerFromCtx(ctx context.Context) logging.Logger {
45+
if l, err := logging.FromContext(ctx); err == nil {
46+
return l
47+
}
48+
return noopLogger{}
49+
}
50+
2751
var (
2852
ErrLockUnsuccessful = errors.New("store is locked")
2953
ErrUnlockUnsuccessful = errors.New("could not unlock store")
54+
55+
// errStaleInode indicates that the file we flocked is no longer the
56+
// file at the lock-file path. This happens when another caller's
57+
// stale-recovery unlinked the file between our open and our flock.
58+
// Locking an unlinked inode would leave us holding a "ghost" lock
59+
// that no other caller can observe.
60+
errStaleInode = errors.New("lock file inode changed under us")
3061
)
3162

3263
const (
3364
lockFileName = ".posixage.lock"
3465
)
3566

36-
// UnlockFunc is the callback function returned by [TryLock] and [TryRLock]
37-
// it should always be called inside a defer.
67+
// heartbeatInterval is how often the [heartbeat] goroutine refreshes the
68+
// lock file's modtime while a caller holds the lock. It must be well
69+
// below [staleThreshold] so that a holder which misses a tick or two
70+
// (GC pause, brief scheduler starvation) still appears live to concurrent
71+
// recovery callers.
72+
//
73+
// Exposed as a var rather than a const so tests can shorten it.
74+
var heartbeatInterval = 10 * time.Second
75+
76+
// UnlockFunc is the callback returned by [TryLock] and [TryRLock]. It
77+
// releases the advisory lock, closes the underlying file descriptor, and
78+
// stops the background heartbeat goroutine that refreshes the lock
79+
// file's modtime.
80+
//
81+
// Callers MUST invoke this function exactly once, typically via defer
82+
// immediately after a successful lock acquisition. Failing to call it
83+
// leaks both the file descriptor and the heartbeat goroutine for the
84+
// remaining lifetime of the process — the goroutine has no other
85+
// termination signal. Calling it more than once is safe and idempotent;
86+
// only the first call performs the release.
87+
//
88+
// The returned error reflects the unlock/close step only. The heartbeat
89+
// goroutine is always stopped and joined before the unlock is attempted,
90+
// so the file descriptor is never touched after it has been closed.
3891
type UnlockFunc func() error
3992

4093
// openFile is a helper function for internal use by [tryLock]
@@ -49,88 +102,171 @@ func openFile(root *os.Root) (*os.File, error) {
49102
return fl, nil
50103
}
51104

52-
func tryLock(ctx context.Context, root *os.Root, exclusive bool) (UnlockFunc, error) {
53-
var (
54-
err error
55-
fl *os.File
56-
)
57-
58-
defer func() {
59-
// we must always close the file on any error returned
60-
if err != nil && fl != nil {
61-
_ = fl.Close()
62-
}
63-
}()
105+
// acquireOnce performs a single lock acquisition attempt and verifies the
106+
// resulting lock is on the file currently at the lock-file path.
107+
//
108+
// The sequence is open -> flock -> truncate -> compare-inodes. If any step
109+
// fails the function releases the flock (when held) and closes the fd
110+
// before returning. The returned [os.File] is the locked descriptor; the
111+
// caller is responsible for unlocking and closing it.
112+
//
113+
// The inode check is what prevents the "ghost lock" race: when a
114+
// concurrent stale-recovery unlinks the file between our [openFile] and
115+
// our [lockFile] call, [lockFile] will succeed on the unlinked inode but
116+
// the path will resolve to a brand-new inode. Treating that as a failure
117+
// forces the caller to drop the bad lock and try again with a fresh fd.
118+
func acquireOnce(root *os.Root, exclusive bool) (*os.File, error) {
119+
fl, err := openFile(root)
120+
if err != nil {
121+
return nil, err
122+
}
64123

65-
fl, err = openFile(root)
124+
if err := lockFile(fl.Fd(), exclusive); err != nil {
125+
_ = fl.Close()
126+
return nil, err
127+
}
128+
129+
// Truncate first so the modtime refresh is visible to any concurrent
130+
// recovery caller before we check the inode. Doing it the other way
131+
// round leaves a window where a passing inode check is followed by an
132+
// unlink before truncate, and the caller walks away with a lock on an
133+
// already-orphaned inode.
134+
_ = fl.Truncate(0)
135+
136+
same, err := isCurrentLockFile(fl, root)
66137
if err != nil {
138+
_ = releaseLock(fl)
139+
_ = fl.Close()
67140
return nil, err
68141
}
142+
if !same {
143+
_ = releaseLock(fl)
144+
_ = fl.Close()
145+
return nil, errStaleInode
146+
}
147+
return fl, nil
148+
}
69149

70-
if err = lockFile(fl.Fd(), exclusive); err == nil {
71-
// truncate to update the modtime to signal to other processes that the
72-
// current lock is valid so they don't attempt a recovery on it.
73-
_ = fl.Truncate(0)
74-
return sync.OnceValue(func() error {
75-
return unlockFile(fl)
76-
}), nil
150+
// isCurrentLockFile reports whether the locked descriptor [fl] still refers
151+
// to the file at the lock-file path. It returns false when the path no
152+
// longer exists or has been replaced by a different inode.
153+
func isCurrentLockFile(fl *os.File, root *os.Root) (bool, error) {
154+
fdInfo, err := fl.Stat()
155+
if err != nil {
156+
return false, err
157+
}
158+
pathInfo, err := root.Stat(lockFileName)
159+
if err != nil {
160+
if errors.Is(err, fs.ErrNotExist) {
161+
return false, nil
162+
}
163+
return false, err
77164
}
78-
err = errors.Join(ErrLockUnsuccessful, err)
165+
return os.SameFile(fdInfo, pathInfo), nil
166+
}
79167

80-
if ctx.Err() != nil {
81-
return nil, errors.Join(err, ctx.Err())
168+
func tryLock(ctx context.Context, root *os.Root, exclusive bool) (UnlockFunc, error) {
169+
logger := loggerFromCtx(ctx)
170+
fl, err := acquireOnce(root, exclusive)
171+
if err == nil {
172+
return startHeartbeat(fl, root, logger), nil
82173
}
174+
firstErr := errors.Join(ErrLockUnsuccessful, err)
83175

84-
// recoverStaleLock always closes fl; clear our reference so the deferred
85-
// close-on-error cleanup does not double-close it.
86-
recoverErr := recoverStaleLock(root, fl)
87-
fl = nil
88-
if recoverErr != nil && !errors.Is(recoverErr, errRecoverLock) {
89-
return nil, errors.Join(err, recoverErr)
176+
if ctxErr := ctx.Err(); ctxErr != nil {
177+
return nil, errors.Join(firstErr, ctxErr)
90178
}
91179

92-
fl, err = openFile(root)
93-
if err != nil {
94-
return nil, err
180+
if recoverErr := recoverStaleLock(root); recoverErr != nil && !errors.Is(recoverErr, errRecoverLock) {
181+
return nil, errors.Join(firstErr, recoverErr)
95182
}
96183

97-
err = retryLock(ctx, fl, exclusive)
184+
fl, err = retryLock(ctx, root, exclusive)
98185
if err != nil {
99186
return nil, err
100187
}
188+
return startHeartbeat(fl, root, logger), nil
189+
}
101190

191+
// startHeartbeat launches the modtime-refresh goroutine for a locked file
192+
// and returns an [UnlockFunc] that stops the goroutine, waits for it to
193+
// exit, and then unlocks/closes the file. The wait ensures the goroutine
194+
// never touches the fd after [unlockFile] closes it.
195+
//
196+
// The supplied logger is used by the goroutine to surface truncate
197+
// failures and inode-mismatch hijacks. A [noopLogger] is acceptable when
198+
// the caller has no logging plumbed.
199+
func startHeartbeat(fl *os.File, root *os.Root, logger logging.Logger) UnlockFunc {
200+
hbCtx, stop := context.WithCancel(context.Background())
201+
done := make(chan struct{})
202+
go func() {
203+
defer close(done)
204+
heartbeat(hbCtx, fl, root, logger)
205+
}()
102206
return sync.OnceValue(func() error {
207+
stop()
208+
<-done
103209
return unlockFile(fl)
104-
}), nil
210+
})
105211
}
106212

107-
// retryLock attempts to acquire an advisory lock on the given file
108-
// using flock, retrying until the context is canceled or the lock is acquired.
213+
// heartbeat re-truncates the locked file every [heartbeatInterval] so its
214+
// modtime stays younger than [staleThreshold] for the lifetime of the
215+
// lock. Without this, a holder doing work that exceeds [staleThreshold]
216+
// would be misclassified as stale by concurrent callers, which would
217+
// unlink the lock file and let a fresh inode be created at the same path
218+
// — the holder-side half of the ghost-lock race.
109219
//
110-
// Retries use exponential backoff with a maximum delay of 100ms
111-
// between attempts.
220+
// Each tick also re-verifies the locked descriptor still refers to the
221+
// file at the lock-file path. A mismatch means we have been hijacked
222+
// (heartbeat starved past [staleThreshold] long enough for recovery to
223+
// fire). There is no in-band way to fail the caller's outstanding
224+
// operation, so the mismatch is logged via the supplied [logging.Logger]
225+
// and the goroutine keeps running — surfacing the hijack is the best we
226+
// can do.
112227
//
113-
// Set exclusive to true for write or delete operations to prevent
114-
// concurrent reads.
115-
func retryLock(ctx context.Context, f *os.File, exclusive bool) error {
228+
// The goroutine returns when ctx is canceled by [startHeartbeat]'s
229+
// returned [UnlockFunc].
230+
func heartbeat(ctx context.Context, fl *os.File, root *os.Root, logger logging.Logger) {
231+
ticker := time.NewTicker(heartbeatInterval)
232+
defer ticker.Stop()
233+
for {
234+
select {
235+
case <-ctx.Done():
236+
return
237+
case <-ticker.C:
238+
if err := fl.Truncate(0); err != nil {
239+
logger.Warnf("flock heartbeat: truncate failed: %v", err)
240+
continue
241+
}
242+
same, err := isCurrentLockFile(fl, root)
243+
if err != nil {
244+
logger.Warnf("flock heartbeat: inode verify failed: %v", err)
245+
continue
246+
}
247+
if !same {
248+
logger.Warnf("flock heartbeat: lock file inode changed under us; lock has likely been hijacked")
249+
}
250+
}
251+
}
252+
}
253+
254+
// retryLock loops [acquireOnce] with exponential backoff until ctx is
255+
// canceled or a verified lock is obtained. Each iteration opens a fresh
256+
// fd, so a [errStaleInode] result simply causes the next attempt to start
257+
// over against whatever file is currently at the path.
258+
func retryLock(ctx context.Context, root *os.Root, exclusive bool) (*os.File, error) {
116259
ep := backoff.NewExponentialBackOff()
117260
ep.InitialInterval = time.Millisecond * 10
118261
ep.MaxInterval = time.Millisecond * 100
119-
_, err := backoff.Retry(ctx, func() (bool, error) {
120-
if err := lockFile(f.Fd(), exclusive); err != nil {
121-
return false, err
122-
}
123-
return true, nil
262+
263+
fl, err := backoff.Retry(ctx, func() (*os.File, error) {
264+
return acquireOnce(root, exclusive)
124265
}, backoff.WithBackOff(ep), backoff.WithMaxElapsedTime(0))
125266
if err != nil {
126-
return errors.Join(ErrLockUnsuccessful, err)
267+
return nil, errors.Join(ErrLockUnsuccessful, err)
127268
}
128-
129-
// truncate to update the modtime to signal to other processes that the
130-
// current lock is valid so they don't attempt a recovery on it.
131-
_ = f.Truncate(0)
132-
133-
return nil
269+
return fl, nil
134270
}
135271

136272
// TryLock acquires an exclusive advisory lock on a lock file.
@@ -140,11 +276,31 @@ func retryLock(ctx context.Context, f *os.File, exclusive bool) error {
140276
// lock is acquired.
141277
//
142278
// As a safeguard, the function attempts to recover from stale locks,
143-
// defined as lock files older than 30s. Stale lock recovery is skipped when
144-
// ctx has been canceled. If recovery fails, manual intervention may be
145-
// required.
279+
// defined as lock files older than 30s. While the lock is held, a
280+
// background goroutine refreshes the lock file's modtime every 10s so
281+
// long-running operations are not misclassified as stale. Stale lock
282+
// recovery is skipped when ctx has been canceled. If recovery fails,
283+
// manual intervention may be required.
146284
//
147-
// It returns an unlock function that must be called to release the lock.
285+
// The heartbeat goroutine surfaces truncate failures and hijack
286+
// detections through the [logging.Logger] stored on ctx via
287+
// [logging.WithLogger]. When no logger is set, those events are dropped
288+
// silently — the package never writes to a default sink.
289+
//
290+
// On success, the returned [UnlockFunc] MUST be called exactly once to
291+
// release the lock, close the file descriptor, and stop the heartbeat
292+
// goroutine. The idiomatic pattern is to defer it immediately:
293+
//
294+
// ctx = logging.WithLogger(ctx, myLogger) // optional
295+
// unlock, err := flock.TryLock(ctx, root)
296+
// if err != nil {
297+
// return err
298+
// }
299+
// defer unlock()
300+
//
301+
// Failing to call the returned function leaks both the file descriptor
302+
// and the heartbeat goroutine for the remaining lifetime of the process.
303+
// See [UnlockFunc] for details.
148304
func TryLock(ctx context.Context, root *os.Root) (UnlockFunc, error) {
149305
return tryLock(ctx, root, true)
150306
}
@@ -156,11 +312,31 @@ func TryLock(ctx context.Context, root *os.Root) (UnlockFunc, error) {
156312
// lock is acquired.
157313
//
158314
// As a safeguard, the function attempts to recover from stale locks,
159-
// defined as lock files older than 30s. Stale lock recovery is skipped when
160-
// ctx has been canceled. If recovery fails, manual intervention may be
161-
// required.
315+
// defined as lock files older than 30s. While the lock is held, a
316+
// background goroutine refreshes the lock file's modtime every 10s so
317+
// long-running operations are not misclassified as stale. Stale lock
318+
// recovery is skipped when ctx has been canceled. If recovery fails,
319+
// manual intervention may be required.
320+
//
321+
// The heartbeat goroutine surfaces truncate failures and hijack
322+
// detections through the [logging.Logger] stored on ctx via
323+
// [logging.WithLogger]. When no logger is set, those events are dropped
324+
// silently — the package never writes to a default sink.
325+
//
326+
// On success, the returned [UnlockFunc] MUST be called exactly once to
327+
// release the lock, close the file descriptor, and stop the heartbeat
328+
// goroutine. The idiomatic pattern is to defer it immediately:
329+
//
330+
// ctx = logging.WithLogger(ctx, myLogger) // optional
331+
// unlock, err := flock.TryRLock(ctx, root)
332+
// if err != nil {
333+
// return err
334+
// }
335+
// defer unlock()
162336
//
163-
// It returns an unlock function that must be called to release the lock.
337+
// Failing to call the returned function leaks both the file descriptor
338+
// and the heartbeat goroutine for the remaining lifetime of the process.
339+
// See [UnlockFunc] for details.
164340
func TryRLock(ctx context.Context, root *os.Root) (UnlockFunc, error) {
165341
return tryLock(ctx, root, false)
166342
}

0 commit comments

Comments
 (0)