Skip to content

Commit 67b58b2

Browse files
authored
fix: add lock on config.yaml to prevent TOCTOU coruption (#50)
* test: reproduce config file corruption from concurrent CLI instances Add race_test.go that demonstrates IIP-20714: multiple concurrent read-modify-write cycles on config.yaml without file locking causes invalid YAML, empty file reads, and silent data loss. * fix: add cross-process file lock to prevent config.yaml corruption Concurrent CLI invocations race on config.yaml through the non-atomic read-modify-write in New() (viper.WriteConfig + addDocCommentsToYAML). Wrap the entire critical section in an exclusive flock via gofrs/flock. * test: tighten readonly_directory assertion to verify lock acquisition path * test: allow TestRaceWorker to run standalone as a config.New() smoke test Falls back to t.TempDir() when RACE_DATA_DIR is not set, so the test no longer skips/fatals when run individually or in CI outside of the race suite.
1 parent 964476e commit 67b58b2

5 files changed

Lines changed: 198 additions & 4 deletions

File tree

go.mod

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ require (
1010
github.com/charmbracelet/huh v0.7.0
1111
github.com/charmbracelet/lipgloss v1.1.0
1212
github.com/go-viper/mapstructure/v2 v2.4.0
13+
github.com/gofrs/flock v0.13.0
1314
github.com/google/uuid v1.6.0
1415
github.com/mattn/go-colorable v0.1.14
1516
github.com/mattn/go-runewidth v0.0.19
@@ -22,7 +23,7 @@ require (
2223
github.com/stretchr/testify v1.11.1
2324
github.com/tidwall/gjson v1.18.0
2425
go.uber.org/mock v0.6.0
25-
golang.org/x/sys v0.36.0
26+
golang.org/x/sys v0.37.0
2627
golang.org/x/term v0.35.0
2728
gopkg.in/yaml.v3 v3.0.1
2829
modernc.org/sqlite v1.39.0

go.sum

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S
6262
github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
6363
github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs=
6464
github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
65+
github.com/gofrs/flock v0.13.0 h1:95JolYOvGMqeH31+FC7D2+uULf6mG61mEZ/A8dRYMzw=
66+
github.com/gofrs/flock v0.13.0/go.mod h1:jxeyy9R1auM5S6JYDBhDt+E2TCo7DkratH4Pgi8P+Z0=
6567
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
6668
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
6769
github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs=
@@ -170,8 +172,8 @@ golang.org/x/sys v0.0.0-20211110154304-99a53858aa08/go.mod h1:oPkhp1MJrh7nUepCBc
170172
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
171173
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
172174
golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
173-
golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k=
174-
golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
175+
golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
176+
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
175177
golang.org/x/term v0.14.0/go.mod h1:TySc+nGkYR6qt8km8wUhuFRTVSMIX3XPR58y2lC8vww=
176178
golang.org/x/term v0.35.0 h1:bZBVKBudEyhRcajGcNc3jIfWPqV4y/Kt2XcoigOWtDQ=
177179
golang.org/x/term v0.35.0/go.mod h1:TPGtkTLesOwf2DE8CgVYiZinHAOuy5AYUYT1lENIZnA=

internal/config/config.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"time"
99

1010
"github.com/go-viper/mapstructure/v2"
11+
"github.com/gofrs/flock"
1112
"github.com/spf13/pflag"
1213
"github.com/spf13/viper"
1314

@@ -68,6 +69,12 @@ func New(dataDir string) (*Config, cenclierrors.CencliError) {
6869

6970
configPath := filepath.Join(dataDir, "config.yaml")
7071

72+
fileLock := flock.New(configPath + ".lock")
73+
if err := fileLock.Lock(); err != nil {
74+
return nil, newInvalidConfigError(fmt.Errorf("failed to acquire config lock: %w", err).Error())
75+
}
76+
defer func() { _ = fileLock.Unlock() }()
77+
7178
if err := viper.ReadInConfig(); err != nil {
7279
var configFileNotFoundError viper.ConfigFileNotFoundError
7380
if !errors.As(err, &configFileNotFoundError) {

internal/config/config_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ func TestConfigWriteErrors(t *testing.T) {
346346
_, err = New(tempDir)
347347
var invalidConfigErr InvalidConfigError
348348
assert.ErrorAs(t, err, &invalidConfigErr)
349-
assert.Contains(t, err.Error(), "failed to write config file")
349+
assert.Contains(t, err.Error(), "failed to acquire config lock")
350350
})
351351
}
352352

internal/config/race_test.go

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
package config
2+
3+
import (
4+
"errors"
5+
"fmt"
6+
"os"
7+
"os/exec"
8+
"path/filepath"
9+
"strings"
10+
"sync"
11+
"testing"
12+
13+
"gopkg.in/yaml.v3"
14+
)
15+
16+
// TestRaceConditionEndToEnd reproduces the real file-level race condition that
17+
// occurs when multiple cencli processes initialize config concurrently against
18+
// the same data directory. Each subprocess is a real OS process with its own
19+
// viper instance — just like production. The race is in the non-atomic
20+
// read-modify-write of config.yaml (viper.WriteConfig + addDocCommentsToYAML).
21+
//
22+
// Run with: go test -run TestRaceConditionEndToEnd -count=1 -v ./internal/config/
23+
func TestRaceConditionEndToEnd(t *testing.T) {
24+
const processes = 15
25+
26+
dataDir := t.TempDir()
27+
if err := os.MkdirAll(filepath.Join(dataDir, "templates"), 0o755); err != nil {
28+
t.Fatal(err)
29+
}
30+
31+
configPath := filepath.Join(dataDir, "config.yaml")
32+
33+
type procResult struct {
34+
id int
35+
exitCode int
36+
output string
37+
err error
38+
}
39+
40+
var (
41+
wg sync.WaitGroup
42+
mu sync.Mutex
43+
results []procResult
44+
)
45+
46+
// All processes start as close together as possible.
47+
start := make(chan struct{})
48+
49+
for i := 0; i < processes; i++ {
50+
wg.Add(1)
51+
go func(id int) {
52+
defer wg.Done()
53+
<-start
54+
55+
cmd := exec.Command(
56+
os.Args[0],
57+
"-test.run=^TestRaceWorker$",
58+
"-test.v",
59+
)
60+
cmd.Env = append(os.Environ(),
61+
"RACE_WORKER=1",
62+
fmt.Sprintf("RACE_DATA_DIR=%s", dataDir),
63+
)
64+
65+
out, err := cmd.CombinedOutput()
66+
67+
exitCode := 0
68+
if err != nil {
69+
var ee *exec.ExitError
70+
if errors.As(err, &ee) {
71+
exitCode = ee.ExitCode()
72+
} else {
73+
exitCode = -1
74+
}
75+
}
76+
77+
mu.Lock()
78+
results = append(results, procResult{
79+
id: id,
80+
exitCode: exitCode,
81+
output: string(out),
82+
err: err,
83+
})
84+
mu.Unlock()
85+
}(i)
86+
}
87+
88+
close(start)
89+
wg.Wait()
90+
91+
// Tally process-level failures.
92+
var processErrors int
93+
for _, r := range results {
94+
if r.exitCode != 0 {
95+
processErrors++
96+
t.Logf("process %d exited %d:\n%s", r.id, r.exitCode, r.output)
97+
}
98+
}
99+
100+
// Check the final state of config.yaml — the file all processes raced on.
101+
finalRaw, err := os.ReadFile(configPath)
102+
if err != nil {
103+
t.Fatalf("cannot read final config.yaml: %v", err)
104+
}
105+
106+
var (
107+
fileEmpty bool
108+
fileCorrupt bool
109+
yamlErr string
110+
)
111+
112+
if len(finalRaw) == 0 {
113+
fileEmpty = true
114+
} else {
115+
var parsed map[string]interface{}
116+
if err := yaml.Unmarshal(finalRaw, &parsed); err != nil {
117+
fileCorrupt = true
118+
yamlErr = err.Error()
119+
}
120+
}
121+
122+
t.Logf("--- Race Condition Results ---")
123+
t.Logf(" Processes launched: %d", processes)
124+
t.Logf(" Process failures: %d", processErrors)
125+
t.Logf(" Final file empty: %v", fileEmpty)
126+
t.Logf(" Final file corrupt: %v", fileCorrupt)
127+
if fileCorrupt {
128+
t.Logf(" YAML error: %s", yamlErr)
129+
t.Logf(" File content:\n%s", finalRaw)
130+
}
131+
132+
if processErrors > 0 || fileEmpty || fileCorrupt {
133+
t.Errorf("Race condition reproduced: processes_failed=%d file_empty=%v file_corrupt=%v\n"+
134+
"Multiple processes doing read-modify-write on config.yaml without file locking\n"+
135+
"causes corruption visible to concurrent or subsequent CLI invocations.",
136+
processErrors, fileEmpty, fileCorrupt)
137+
}
138+
}
139+
140+
// TestRaceWorker verifies that config.New() produces a valid, non-corrupt
141+
// config file. When spawned as a subprocess by TestRaceConditionEndToEnd
142+
// (RACE_WORKER=1, RACE_DATA_DIR set), it operates against the shared data
143+
// directory to exercise the file-lock under contention. When run standalone
144+
// it uses its own temp directory as a basic config.New() smoke test.
145+
func TestRaceWorker(t *testing.T) {
146+
dataDir := os.Getenv("RACE_DATA_DIR")
147+
if dataDir == "" {
148+
dataDir = t.TempDir()
149+
}
150+
151+
cfg, cErr := New(dataDir)
152+
if cErr != nil {
153+
t.Fatalf("New() failed: %v", cErr)
154+
}
155+
156+
// Verify the returned config is sane.
157+
if cfg.OutputFormat == "" {
158+
t.Error("config has empty output-format")
159+
}
160+
161+
// Verify the file on disk is valid YAML right after our write.
162+
configPath := filepath.Join(dataDir, "config.yaml")
163+
raw, err := os.ReadFile(configPath)
164+
if err != nil {
165+
t.Fatalf("cannot read config.yaml after New(): %v", err)
166+
}
167+
if len(raw) == 0 {
168+
t.Fatal("config.yaml is empty immediately after New()")
169+
}
170+
171+
var parsed map[string]interface{}
172+
if err := yaml.Unmarshal(raw, &parsed); err != nil {
173+
t.Fatalf("config.yaml is corrupted after New(): %v", err)
174+
}
175+
176+
// Check for partial writes — key fields should be present.
177+
requiredKeys := []string{"output-format", "streaming", "timeouts", "retry-strategy"}
178+
content := string(raw)
179+
for _, key := range requiredKeys {
180+
if !strings.Contains(content, key+":") {
181+
t.Errorf("config.yaml missing expected key %q — possible truncated write", key)
182+
}
183+
}
184+
}

0 commit comments

Comments
 (0)