Skip to content

Commit c41a0e0

Browse files
committed
test: reproduce config file corruption from concurrent CLI instances
Add race_test.go, which demonstrates IIP-20714: multiple concurrent read-modify-write cycles on config.yaml without file locking cause invalid YAML, empty file reads, and silent data loss.
1 parent 964476e commit c41a0e0

1 file changed

Lines changed: 184 additions & 0 deletions

File tree

internal/config/race_test.go

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
package config
2+
3+
import (
4+
"fmt"
5+
"os"
6+
"os/exec"
7+
"path/filepath"
8+
"strings"
9+
"sync"
10+
"testing"
11+
12+
"gopkg.in/yaml.v3"
13+
)
14+
15+
// TestRaceConditionEndToEnd reproduces the real file-level race condition that
16+
// occurs when multiple cencli processes initialize config concurrently against
17+
// the same data directory. Each subprocess is a real OS process with its own
18+
// viper instance — just like production. The race is in the non-atomic
19+
// read-modify-write of config.yaml (viper.WriteConfig + addDocCommentsToYAML).
20+
//
21+
// Run with: go test -run TestRaceConditionEndToEnd -count=1 -v ./internal/config/
22+
func TestRaceConditionEndToEnd(t *testing.T) {
23+
const processes = 15
24+
25+
dataDir := t.TempDir()
26+
if err := os.MkdirAll(filepath.Join(dataDir, "templates"), 0o755); err != nil {
27+
t.Fatal(err)
28+
}
29+
30+
configPath := filepath.Join(dataDir, "config.yaml")
31+
32+
type procResult struct {
33+
id int
34+
exitCode int
35+
output string
36+
err error
37+
}
38+
39+
var (
40+
wg sync.WaitGroup
41+
mu sync.Mutex
42+
results []procResult
43+
)
44+
45+
// All processes start as close together as possible.
46+
start := make(chan struct{})
47+
48+
for i := 0; i < processes; i++ {
49+
wg.Add(1)
50+
go func(id int) {
51+
defer wg.Done()
52+
<-start
53+
54+
cmd := exec.Command(
55+
os.Args[0],
56+
"-test.run=^TestRaceWorker$",
57+
"-test.v",
58+
)
59+
cmd.Env = append(os.Environ(),
60+
"RACE_WORKER=1",
61+
fmt.Sprintf("RACE_DATA_DIR=%s", dataDir),
62+
)
63+
64+
out, err := cmd.CombinedOutput()
65+
66+
exitCode := 0
67+
if err != nil {
68+
if ee, ok := err.(*exec.ExitError); ok {
69+
exitCode = ee.ExitCode()
70+
} else {
71+
exitCode = -1
72+
}
73+
}
74+
75+
mu.Lock()
76+
results = append(results, procResult{
77+
id: id,
78+
exitCode: exitCode,
79+
output: string(out),
80+
err: err,
81+
})
82+
mu.Unlock()
83+
}(i)
84+
}
85+
86+
close(start)
87+
wg.Wait()
88+
89+
// Tally process-level failures.
90+
var processErrors int
91+
for _, r := range results {
92+
if r.exitCode != 0 {
93+
processErrors++
94+
t.Logf("process %d exited %d:\n%s", r.id, r.exitCode, r.output)
95+
}
96+
}
97+
98+
// Check the final state of config.yaml — the file all processes raced on.
99+
finalRaw, err := os.ReadFile(configPath)
100+
if err != nil {
101+
t.Fatalf("cannot read final config.yaml: %v", err)
102+
}
103+
104+
var (
105+
fileEmpty bool
106+
fileCorrupt bool
107+
yamlErr string
108+
)
109+
110+
if len(finalRaw) == 0 {
111+
fileEmpty = true
112+
} else {
113+
var parsed map[string]interface{}
114+
if err := yaml.Unmarshal(finalRaw, &parsed); err != nil {
115+
fileCorrupt = true
116+
yamlErr = err.Error()
117+
}
118+
}
119+
120+
t.Logf("--- Race Condition Results ---")
121+
t.Logf(" Processes launched: %d", processes)
122+
t.Logf(" Process failures: %d", processErrors)
123+
t.Logf(" Final file empty: %v", fileEmpty)
124+
t.Logf(" Final file corrupt: %v", fileCorrupt)
125+
if fileCorrupt {
126+
t.Logf(" YAML error: %s", yamlErr)
127+
t.Logf(" File content:\n%s", finalRaw)
128+
}
129+
130+
if processErrors > 0 || fileEmpty || fileCorrupt {
131+
t.Errorf("Race condition reproduced: processes_failed=%d file_empty=%v file_corrupt=%v\n"+
132+
"Multiple processes doing read-modify-write on config.yaml without file locking\n"+
133+
"causes corruption visible to concurrent or subsequent CLI invocations.",
134+
processErrors, fileEmpty, fileCorrupt)
135+
}
136+
}
137+
138+
// TestRaceWorker is a subprocess helper that calls config.New() once against a
139+
// shared data directory. Each invocation is a separate OS process — exactly
140+
// like a real cencli CLI invocation. Only runs when spawned by the parent test.
141+
func TestRaceWorker(t *testing.T) {
142+
if os.Getenv("RACE_WORKER") != "1" {
143+
t.Skip("skipping: only runs as subprocess of TestRaceConditionEndToEnd")
144+
}
145+
146+
dataDir := os.Getenv("RACE_DATA_DIR")
147+
if dataDir == "" {
148+
t.Fatal("RACE_DATA_DIR not set")
149+
}
150+
151+
cfg, cErr := New(dataDir)
152+
if cErr != nil {
153+
t.Fatalf("New() failed: %v", cErr)
154+
}
155+
156+
// Verify the returned config is sane.
157+
if cfg.OutputFormat == "" {
158+
t.Error("config has empty output-format")
159+
}
160+
161+
// Verify the file on disk is valid YAML right after our write.
162+
configPath := filepath.Join(dataDir, "config.yaml")
163+
raw, err := os.ReadFile(configPath)
164+
if err != nil {
165+
t.Fatalf("cannot read config.yaml after New(): %v", err)
166+
}
167+
if len(raw) == 0 {
168+
t.Fatal("config.yaml is empty immediately after New()")
169+
}
170+
171+
var parsed map[string]interface{}
172+
if err := yaml.Unmarshal(raw, &parsed); err != nil {
173+
t.Fatalf("config.yaml is corrupted after New(): %v", err)
174+
}
175+
176+
// Check for partial writes — key fields should be present.
177+
requiredKeys := []string{"output-format", "streaming", "timeouts", "retry-strategy"}
178+
content := string(raw)
179+
for _, key := range requiredKeys {
180+
if !strings.Contains(content, key+":") {
181+
t.Errorf("config.yaml missing expected key %q — possible truncated write", key)
182+
}
183+
}
184+
}

0 commit comments

Comments
 (0)