Skip to content

Commit cab81b3

Browse files
NikolaySSarumyan
authored and committed
fix(cli): migrate .env on mon update / mon update-config (closes #203)
1 parent d60068f commit cab81b3

3 files changed

Lines changed: 254 additions & 21 deletions

File tree

README.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,7 @@ This will:
294294

295295
> **Note:** The `.env` file contains configuration for the monitoring stack, including `PGAI_TAG` (the Docker image version tag), `REPLICATOR_PASSWORD` (generated password for the demo standby replication user), `VM_AUTH_USERNAME`, `VM_AUTH_PASSWORD`, and optionally `GF_SECURITY_ADMIN_PASSWORD` (Grafana admin password) and `PGAI_REGISTRY` (custom Docker registry). `postgresai mon local-install` preserves existing `REPLICATOR_PASSWORD` and `VM_AUTH_*` values or generates new ones when they are missing; Docker Compose requires these values and does not use known default passwords.
296296
297-
> **Manual upgrade note:** If you run `docker compose` directly or maintain `.env` yourself, add `VM_AUTH_USERNAME=vmauth` and a non-empty `VM_AUTH_PASSWORD` before upgrading. Rotate VictoriaMetrics auth with `VM_AUTH_PASSWORD="$(openssl rand -base64 18)" ./scripts/rotate-vm-auth.sh` from the monitoring directory; the script updates `.env` and recreates `sink-prometheus` plus `grafana` together so datasource provisioning cannot reinsert stale credentials on restart.
297+
> **In-place upgrade note:** Newer stack versions can require additional `.env` keys (e.g., `VM_AUTH_USERNAME` / `VM_AUTH_PASSWORD` were added in 0.15 for VictoriaMetrics basic auth). Both `postgresai mon local-install -y` and `postgresai mon update` perform a purely additive `.env` migration on every run: existing values are preserved verbatim, and any newly required keys are appended with safe defaults (randomly generated for passwords). If you run `docker compose` directly and maintain `.env` yourself, add `VM_AUTH_USERNAME=vmauth` and a non-empty `VM_AUTH_PASSWORD` before upgrading, or run `postgresai mon update-config` once to have the CLI fill them in for you. To rotate the VictoriaMetrics auth password, run `VM_AUTH_PASSWORD="$(openssl rand -base64 18)" ./scripts/rotate-vm-auth.sh` from the monitoring directory; the script updates `.env` and recreates `sink-prometheus` plus `grafana` together so datasource provisioning cannot reinsert stale credentials on restart.
298298
299299
**Alternative: Manual upgrade**
300300

@@ -305,6 +305,9 @@ If you prefer more control:
305305
postgresai --version # check your CLI version
306306
# Edit .env and set PGAI_TAG to the version number
307307

308+
# Migrate .env to add any newly-required keys (e.g. VM_AUTH_* for 0.15+)
309+
postgresai mon update-config
310+
308311
# Pull new images
309312
docker compose pull
310313

cli/bin/postgres-ai.ts

Lines changed: 127 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,71 @@ function stripMatchingQuotes(value: string): string {
7474
return trimmed;
7575
}
7676

77+
/**
 * Required env vars contract for the monitoring stack.
 *
 * Keys listed in `REQUIRED_ENV_KEYS` are required by the docker-compose stack
 * and must exist in `.env` for the stack to start cleanly. Each entry knows
 * how to mint a safe default if the key is missing. Existing values are always
 * preserved verbatim - the migration is purely additive.
 *
 * This is the spine of the in-place upgrade story: when a user upgrades from
 * a version that didn't require a key (e.g. 0.14, pre-VM-auth) to one that
 * does (0.15), `ensureRequiredEnvVars` appends what's missing so the next
 * `docker compose up` doesn't fail with `missing "<KEY>" env var`.
 */
type EnvKeyDefault = {
  /** Variable name exactly as it is written to `.env`. */
  key: string;
  /** Factory for the value written when the key is missing from `.env`. */
  defaultValue: () => string;
  /** Version that first required the key. Informational: the migration itself does not read it. */
  introducedIn: string;
};

const REQUIRED_ENV_KEYS: EnvKeyDefault[] = [
  { key: "REPLICATOR_PASSWORD", defaultValue: () => crypto.randomBytes(32).toString("hex"), introducedIn: "0.13" },
  { key: "VM_AUTH_USERNAME", defaultValue: () => "vmauth", introducedIn: "0.15" },
  { key: "VM_AUTH_PASSWORD", defaultValue: () => crypto.randomBytes(18).toString("base64"), introducedIn: "0.15" },
];

/**
 * Matches one dotenv assignment line and captures the variable name.
 *
 * Tolerates leading whitespace (including a UTF-8 BOM, which `\s` matches),
 * an optional `export ` prefix, and spaces before `=`, so that a key the user
 * wrote in any of those forms is recognised as already defined.
 */
const ENV_ASSIGNMENT_RE = /^\s*(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*=/;

/**
 * Collect the names of all variables assigned in dotenv-formatted `content`.
 *
 * Comment lines (`# KEY=...`) are not assignments and are ignored. A key with
 * an empty value (`KEY=`) still counts as defined: its value is the user's to
 * manage and must not be overridden by an appended duplicate.
 */
function parseDefinedEnvKeys(content: string): Set<string> {
  const defined = new Set<string>();
  for (const line of content.split(/\r?\n/)) {
    const name = ENV_ASSIGNMENT_RE.exec(line)?.[1];
    if (name !== undefined) {
      defined.add(name);
    }
  }
  return defined;
}

/**
 * Read `.env` (if present), append any required keys that are missing, write
 * the result back atomically with 0600 perms, and return the keys that were
 * added (in `REQUIRED_ENV_KEYS` order).
 *
 * Idempotent: when every required key is already defined the file is not
 * touched at all and an empty array is returned.
 *
 * @param projectDir Directory that contains (or should contain) the `.env` file.
 * @returns Names of the keys that were appended; empty when nothing was missing.
 * @throws Propagates any filesystem error from reading or writing `.env`.
 */
function ensureRequiredEnvVars(projectDir: string): string[] {
  const envFile = path.resolve(projectDir, ".env");
  const envExists = fs.existsSync(envFile);
  const existing = envExists ? fs.readFileSync(envFile, "utf8") : "";

  const defined = parseDefinedEnvKeys(existing);
  const missing = REQUIRED_ENV_KEYS.filter((spec) => !defined.has(spec.key));
  if (missing.length === 0) {
    return [];
  }

  // Append (don't rewrite) so the user's ordering and comments survive, and
  // make sure the last existing line is terminated before the new keys start.
  const separator = existing.length > 0 && !existing.endsWith("\n") ? "\n" : "";
  const appended = missing.map((spec) => `${spec.key}=${spec.defaultValue()}`).join("\n");
  const newContent = `${existing}${separator}${appended}\n`;

  // Write to a sibling temp file and rename it over the target: a crash can
  // never leave a truncated `.env`, and the result is always 0600 - `mode` on
  // a direct write would only apply when the file is newly created. If `.env`
  // is a symlink, replace the file it points to rather than the link itself.
  const target = envExists ? fs.realpathSync(envFile) : envFile;
  const tmpFile = `${target}.${process.pid}.tmp`;
  try {
    fs.rmSync(tmpFile, { force: true }); // drop a stale temp file from an earlier crash
    fs.writeFileSync(tmpFile, newContent, { encoding: "utf8", mode: 0o600, flag: "wx" });
    fs.renameSync(tmpFile, target);
  } catch (error) {
    fs.rmSync(tmpFile, { force: true });
    throw error;
  }

  return missing.map((spec) => spec.key);
}
141+
77142
// Helper functions for spawning processes - use Node.js child_process for compatibility
78143
async function execFilePromise(file: string, args: string[]): Promise<{ stdout: string; stderr: string }> {
79144
return new Promise((resolve, reject) => {
@@ -2970,41 +3035,83 @@ mon
29703035
});
29713036
// `mon update-config`: migrate `.env`, then regenerate monitoring sources by
// running the `sources-generator` service through docker compose.
mon
  .command("update-config")
  .description("apply monitoring services configuration (generate sources, migrate .env)")
  .action(async () => {
    let projectDir: string;
    try {
      ({ projectDir } = await resolveOrInitPaths());
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      console.error(message);
      process.exitCode = 1;
      return;
    }

    // Migrate .env first: append any required keys introduced by newer stack
    // versions (e.g. VM_AUTH_* added in 0.15). This is what makes in-place
    // upgrades from older deployments not break with `missing "VM_AUTH_USERNAME"
    // env var` when sink-prometheus boots.
    let added: string[];
    try {
      added = ensureRequiredEnvVars(projectDir);
    } catch (error) {
      // The migration does filesystem I/O (unreadable or read-only .env, full
      // disk, ...). Report it and stop instead of letting the rejection escape
      // the action handler and compose start with an incomplete .env.
      const message = error instanceof Error ? error.message : String(error);
      console.error(`Failed to migrate .env: ${message}`);
      process.exitCode = 1;
      return;
    }

    if (added.length > 0) {
      console.log(`Added missing .env keys for this stack version: ${added.join(", ")}`);
      console.log("(existing values were preserved; missing keys filled with safe defaults)\n");
    }

    // Propagate a non-zero compose exit status as the CLI's own exit code.
    const code = await runCompose(["run", "--rm", "sources-generator"]);
    if (code !== 0) process.exitCode = code;
  });
29783063
mon
29793064
.command("update")
2980-
.description("update monitoring stack")
3065+
.description("update monitoring stack (migrate .env, pull images)")
29813066
.action(async () => {
29823067
console.log("Updating PostgresAI monitoring stack...\n");
29833068

29843069
try {
2985-
// Check if we're in a git repo
2986-
const gitDir = path.resolve(process.cwd(), ".git");
2987-
if (!fs.existsSync(gitDir)) {
2988-
console.error("Not a git repository. Cannot update.");
3070+
let projectDir: string;
3071+
try {
3072+
({ projectDir } = await resolveOrInitPaths());
3073+
} catch (error) {
3074+
const message = error instanceof Error ? error.message : String(error);
3075+
console.error(message);
29893076
process.exitCode = 1;
29903077
return;
29913078
}
29923079

2993-
// Fetch latest changes
2994-
console.log("Fetching latest changes...");
2995-
await execFilePromise("git", ["fetch", "origin"]);
2996-
2997-
// Check current branch
2998-
const { stdout: branch } = await execFilePromise("git", ["rev-parse", "--abbrev-ref", "HEAD"]);
2999-
const currentBranch = branch.trim();
3000-
console.log(`Current branch: ${currentBranch}`);
3001-
3002-
// Pull latest changes
3003-
console.log("Pulling latest changes...");
3004-
const { stdout: pullOut } = await execFilePromise("git", ["pull", "origin", currentBranch]);
3005-
console.log(pullOut);
3080+
// Step 1: migrate .env so newer stack versions that require additional
3081+
// env vars (e.g. VM_AUTH_USERNAME / VM_AUTH_PASSWORD introduced in 0.15)
3082+
// don't make `docker compose up` fail silently for users who installed
3083+
// before those vars existed. Purely additive: existing values are kept.
3084+
console.log("Checking .env for newly-required keys...");
3085+
const added = ensureRequiredEnvVars(projectDir);
3086+
if (added.length > 0) {
3087+
console.log(`✓ Added missing .env keys: ${added.join(", ")}`);
3088+
console.log(" (existing values preserved; missing keys filled with safe defaults)");
3089+
} else {
3090+
console.log("✓ .env is up to date");
3091+
}
3092+
console.log();
3093+
3094+
// Step 2: refresh repo if this is a git-based deployment. Some users
3095+
// upgrade purely via `npm install -g postgresai@latest` and don't have a
3096+
// git checkout - in that case we skip git operations and still do the
3097+
// env migration + docker pull.
3098+
const gitDir = path.resolve(projectDir, ".git");
3099+
if (fs.existsSync(gitDir)) {
3100+
console.log("Fetching latest changes...");
3101+
await execFilePromise("git", ["fetch", "origin"]);
3102+
3103+
const { stdout: branch } = await execFilePromise("git", ["rev-parse", "--abbrev-ref", "HEAD"]);
3104+
const currentBranch = branch.trim();
3105+
console.log(`Current branch: ${currentBranch}`);
3106+
3107+
console.log("Pulling latest changes...");
3108+
const { stdout: pullOut } = await execFilePromise("git", ["pull", "origin", currentBranch]);
3109+
console.log(pullOut);
3110+
} else {
3111+
console.log("(not a git checkout — skipping git fetch/pull and going straight to image pull)");
3112+
}
30063113

3007-
// Update Docker images
3114+
// Step 3: pull new images.
30083115
console.log("\nUpdating Docker images...");
30093116
const code = await runCompose(["pull"]);
30103117

cli/test/upgrade.test.ts

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,3 +420,126 @@ describe("upgrade CLI commands", () => {
420420
expect(stdout).toMatch(/health/i);
421421
}, { timeout: TEST_TIMEOUT });
422422
});
423+
424+
describe("in-place upgrade env migration (mon update / update-config)", () => {
  /**
   * Regression tests for the 0.14 -> 0.15 in-place upgrade gap (#203).
   *
   * Before this fix, a user who installed at 0.14 and ran the documented
   * upgrade flow (`pgai mon update`) ended up with a .env file that lacked
   * VM_AUTH_USERNAME / VM_AUTH_PASSWORD, so sink-prometheus exited with:
   *
   *   fatal cannot read "/postgres_ai_configs/prometheus/prometheus.yml":
   *   cannot expand environment variables: missing "VM_AUTH_USERNAME" env var
   *
   * `mon update` and `mon update-config` now migrate .env additively before
   * doing anything else.
   */

  let tempDir: string;

  beforeAll(() => {
    tempDir = fs.mkdtempSync(resolve(os.tmpdir(), "pgai-upgrade-env-migration-"));
  });

  afterAll(() => {
    if (tempDir && fs.existsSync(tempDir)) {
      fs.rmSync(tempDir, { recursive: true, force: true });
    }
  });

  /**
   * Create a minimal project directory under `tempDir` holding the given
   * `.env` content plus stub `docker-compose.yml` / `instances.yml` files,
   * and return its path.
   */
  const makeProjectDir = (name: string, envContent: string): string => {
    const testDir = resolve(tempDir, name);
    fs.mkdirSync(testDir, { recursive: true });
    fs.writeFileSync(resolve(testDir, ".env"), envContent);
    fs.writeFileSync(resolve(testDir, "docker-compose.yml"), "version: '3'\nservices: {}\n");
    fs.writeFileSync(resolve(testDir, "instances.yml"), "# instances\n");
    return testDir;
  };

  /** Read back the `.env` of a project directory created by `makeProjectDir`. */
  const readEnv = (testDir: string): string => fs.readFileSync(resolve(testDir, ".env"), "utf8");

  /** Count the lines in `envContent` that assign `key` (i.e. start with `KEY=`). */
  const countAssignments = (envContent: string, key: string): number =>
    (envContent.match(new RegExp(`^${key}=`, "gm")) ?? []).length;

  test("mon update-config appends missing VM_AUTH_USERNAME / VM_AUTH_PASSWORD to a 0.14-shaped .env", () => {
    // 0.14-shaped .env: PGAI_TAG present, VM_AUTH_* absent.
    const testDir = makeProjectDir(
      "update-config-0.14-env",
      "PGAI_TAG=0.14.0\nGF_SECURITY_ADMIN_PASSWORD=user-set-grafana-pw\n",
    );

    // The compose run will fail (no Docker in CI), but env migration runs first.
    runCliInDir(["mon", "update-config"], testDir, { PGAI_TAG: undefined });

    const envContent = readEnv(testDir);

    // Existing values must be preserved verbatim.
    expect(envContent).toMatch(/^PGAI_TAG=0\.14\.0$/m);
    expect(envContent).toMatch(/^GF_SECURITY_ADMIN_PASSWORD=user-set-grafana-pw$/m);

    // New required keys must be appended (vmauth username + non-empty base64 password).
    expect(envContent).toMatch(/^VM_AUTH_USERNAME=vmauth$/m);
    expect(envContent).toMatch(/^VM_AUTH_PASSWORD=[A-Za-z0-9+/]+={0,2}$/m);

    // REPLICATOR_PASSWORD was introduced earlier and is also part of the contract.
    expect(envContent).toMatch(/^REPLICATOR_PASSWORD=[a-f0-9]{64}$/m);

    // Each key is appended exactly once.
    for (const key of ["REPLICATOR_PASSWORD", "VM_AUTH_USERNAME", "VM_AUTH_PASSWORD"]) {
      expect(countAssignments(envContent, key)).toBe(1);
    }
  }, { timeout: TEST_TIMEOUT });

  test("mon update appends missing VM_AUTH_USERNAME / VM_AUTH_PASSWORD to a 0.14-shaped .env", () => {
    const testDir = makeProjectDir("update-0.14-env", "PGAI_TAG=0.14.0\n");

    // mon update may still fail at the image pull (no Docker in CI); a missing
    // git checkout is skipped rather than fatal. Either way, env migration runs first.
    const result = runCliInDir(["mon", "update"], testDir, { PGAI_TAG: undefined });

    const envContent = readEnv(testDir);

    expect(envContent).toMatch(/^PGAI_TAG=0\.14\.0$/m);
    expect(envContent).toMatch(/^VM_AUTH_USERNAME=vmauth$/m);
    expect(envContent).toMatch(/^VM_AUTH_PASSWORD=[A-Za-z0-9+/]+={0,2}$/m);

    // The migration step should print what it added so the user can see it.
    expect(result.stdout).toMatch(/Added missing \.env keys/);
    expect(result.stdout).toMatch(/VM_AUTH_USERNAME/);
    expect(result.stdout).toMatch(/VM_AUTH_PASSWORD/);
  }, { timeout: TEST_TIMEOUT });

  test("mon update preserves existing VM_AUTH_* values (no rotation)", () => {
    // User already has VM auth configured (e.g. set up via rotate-vm-auth.sh).
    const testDir = makeProjectDir(
      "update-preserve-vm-auth",
      "PGAI_TAG=0.15.0\nVM_AUTH_USERNAME=custom-user\nVM_AUTH_PASSWORD=custom-pw-do-not-rotate\nREPLICATOR_PASSWORD=" +
        "a".repeat(64) +
        "\n",
    );

    const result = runCliInDir(["mon", "update"], testDir, { PGAI_TAG: undefined });

    const envContent = readEnv(testDir);

    expect(envContent).toMatch(/^VM_AUTH_USERNAME=custom-user$/m);
    expect(envContent).toMatch(/^VM_AUTH_PASSWORD=custom-pw-do-not-rotate$/m);
    expect(envContent).toMatch(/^REPLICATOR_PASSWORD=a{64}$/m);

    // No duplicate assignment may be appended: a later duplicate would
    // override the user's value and silently rotate it.
    for (const key of ["REPLICATOR_PASSWORD", "VM_AUTH_USERNAME", "VM_AUTH_PASSWORD"]) {
      expect(countAssignments(envContent, key)).toBe(1);
    }

    // When nothing is missing, the migration step should say so.
    expect(result.stdout).toMatch(/\.env is up to date/);
  }, { timeout: TEST_TIMEOUT });

  test("mon update-config handles a .env that doesn't end with a newline", () => {
    // No trailing newline - migration must add one before appending new keys
    // or we'd produce e.g. `PGAI_TAG=0.14.0VM_AUTH_USERNAME=vmauth`.
    const testDir = makeProjectDir("update-config-no-trailing-newline", "PGAI_TAG=0.14.0");

    runCliInDir(["mon", "update-config"], testDir, { PGAI_TAG: undefined });

    const envContent = readEnv(testDir);

    expect(envContent).toMatch(/^PGAI_TAG=0\.14\.0$/m);
    expect(envContent).toMatch(/^VM_AUTH_USERNAME=vmauth$/m);
    // No key should be glued onto the previous line.
    expect(envContent).not.toMatch(/PGAI_TAG=0\.14\.0VM_AUTH_USERNAME/);
  }, { timeout: TEST_TIMEOUT });
});

0 commit comments

Comments
 (0)