Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions changelog.d/2957-deploy-w4-rollup.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
- `wheels deploy`: a same-version redeploy no longer hits a guaranteed `docker run --name` conflict — deploy now force-removes an existing same-name container (exact-anchored, idempotent) before each app run, and stops superseded versions best-effort after the kamal-proxy cutover instead of leaving every old container running forever (#2957 DEP-11a)
- `wheels deploy secrets extract <key>` now matches the key case-sensitively, like Kamal — previously CFML's case-insensitive `==` let `extract path` return the `PATH` line's value (#2957 DEP-11b)
- `wheels deploy`: the kamal-proxy config volume now derives the remote home from the ssh user (`/root` for the default root user, `/home/<user>` otherwise) instead of hardcoding the `/home/<user>` layout, which was wrong on every default config (#2957 DEP-11c)
- `wheels deploy`: secret resolution from `.kamal/secrets` is now bounded by a timeout (default 60s, configurable via `timeoutSeconds`) — a command blocking on interactive input (e.g. `op read` prompting for sign-in) used to hang the deploy thread forever; it now kills the bash process and throws a clear `SecretResolver.ResolutionFailed` (#2957)
- `wheels deploy rollback --destination=X` now applies the destination overlay — rollback was the last verb loading `deploy.yml` without forwarding `--destination`, so it targeted the base config's hosts (#2957; same defect class as the `config` fix in #3085)
16 changes: 15 additions & 1 deletion cli/lucli/services/deploy/cli/DeployMainCli.cfc
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,9 @@ component {

for (var role in cfg.roles()) {
for (var host in role.hosts()) {
// A same-version redeploy would hit a guaranteed
// docker run --name conflict otherwise (#2957 DEP-11a).
$dispatch([host], app.remove_conflicting(role, ver), dryRun);
if (arrayLen(secretNames)) {
$deliverEnvFile(
[host],
Expand All @@ -163,6 +166,11 @@ component {
dryRun
);
}
// Post-cutover cleanup: stop superseded versions
// (#2957 DEP-11a). Best-effort (allowFail) — a failed
// stop of an OLD container must not fail an
// otherwise-complete deploy of the NEW one.
$dispatch([host], app.stop_old_versions(role, ver), dryRun, true);
}
}
$dispatch(hosts, auditor.record("completed deploy of version " & ver), dryRun, true);
Expand Down Expand Up @@ -220,7 +228,13 @@ component {
message = "rollback requires a version (pass opts.version)"
);
}
var cfg = variables.loader.load(arguments.opts.configPath);
// Forward the destination overlay — rollback() was the last
// loader.load() call site not doing so (#2957; same defect class
// as the config() fix in #3085).
var cfg = variables.loader.load(
arguments.opts.configPath,
{destination: arguments.opts.destination ?: ""}
);
var app = new modules.wheels.services.deploy.commands.AppCommands(cfg);
var proxy = new modules.wheels.services.deploy.commands.ProxyCommands(cfg);
var dryRun = arguments.opts.dryRun ?: false;
Expand Down
5 changes: 4 additions & 1 deletion cli/lucli/services/deploy/cli/DeploySecretsCli.cfc
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,10 @@ component {
var eq = find("=", line);
// eq > 1, not > 0: a line starting with '=' has no key, and
// left(line, 0) crashes Lucee 7 (Cross-Engine Invariant 8).
if (eq > 1 && left(line, eq - 1) == key) {
// compare(), not ==: CFML == is case-insensitive, but env var
// names are case-sensitive and Kamal's extract is an exact
// match (#2957 DEP-11b).
if (eq > 1 && compare(left(line, eq - 1), key) == 0) {
return mid(line, eq + 1, 99999);
}
}
Expand Down
37 changes: 37 additions & 0 deletions cli/lucli/services/deploy/commands/AppCommands.cfc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,43 @@ component extends="Base" {
);
}

/**
* Idempotent same-name conflict guard (#2957 DEP-11a): `docker run
* --name X` hard-fails when ANY container — running or stopped —
* already holds the name, which is the guaranteed state on a
* same-version redeploy. The name filter is regex-anchored for an
* exact match and `xargs -r` makes the whole pipeline a no-op
* (exit 0) when nothing matches.
*/
public string function remove_conflicting(required any role, required string version) {
return pipe([
docker("container", "ls", "--all",
"--filter name=^#container_name(arguments.role, arguments.version)#$",
"--quiet"),
"xargs -r docker container rm --force"
]);
}

/**
* Stop every superseded version of this role's container after the
* proxy cutover (#2957 DEP-11a) — previously old versions kept
* running (and auto-restarting) forever. Scoped by the same
* service/role/destination labels $labelArgs stamps on at run time;
* the just-deployed container is excluded by exact name. `xargs -r`
* keeps a first deploy (no old versions) a no-op.
*/
public string function stop_old_versions(required any role, required string version) {
return pipe([
docker("ps",
"--filter label=service=#variables.config.service()#",
"--filter label=role=#arguments.role.name()#",
"--filter label=destination=#variables.config.destination()#",
"--format {{.Names}}"),
"grep -v '^#container_name(arguments.role, arguments.version)#$'",
"xargs -r docker stop"
]);
}

public string function start(required any role, required string version) {
return docker("start", container_name(arguments.role, arguments.version));
}
Expand Down
13 changes: 12 additions & 1 deletion cli/lucli/services/deploy/commands/ProxyCommands.cfc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ component extends="Base" {
"--network kamal",
"--publish 80:80",
"--publish 443:443",
"--volume /home/#variables.config.ssh().user()#/.config/kamal-proxy:/home/kamal-proxy/.config/kamal-proxy",
"--volume #$remoteHome()#/.config/kamal-proxy:/home/kamal-proxy/.config/kamal-proxy",
variables.PROXY_IMAGE
);
}
Expand Down Expand Up @@ -91,4 +91,15 @@ component extends="Base" {
public string function restart() {
return docker("restart", variables.PROXY_CONTAINER_NAME);
}

/**
* Home directory of the deploy user on the remote host, for the proxy
* config volume. The previous hardcoded `/home/<user>` was wrong for
* the DEFAULT ssh user (root's home is /root) — #2957 DEP-11c.
* compare(), not ==: unix usernames are case-sensitive.
*/
private string function $remoteHome() {
var sshUser = variables.config.ssh().user();
return compare(sshUser, "root") == 0 ? "/root" : "/home/" & sshUser;
}
}
54 changes: 46 additions & 8 deletions cli/lucli/services/deploy/lib/SecretResolver.cfc
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ component {
// (e.g. a non-standard Git Bash install on Windows). Also the
// seam the BashUnavailable spec uses.
variables.bashCmd = arguments.opts.bashCmd ?: "bash";
// Upper bound on resolution, in seconds (#2957): a command inside
// the secrets file that blocks on interactive input — `op read`
// prompting for sign-in, `bw get` waiting to unlock — used to hang
// the deploy thread forever on an unbounded waitFor()/stdout read.
variables.timeoutSeconds = arguments.opts.timeoutSeconds ?: 60;
variables.resolved = $loadAll();
return this;
}
Expand Down Expand Up @@ -165,7 +170,7 @@ component {
// the stderr write while we block on the stdout read. A file sink
// never fills, so bash always runs to completion. Secret values
// travel on stdout (read in-memory); only diagnostics touch disk,
// and the file is deleted in the finally block even when waitFor()
// and the file is deleted in the finally block even when the drain
// or the throw paths interrupt the happy path.
var errPath = getTempFile(getTempDirectory(), "wheels-secret-err");
try {
Expand All @@ -181,7 +186,9 @@ component {
detail = "Secret resolution requires a local bash for $(cmd) expansion. On Windows, run inside WSL or Git Bash."
);
}
var out = $readStream(proc.getInputStream());
var out = $drainWithDeadline(proc);
// The drain only returns once the process has exited, so this
// waitFor() is an immediate exit-code read, not a blocking wait.
var exitCode = proc.waitFor();
var err = fileExists(errPath) ? fileRead(errPath, "UTF-8") : "";
return {exitCode: exitCode, out: out, err: err};
Expand All @@ -190,12 +197,43 @@ component {
}
}

private string function $readStream(required any inputStream) {
var scanner = createObject("java", "java.util.Scanner").init(arguments.inputStream, "UTF-8");
scanner.useDelimiter("\A");
var content = scanner.hasNext() ? scanner.next() : "";
scanner.close();
return content;
/**
* Drain the process's stdout to completion, bounded by
* variables.timeoutSeconds (#2957). The previous shape — a blocking
* read-to-EOF followed by an unbounded waitFor() — hung the deploy
* thread forever when a command inside the secrets file blocked on
* interactive input while holding stdout open. Polling `available()`
* against a deadline bounds both hang shapes (silent never-exits AND
* endless streamers); on expiry the bash process is force-killed and
* a clear ResolutionFailed surfaces.
*/
private string function $drainWithDeadline(required any proc) {
var deadlineAt = getTickCount() + (variables.timeoutSeconds * 1000);
var stdoutStream = arguments.proc.getInputStream();
var buffer = createObject("java", "java.io.ByteArrayOutputStream").init();
while (arguments.proc.isAlive()) {
if (getTickCount() >= deadlineAt) {
arguments.proc.destroyForcibly();
throw(
type = "SecretResolver.ResolutionFailed",
message = "Resolving .kamal/secrets timed out after " & variables.timeoutSeconds
& " second(s) — a command inside the secrets file may be waiting for"
& " interactive input (e.g. a credential manager prompting for sign-in)."
& " The bash process was killed.",
detail = "Sign in to the secret manager in a terminal first, or raise the limit"
& " via SecretResolver init opts.timeoutSeconds for legitimately slow commands."
);
}
var avail = stdoutStream.available();
if (avail > 0) {
buffer.writeBytes(stdoutStream.readNBytes(avail));
} else {
sleep(25);
}
}
// Process exited — anything left in the pipe reads to EOF instantly.
buffer.writeBytes(stdoutStream.readAllBytes());
return buffer.toString("UTF-8");
}

private string function $shellEscape(required string path) {
Expand Down
81 changes: 81 additions & 0 deletions cli/lucli/tests/specs/deploy/cli/DeployMainCliSpec.cfc
Original file line number Diff line number Diff line change
Expand Up @@ -961,6 +961,59 @@ component extends="wheels.wheelstest.system.BaseSpec" {
expect(out).toInclude("--name demo-web-v1");
});

// Regression suite for #2957 (Wave 4 roll-ups).
// (DEP-11a) deploy() issued `docker run --name <service>-<role>-<version>`
// with no prior conflict removal — a same-version redeploy hit a
// guaranteed name conflict — and superseded containers were never
// stopped after cutover. (rollback --destination) rollback() was the
// last loader.load() call site not forwarding the destination overlay
// (same defect class as the config() fix in #3085).

it("deploy removes a conflicting same-name container before docker run (##2957 DEP-11a)", () => {
var fake = new cli.lucli.services.deploy.lib.FakeSshPool();
var dc = new cli.lucli.services.deploy.cli.DeployMainCli(fake);
dc.deploy({configPath: variables.fixture, version: "v1"});
var cmds = $cmds(fake);
var rmIdx = 0; var runIdx = 0;
for (var i = 1; i <= arrayLen(cmds); i++) {
if (!rmIdx && findNoCase("name=^demo-web-v1$", cmds[i])
&& findNoCase("docker container rm --force", cmds[i])) rmIdx = i;
// "--name demo-web-v1" pins the APP container run — the
// proxy's start_or_run also embeds a "docker run --detach".
if (!runIdx && findNoCase("docker run --detach", cmds[i])
&& findNoCase("--name demo-web-v1", cmds[i])) runIdx = i;
}
expect(rmIdx).toBeGT(0);
expect(runIdx).toBeGT(rmIdx);
});

it("deploy stops superseded versions after the proxy cutover (##2957 DEP-11a)", () => {
var fake = new cli.lucli.services.deploy.lib.FakeSshPool();
var dc = new cli.lucli.services.deploy.cli.DeployMainCli(fake);
dc.deploy({configPath: variables.fixture, version: "v1"});
var cmds = $cmds(fake);
var proxyIdx = 0; var stopOldIdx = 0;
for (var i = 1; i <= arrayLen(cmds); i++) {
if (!proxyIdx && findNoCase("kamal-proxy deploy", cmds[i])) proxyIdx = i;
if (!stopOldIdx && findNoCase("grep -v '^demo-web-v1$'", cmds[i])
&& findNoCase("xargs -r docker stop", cmds[i])) stopOldIdx = i;
}
expect(proxyIdx).toBeGT(0);
expect(stopOldIdx).toBeGT(proxyIdx);
});

it("a failing old-version stop does not fail an otherwise-complete deploy (##2957 DEP-11a)", () => {
var fake = new cli.lucli.services.deploy.lib.FakeSshPool();
var cfg = new cli.lucli.services.deploy.config.ConfigLoader().load(variables.fixture);
var app = new cli.lucli.services.deploy.commands.AppCommands(cfg);
fake.expect("1.2.3.4", app.stop_old_versions(cfg.roles()[1], "v1"), {
exitCode: 1, stdout: "", stderr: "Cannot connect to the Docker daemon"
});
var dc = new cli.lucli.services.deploy.cli.DeployMainCli(fake);
var out = dc.deploy({configPath: variables.fixture, version: "v1"});
expect(out).toInclude("Deployed");
});

// Regression suite for #2957 (Wave 3 — observability). (DEP-6a) the
// dispatch closures dropped every ssh.run() result, so read verbs
// (`audit`, `details`) returned only host-count summaries in live
Expand Down Expand Up @@ -1059,6 +1112,34 @@ component extends="wheels.wheelstest.system.BaseSpec" {
expect(out).toInclude("Deployed");
});

it("rollback --dry-run applies the destination overlay (##2957, same class as ##3085)", () => {
var base = getTempFile(getTempDirectory(), "yml");
fileWrite(
base,
"service: demo#chr(10)#image: acme/demo#chr(10)#servers: [192.0.2.10]"
& "#chr(10)#registry: {username: u, password: [X]}"
);
var overlay = new cli.lucli.services.deploy.config.ConfigLoader()
.$overlayPathFor(base, "staging");
fileWrite(overlay, "servers:#chr(10)# - 192.0.2.99");
try {
var dc = new cli.lucli.services.deploy.cli.DeployMainCli(
new cli.lucli.services.deploy.lib.FakeSshPool()
);
var out = dc.rollback({
configPath: base,
destination: "staging",
version: "v1",
dryRun: true
});
expect(out).toInclude("192.0.2.99");
expect(out).notToInclude("192.0.2.10");
} finally {
fileDelete(base);
fileDelete(overlay);
}
});

// Regression for #2671 — git's stderr ("fatal: not a git repository...") used to leak through as the version string.
it("$gitShortSha() returns 'unknown' when run outside a git repo", () => {
var nonGitDir = getTempDirectory() & "/wheels-2671-main-" & createUUID();
Expand Down
11 changes: 11 additions & 0 deletions cli/lucli/tests/specs/deploy/cli/DeploySecretsCliSpec.cfc
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,17 @@ component extends="wheels.wheelstest.system.BaseSpec" {
expect(cli.extract({key: "", from: "FOO=bar"})).toBe("");
});

// #2957 DEP-11b — CFML `==` is case-insensitive, so `extract path`
// matched the PATH line. Kamal's extract is an exact (case-sensitive)
// key match; env var names are case-sensitive in every shell.
it("extract key match is case-sensitive (##2957 DEP-11b)", () => {
var cli = new cli.lucli.services.deploy.cli.DeploySecretsCli();
var block = "PATH=/usr/bin" & chr(10) & "path=lower-value";
expect(cli.extract({key: "path", from: block})).toBe("lower-value");
expect(cli.extract({key: "PATH", from: block})).toBe("/usr/bin");
expect(cli.extract({key: "Path", from: block})).toBe("");
});

it("extract skips malformed lines that start with '='", () => {
// left(line, 0) on a keyless '=...' line crashes Lucee 7
// (Cross-Engine Invariant 8); such lines must be skipped.
Expand Down
26 changes: 26 additions & 0 deletions cli/lucli/tests/specs/deploy/commands/AppCommandsSpec.cfc
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,32 @@ component extends="wheels.wheelstest.system.BaseSpec" {
expect(state.message).notToInclude("PRESENT");
});

// #2957 DEP-11a — `docker run --name X` hard-fails when ANY container
// (running or stopped) already holds the name, which is the guaranteed
// state on a same-version redeploy. The conflict guard must be exact-name
// anchored and a no-op when nothing matches (xargs -r).
it("remove_conflicting() force-removes an existing same-name container, idempotently (##2957 DEP-11a)", () => {
var cmd = new cli.lucli.services.deploy.commands.AppCommands(variables.cfg)
.remove_conflicting(variables.cfg.roles()[1], "v1");
expect(cmd).toInclude("docker container ls --all");
expect(cmd).toInclude("--filter name=^demo-web-v1$");
expect(cmd).toInclude("--quiet");
expect(cmd).toInclude("xargs -r docker container rm --force");
});

// #2957 DEP-11a — superseded containers were never stopped after
// cutover, so every old version kept running (and restarting) forever.
it("stop_old_versions() stops same-role containers except the current version (##2957 DEP-11a)", () => {
var cmd = new cli.lucli.services.deploy.commands.AppCommands(variables.cfg)
.stop_old_versions(variables.cfg.roles()[1], "v2");
expect(cmd).toInclude("docker ps");
expect(cmd).toInclude("--filter label=service=demo");
expect(cmd).toInclude("--filter label=role=web");
expect(cmd).toInclude("--filter label=destination=");
expect(cmd).toInclude("grep -v '^demo-web-v2$'");
expect(cmd).toInclude("xargs -r docker stop");
});

it("remove() chains docker stop and docker rm", () => {
var cmd = new cli.lucli.services.deploy.commands.AppCommands(variables.cfg)
.remove(variables.cfg.roles()[1], "v9");
Expand Down
Loading
Loading