|
| 1 | +// backup_dump.go — per-resource_type dump strategies for the customer-backup |
| 2 | +// runner. Mirrors the pgDumpRunner seam already in customer_backup_runner.go |
| 3 | +// so the runner can back up postgres/vector (pg_dump), mongodb (mongodump), |
| 4 | +// and redis (redis-cli --rdb) through ONE pipeline (gzip → sha256 → S3) with |
| 5 | +// ONE retention/cadence/keep-last-N policy. |
| 6 | +// |
| 7 | +// R2 (2026-06-10) — closing the durability gap. Before this file the backup |
| 8 | +// ladder backed up postgres/vector ONLY; the product sells "backups + |
| 9 | +// 1-click restore" for ALL paid resources, so Mongo + Redis had ZERO |
| 10 | +// automated backup (worker #103 note + GAP-AUDIT-2026-06-10). This file adds |
| 11 | +// the Mongo + Redis dump strategies; the runner dispatches on resource_type. |
| 12 | +// |
| 13 | +// THE GZIP CONTRACT (why this matters): the existing pg path writes a RAW |
| 14 | +// (uncompressed) `pg_dump --format=custom` archive into the runner's gzip |
| 15 | +// writer — the pipeline owns compression, the S3 object is `<archive>.gz`, |
| 16 | +// and the restore path gunzips then pipes to pg_restore. To keep ONE |
| 17 | +// pipeline + ONE object layout + ONE sha256/restore story, every dumpRunner |
| 18 | +// here writes RAW (uncompressed) bytes too. Concretely: |
| 19 | +// |
| 20 | +// - mongodump: `--archive` (NOT `--archive --gzip`). mongodump's own |
| 21 | +// --gzip would double-compress under the pipeline's gzip layer, bloating |
| 22 | +// the object and breaking the "gunzip → mongorestore --archive" restore |
| 23 | +// symmetry. Restore gunzips, then pipes to `mongorestore --archive`. |
| 24 | +// - redis-cli: `--rdb -` streams the live RDB snapshot to stdout (a single |
| 25 | +// uncompressed RDB blob). The pipeline gzips it to `<id>.dump.gz` exactly |
| 26 | +// like the pg/mongo archives. |
| 27 | +// |
// SECRET HYGIENE (mirrors SEC-WORKER FINDING-2 on the pg path): the customer
// credential must NOT sit in argv (/proc/<pid>/cmdline, `ps aux`, kubectl
// describe crash archive) for the multi-minute backup window. pg_dump uses
// PGPASSWORD env (splitPGPassword). mongodump has no equivalent env-password
// knob: the password travels inside the mongodb URI, and `--uri` places that
// URI in argv. Its only other password-out-of-argv path is an interactive
// prompt, which a worker cannot drive. To keep the secret out of argv we
// therefore write the URI to a 0600 temp file and pass `--config=<file>`
// (mongodump supports a YAML config with a `uri:`/`password:` field). See
// realMongoDumpRunner for the exact mechanism.
| 40 | +package jobs |
| 41 | + |
| 42 | +import ( |
| 43 | + "context" |
| 44 | + "fmt" |
| 45 | + "io" |
| 46 | + "net/url" |
| 47 | + "os" |
| 48 | + "os/exec" |
| 49 | +) |
| 50 | + |
// Resource-type identifiers. These are the strings stored in
// resources.resource_type and echoed in resource_backups. They are declared
// once here so the dispatch, the scheduler and the tests all reference a
// single source instead of repeating literals.
const (
	// resourceTypePostgres identifies a postgres resource.
	resourceTypePostgres = "postgres"
	// resourceTypeVector identifies a vector resource.
	resourceTypeVector = "vector"
	// resourceTypeMongoDB identifies a mongodb resource.
	resourceTypeMongoDB = "mongodb"
	// resourceTypeRedis identifies a redis resource.
	resourceTypeRedis = "redis"
)
| 60 | + |
// mongoDumpRunner is the seam around `mongodump` execution, so tests can
// plug in a fake instead of needing a live Mongo.
//
// Contract: Run writes the RAW (uncompressed) BSON archive for the database
// addressed by connURL to w. Compression is not the implementation's job;
// the backup runner's gzip layer handles it. A non-nil error means the dump
// failed.
type mongoDumpRunner interface {
	Run(ctx context.Context, connURL string, w io.Writer) error
}
| 67 | + |
// redisDumpRunner is the seam around `redis-cli --rdb -` execution.
//
// Contract: Run writes the RAW (uncompressed) RDB snapshot of the instance
// addressed by connURL to w. As with the other dump runners, compression is
// left to the backup runner's gzip layer. A non-nil error means the dump
// failed.
type redisDumpRunner interface {
	Run(ctx context.Context, connURL string, w io.Writer) error
}
| 73 | + |
| 74 | +// realMongoDumpRunner shells out to the real `mongodump` binary, streaming a |
| 75 | +// `--archive` (uncompressed) BSON archive to stdout. |
| 76 | +// |
| 77 | +// Secret hygiene: the mongodb URI carries the password in its userinfo. To |
| 78 | +// keep it out of argv (mongodump has no PGPASSWORD-style env knob), we write a |
| 79 | +// minimal mongodump YAML config file (mode 0600, in the pod's tmpfs) carrying |
| 80 | +// `uri:` and pass `--config=<file>`. The file is removed on return. Fail-open |
| 81 | +// on a temp-file error: fall back to `--uri` in argv (no regression vs a |
| 82 | +// world with no mongo backup at all) and log nothing here — the runner's |
| 83 | +// failure path captures any downstream error. |
| 84 | +type realMongoDumpRunner struct{} |
| 85 | + |
| 86 | +func (realMongoDumpRunner) Run(ctx context.Context, connURL string, w io.Writer) error { |
| 87 | + // Try the config-file path first so the URI (with password) stays out of |
| 88 | + // argv. mongodump's config file is YAML with a top-level `uri:` key. |
| 89 | + cfgPath, cleanup, cfgErr := writeMongoConfig(connURL) |
| 90 | + var cmd *exec.Cmd |
| 91 | + if cfgErr == nil { |
| 92 | + defer cleanup() |
| 93 | + cmd = exec.CommandContext(ctx, "mongodump", |
| 94 | + "--config", cfgPath, |
| 95 | + "--archive", // uncompressed; the runner pipeline gzips |
| 96 | + ) |
| 97 | + } else { |
| 98 | + // Fail-open: pass the URI in argv. Less ideal (secret in cmdline) but |
| 99 | + // strictly better than no backup. The leak window is the dump |
| 100 | + // duration only, same posture the pg path documents for its parse |
| 101 | + // fail-open branch. |
| 102 | + cmd = exec.CommandContext(ctx, "mongodump", |
| 103 | + "--uri", connURL, |
| 104 | + "--archive", |
| 105 | + ) |
| 106 | + } |
| 107 | + cmd.Stdout = w |
| 108 | + var stderrBuf limitedBuffer |
| 109 | + cmd.Stderr = &stderrBuf |
| 110 | + if err := cmd.Run(); err != nil { |
| 111 | + return fmt.Errorf("mongodump: %w (stderr: %s)", err, stderrBuf.String()) |
| 112 | + } |
| 113 | + return nil |
| 114 | +} |
| 115 | + |
// Injectable filesystem operations used by writeMongoConfig.
//
// A freshly created, healthy temp file accepts chmod, write and sync without
// error, so the failure arms of writeMongoConfig cannot be triggered against
// a real file. Each operation is therefore routed through a package-level
// func var (the same seam pattern as txtLookupFunc and deployNotifyResolver)
// that tests replace and restore with defer. The values below are the
// production behavior.
var (
	// mongoCfgCreateTemp creates the temp file that will hold the config.
	mongoCfgCreateTemp = func() (*os.File, error) {
		return os.CreateTemp("", "instant-mongodump-*.yaml")
	}

	// mongoCfgChmod restricts the file to 0600 so only the worker process
	// can read the URI. CreateTemp already uses 0600 on unix; the explicit
	// call makes the contract loud.
	mongoCfgChmod = func(f *os.File) error {
		return f.Chmod(0o600)
	}

	// mongoCfgWriteURI writes the mongodump config YAML: a single `uri:` key.
	// The value is quoted so that a URI with YAML-special characters (for
	// example a password containing ':' or '@') parses as one scalar.
	mongoCfgWriteURI = func(f *os.File, connURL string) error {
		if _, err := fmt.Fprintf(f, "uri: %q\n", connURL); err != nil {
			return err
		}
		return nil
	}

	// mongoCfgSync flushes the written config to stable storage.
	mongoCfgSync = func(f *os.File) error {
		return f.Sync()
	}
)
| 138 | + |
| 139 | +// writeMongoConfig writes a mongodump YAML config carrying the connection URI |
| 140 | +// to a 0600 temp file and returns its path plus a cleanup func. Keeps the |
| 141 | +// password out of argv. The caller MUST invoke cleanup() to remove the file. |
| 142 | +func writeMongoConfig(connURL string) (path string, cleanup func(), err error) { |
| 143 | + f, err := mongoCfgCreateTemp() |
| 144 | + if err != nil { |
| 145 | + return "", func() {}, fmt.Errorf("create mongodump config: %w", err) |
| 146 | + } |
| 147 | + cleanup = func() { |
| 148 | + _ = f.Close() |
| 149 | + _ = os.Remove(f.Name()) |
| 150 | + } |
| 151 | + if chmodErr := mongoCfgChmod(f); chmodErr != nil { |
| 152 | + cleanup() |
| 153 | + return "", func() {}, fmt.Errorf("chmod mongodump config: %w", chmodErr) |
| 154 | + } |
| 155 | + if wErr := mongoCfgWriteURI(f, connURL); wErr != nil { |
| 156 | + cleanup() |
| 157 | + return "", func() {}, fmt.Errorf("write mongodump config: %w", wErr) |
| 158 | + } |
| 159 | + if syncErr := mongoCfgSync(f); syncErr != nil { |
| 160 | + cleanup() |
| 161 | + return "", func() {}, fmt.Errorf("sync mongodump config: %w", syncErr) |
| 162 | + } |
| 163 | + return f.Name(), cleanup, nil |
| 164 | +} |
| 165 | + |
| 166 | +// realRedisDumpRunner shells out to `redis-cli --rdb -`, streaming the live |
| 167 | +// RDB snapshot to stdout. `-` for the filename means stdout (redis-cli |
| 168 | +// 4.0+). The runner pipeline gzips the RDB blob. |
| 169 | +// |
| 170 | +// Secret hygiene: redis-cli accepts the password via the REDISCLI_AUTH env |
| 171 | +// var (libredis honors it the same way PGPASSWORD works for pg_dump), so we |
| 172 | +// split the password out of the URI and pass host/port/db/tls as flags + |
| 173 | +// REDISCLI_AUTH on the env. The password never appears in argv. Fail-open on |
| 174 | +// a URI parse error: pass the raw `-u <uri>` form (secret in argv) so a |
| 175 | +// malformed-but-valid-to-redis URI still backs up. |
| 176 | +type realRedisDumpRunner struct{} |
| 177 | + |
| 178 | +func (realRedisDumpRunner) Run(ctx context.Context, connURL string, w io.Writer) error { |
| 179 | + host, port, password, useTLS, parseErr := splitRedisURL(connURL) |
| 180 | + var cmd *exec.Cmd |
| 181 | + if parseErr == nil { |
| 182 | + args := []string{"-h", host, "-p", port} |
| 183 | + if useTLS { |
| 184 | + args = append(args, "--tls") |
| 185 | + } |
| 186 | + args = append(args, "--rdb", "-") // "-" = stream RDB to stdout |
| 187 | + cmd = exec.CommandContext(ctx, "redis-cli", args...) |
| 188 | + if password != "" { |
| 189 | + // REDISCLI_AUTH keeps the password out of argv (same posture as |
| 190 | + // PGPASSWORD on the pg path). |
| 191 | + cmd.Env = append(os.Environ(), "REDISCLI_AUTH="+password) |
| 192 | + } |
| 193 | + } else { |
| 194 | + // Fail-open: -u <uri> (secret in argv). Strictly better than no |
| 195 | + // backup; the leak window is the dump duration only. |
| 196 | + cmd = exec.CommandContext(ctx, "redis-cli", "-u", connURL, "--rdb", "-") |
| 197 | + } |
| 198 | + cmd.Stdout = w |
| 199 | + var stderrBuf limitedBuffer |
| 200 | + cmd.Stderr = &stderrBuf |
| 201 | + if err := cmd.Run(); err != nil { |
| 202 | + return fmt.Errorf("redis-cli --rdb: %w (stderr: %s)", err, stderrBuf.String()) |
| 203 | + } |
| 204 | + return nil |
| 205 | +} |
| 206 | + |
// splitRedisURL breaks a redis://[:password@]host[:port][/db] URL (or
// rediss:// for TLS) into the pieces redis-cli needs, so the password can be
// supplied through REDISCLI_AUTH instead of argv.
//
// Returns:
//   - host: the URL hostname (IPv6 brackets removed).
//   - port: the URL port, or "6379" when none is given.
//   - password: the userinfo password, or "" when absent. Any username in
//     the URL is ignored.
//   - useTLS: true for the rediss:// scheme, false for redis://.
//   - err: non-nil when the URL cannot be parsed, uses another scheme, or
//     has no host. The caller falls back to `-u <uri>` (fail-open).
//
// The database path segment is not returned.
//
// Secret hygiene: the error never quotes rawURL. url.Parse reports failures
// as a *url.Error whose message embeds the complete input, password
// included, so only its underlying cause is wrapped here.
func splitRedisURL(rawURL string) (host, port, password string, useTLS bool, err error) {
	u, parseErr := url.Parse(rawURL)
	if parseErr != nil {
		// Drop the *url.Error envelope: its Error() text contains rawURL
		// verbatim and would leak the credential into any log line.
		if ue, ok := parseErr.(*url.Error); ok {
			parseErr = ue.Err
		}
		return "", "", "", false, fmt.Errorf("parse redis url: %w", parseErr)
	}

	switch u.Scheme {
	case "redis":
		useTLS = false
	case "rediss":
		useTLS = true
	default:
		return "", "", "", false, fmt.Errorf("unexpected redis scheme %q", u.Scheme)
	}

	host = u.Hostname()
	if host == "" {
		return "", "", "", false, fmt.Errorf("redis url missing host")
	}

	port = u.Port()
	if port == "" {
		port = "6379"
	}

	if u.User != nil {
		if pw, ok := u.User.Password(); ok {
			password = pw
		}
	}
	return host, port, password, useTLS, nil
}
| 240 | + |
| 241 | +// backupSupportedResourceType reports whether the customer-backup ladder knows |
| 242 | +// how to dump the given resource_type. The scheduler's SQL filter and the |
| 243 | +// runner's dispatch both anchor on this single predicate so the "what's |
| 244 | +// backed up" set lives in one place (root rule 16/18 — no scattered list). |
| 245 | +func backupSupportedResourceType(resourceType string) bool { |
| 246 | + switch resourceType { |
| 247 | + case resourceTypePostgres, resourceTypeVector, resourceTypeMongoDB, resourceTypeRedis: |
| 248 | + return true |
| 249 | + default: |
| 250 | + return false |
| 251 | + } |
| 252 | +} |
| 253 | + |
| 254 | +// dumpForResourceType returns the dumpRunner Run func for the given |
| 255 | +// resource_type, or nil + a descriptive reason when the type is unsupported. |
| 256 | +// The runner uses the returned closure so postgres/vector/mongodb/redis all |
| 257 | +// flow through ONE pipeline (gzip → sha256 → S3). Keeping the dispatch here |
| 258 | +// (not inline in processBackup) lets the unit test assert the mapping |
| 259 | +// directly. |
| 260 | +func (w *CustomerBackupRunnerWorker) dumpForResourceType(resourceType string) (func(ctx context.Context, connURL string, out io.Writer) error, string) { |
| 261 | + switch resourceType { |
| 262 | + case resourceTypePostgres, resourceTypeVector: |
| 263 | + return w.pgDump.Run, "" |
| 264 | + case resourceTypeMongoDB: |
| 265 | + if w.mongoDump == nil { |
| 266 | + return nil, "mongo dump runner not configured" |
| 267 | + } |
| 268 | + return w.mongoDump.Run, "" |
| 269 | + case resourceTypeRedis: |
| 270 | + if w.redisDump == nil { |
| 271 | + return nil, "redis dump runner not configured" |
| 272 | + } |
| 273 | + return w.redisDump.Run, "" |
| 274 | + default: |
| 275 | + return nil, fmt.Sprintf("unsupported resource_type %q for backup", resourceType) |
| 276 | + } |
| 277 | +} |
0 commit comments