|
| 1 | +package cmd |
| 2 | + |
import (
	"fmt"
	"os"
	"path/filepath"
	"strings"

	"github.com/boringsql/fixturize/fixturize"
	"github.com/spf13/cobra"
)
| 11 | + |
// Package-level state for the "extract" subcommand: the cobra command
// definition followed by the variables its flags are parsed into.
| 12 | +var ( |
// extractCmd defines the "extract" subcommand. Its RunE handler is runExtract
// and its Long text doubles as the usage examples shown in help output.
| 13 | + extractCmd = &cobra.Command{ |
| 14 | + Use: "extract", |
| 15 | + Short: "Extract a consistent subgraph of real data from a live database", |
| 16 | + Long: `Extract real data from a PostgreSQL database by following foreign key |
| 17 | +relationships from a root table query. Produces a self-contained JSON fixture |
| 18 | +with data that satisfies all FK constraints by definition. |
| 19 | +
|
| 20 | +Examples: |
| 21 | + # Extract one organization and everything related |
| 22 | + fixturize extract --connection "$DB" \ |
| 23 | + --root "organizations WHERE id = 42" |
| 24 | +
|
| 25 | + # Extract 3 random orgs, cap children at 500 rows each |
| 26 | + fixturize extract --connection "$DB" \ |
| 27 | + --root "organizations ORDER BY random() LIMIT 3" \ |
| 28 | + --limit 500 |
| 29 | +
|
| 30 | + # Include lookup tables, exclude audit logs |
| 31 | + fixturize extract --connection "$DB" \ |
| 32 | + --root "organizations WHERE name = 'acme'" \ |
| 33 | + --include "roles,permissions" \ |
| 34 | + --exclude "audit_log,event_log" |
| 35 | +
|
| 36 | + # Mask PII columns |
| 37 | + fixturize extract --connection "$DB" \ |
| 38 | + --root "organizations WHERE id = 42" \ |
| 39 | + --mask "auth.users.email='user_' || id || '@test.com'" \ |
| 40 | + --mask "auth.users.name='User ' || id" |
| 41 | +
|
| 42 | + # Preview without writing |
| 43 | + fixturize extract --connection "$DB" \ |
| 44 | + --root "users LIMIT 5" --dry-run`, |
| 45 | + RunE: runExtract, |
| 46 | + } |
| 47 | + |
// Flag-backed settings. Each variable below is bound to a command-line flag
// in init and read by runExtract when it builds the extraction options.
// extractConn holds --connection and extractRoot holds --root; both flags
// are marked required in init.
| 48 | + extractConn string |
| 49 | + extractRoot string |
// extractSchema holds --schema (flag default "public").
| 50 | + extractSchema string |
// extractOutput holds --output / -o; runExtract falls back to
// "extracted.json" when it is empty.
| 51 | + extractOutput string |
// extractLimit holds --limit and extractDepth holds --depth; the flag help
// text describes 0 as "unlimited" and "follow everything" respectively.
| 52 | + extractLimit int |
| 53 | + extractDepth int |
// extractInclude and extractExclude hold the raw comma-separated values of
// --include and --exclude; runExtract splits them with parseCommaSeparated.
| 54 | + extractInclude string |
| 55 | + extractExclude string |
// extractMask collects every occurrence of the repeatable --mask flag.
| 56 | + extractMask []string |
// extractStatementTimeout holds --statement-timeout, documented in the flag
// help as seconds (flag default 30).
| 57 | + extractStatementTimeout int |
// extractDryRun holds --dry-run; when set, runExtract prints the JSON to
// stdout instead of writing a file.
| 58 | + extractDryRun bool |
| 59 | +) |
| 60 | + |
| 61 | +func init() { |
| 62 | + RootCmd.AddCommand(extractCmd) |
| 63 | + |
| 64 | + extractCmd.Flags().StringVar(&extractConn, "connection", "", "PostgreSQL connection string (required)") |
| 65 | + extractCmd.Flags().StringVar(&extractRoot, "root", "", "Root table + optional WHERE/ORDER BY/LIMIT (required)") |
| 66 | + extractCmd.Flags().StringVar(&extractSchema, "schema", "public", "Default schema for unqualified names") |
| 67 | + extractCmd.Flags().StringVarP(&extractOutput, "output", "o", "", "Output file path (default: extracted.json)") |
| 68 | + extractCmd.Flags().IntVar(&extractLimit, "limit", 0, "Max rows per child table (0 = unlimited)") |
| 69 | + extractCmd.Flags().IntVar(&extractDepth, "depth", 0, "Max FK hops from root (0 = follow everything)") |
| 70 | + extractCmd.Flags().StringVar(&extractInclude, "include", "", "Extra tables to include (comma-separated)") |
| 71 | + extractCmd.Flags().StringVar(&extractExclude, "exclude", "", "Tables to skip (comma-separated)") |
| 72 | + extractCmd.Flags().StringArrayVar(&extractMask, "mask", nil, "Mask column with SQL expression (table.column=expr, repeatable)") |
| 73 | + extractCmd.Flags().IntVar(&extractStatementTimeout, "statement-timeout", 30, "Per-statement timeout in seconds") |
| 74 | + extractCmd.Flags().BoolVar(&extractDryRun, "dry-run", false, "Print JSON to stdout, don't write file") |
| 75 | + extractCmd.MarkFlagRequired("connection") |
| 76 | + extractCmd.MarkFlagRequired("root") |
| 77 | +} |
| 78 | + |
| 79 | +func runExtract(cmd *cobra.Command, args []string) error { |
| 80 | + db, err := fixturize.OpenDB(extractConn) |
| 81 | + if err != nil { |
| 82 | + return fmt.Errorf("failed to connect to database: %w", err) |
| 83 | + } |
| 84 | + defer db.Close() |
| 85 | + |
| 86 | + options := &fixturize.ExtractOptions{ |
| 87 | + Connection: extractConn, |
| 88 | + Root: extractRoot, |
| 89 | + Schema: extractSchema, |
| 90 | + Output: extractOutput, |
| 91 | + Limit: extractLimit, |
| 92 | + Depth: extractDepth, |
| 93 | + Include: parseCommaSeparated(extractInclude), |
| 94 | + Exclude: parseCommaSeparated(extractExclude), |
| 95 | + Mask: extractMask, |
| 96 | + StatementTimeout: extractStatementTimeout, |
| 97 | + DryRun: extractDryRun, |
| 98 | + } |
| 99 | + |
| 100 | + // Delete previous output before extraction so stale data can't persist on failure |
| 101 | + if !extractDryRun && extractOutput != "" { |
| 102 | + os.Remove(extractOutput) |
| 103 | + } |
| 104 | + |
| 105 | + extractor := fixturize.NewExtractor(db, options) |
| 106 | + result, err := extractor.Extract() |
| 107 | + if err != nil { |
| 108 | + return fmt.Errorf("extraction failed: %w", err) |
| 109 | + } |
| 110 | + |
| 111 | + for _, w := range result.Warnings { |
| 112 | + fmt.Fprintf(os.Stderr, "Warning: %s\n", w) |
| 113 | + } |
| 114 | + |
| 115 | + if extractDryRun { |
| 116 | + fmt.Println(string(result.JSON)) |
| 117 | + return nil |
| 118 | + } |
| 119 | + |
| 120 | + outputPath := extractOutput |
| 121 | + if outputPath == "" { |
| 122 | + outputPath = "extracted.json" |
| 123 | + } |
| 124 | + |
| 125 | + dir := filepath.Dir(outputPath) |
| 126 | + if dir != "." && dir != "" { |
| 127 | + if err := os.MkdirAll(dir, 0755); err != nil { |
| 128 | + return fmt.Errorf("failed to create directory: %w", err) |
| 129 | + } |
| 130 | + } |
| 131 | + |
| 132 | + if err := os.WriteFile(outputPath, result.JSON, 0644); err != nil { |
| 133 | + return fmt.Errorf("failed to write file: %w", err) |
| 134 | + } |
| 135 | + |
| 136 | + fmt.Printf("Fixture written to: %s\n", outputPath) |
| 137 | + return nil |
| 138 | +} |
| 139 | + |
// parseCommaSeparated splits a comma-separated flag value into its items.
//
// Whitespace around each item is removed and empty items (from leading,
// trailing, or doubled commas) are dropped. It returns nil when s is empty or
// contains no non-blank items.
func parseCommaSeparated(s string) []string {
	if s == "" {
		return nil
	}
	var parts []string
	for _, field := range strings.Split(s, ",") {
		// TrimSpace also strips newlines and carriage returns, so values
		// pasted or read from a file with a trailing line break still match.
		if name := strings.TrimSpace(field); name != "" {
			parts = append(parts, name)
		}
	}
	return parts
}
| 161 | + |
// trimSpace returns s with leading and trailing ASCII spaces and tabs
// removed. Other whitespace, such as newlines, is left in place.
func trimSpace(s string) string {
	isBlank := func(c byte) bool { return c == ' ' || c == '\t' }

	lo, hi := 0, len(s)
	for ; lo < hi && isBlank(s[lo]); lo++ {
	}
	for ; hi > lo && isBlank(s[hi-1]); hi-- {
	}
	return s[lo:hi]
}
0 commit comments