|
| 1 | +package cmd |
| 2 | + |
| 3 | +import ( |
| 4 | + "encoding/json" |
| 5 | + "fmt" |
| 6 | + "io" |
| 7 | + "os" |
| 8 | + "strings" |
| 9 | + |
| 10 | + "github.com/GrayCodeAI/tok" |
| 11 | + "github.com/spf13/cobra" |
| 12 | +) |
| 13 | + |
| 14 | +// hawk embeds the tok library directly (see internal/engine/token/tokenizer.go). |
| 15 | +// These verbs expose tok's compression, token-estimation, and secret-scanning |
| 16 | +// surface through the hawk CLI — tok ships no standalone binary. |
| 17 | + |
// Package-level storage for the tok subcommand flags. The flag registrations
// in init bind each variable to its command-line flag.

// Flags registered on every tok subcommand.
var (
	tokInput  string // --input
	tokFormat string // --format
)

// Flags registered on `tok compress` only.
var (
	tokIntensity string // --intensity
	tokBudget    int    // --budget
	tokPrompt    bool   // --prompt
	tokStats     bool   // --stats
)

// Flag registered on `tok estimate` only.
var tokModel string // --model

// Flag registered on `tok scan` only.
var tokRedact bool // --redact
| 34 | + |
| 35 | +var tokCmd = &cobra.Command{ |
| 36 | + Use: "tok", |
| 37 | + Short: "Token compression, estimation, and secret scanning (embedded tok library)", |
| 38 | + Long: `tok exposes hawk's embedded token-efficiency library: |
| 39 | +
|
| 40 | + hawk tok compress shrink prose/prompts or fit text to a token budget |
| 41 | + hawk tok estimate count tokens and estimate cost for a model |
| 42 | + hawk tok scan detect (and optionally redact) secrets in text |
| 43 | +
|
| 44 | +Input is read from --input <file>, a trailing argument, or stdin. |
| 45 | +tok has no standalone binary — these verbs run the library in-process.`, |
| 46 | +} |
| 47 | + |
| 48 | +// readTokInput resolves input from --input <file>, the first positional arg, |
| 49 | +// or stdin (in that order). |
| 50 | +func readTokInput(args []string) (string, error) { |
| 51 | + if tokInput != "" { |
| 52 | + b, err := os.ReadFile(tokInput) |
| 53 | + if err != nil { |
| 54 | + return "", fmt.Errorf("read --input %q: %w", tokInput, err) |
| 55 | + } |
| 56 | + return string(b), nil |
| 57 | + } |
| 58 | + if len(args) > 0 { |
| 59 | + return strings.Join(args, " "), nil |
| 60 | + } |
| 61 | + b, err := io.ReadAll(os.Stdin) |
| 62 | + if err != nil { |
| 63 | + return "", fmt.Errorf("read stdin: %w", err) |
| 64 | + } |
| 65 | + if len(b) == 0 { |
| 66 | + return "", fmt.Errorf("no input: provide text as an argument, --input <file>, or via stdin") |
| 67 | + } |
| 68 | + return string(b), nil |
| 69 | +} |
| 70 | + |
| 71 | +var tokCompressCmd = &cobra.Command{ |
| 72 | + Use: "compress [text]", |
| 73 | + Short: "Compress prompts/prose or fit text to a token budget", |
| 74 | + Long: `Compress text with the tok pipeline. |
| 75 | +
|
| 76 | +By default uses prompt/prose compression at the chosen --intensity (lite, full, |
| 77 | +ultra). Pass --budget to instead run the full output pipeline targeting a token |
| 78 | +budget. Use --stats for a savings summary.`, |
| 79 | + Args: cobra.ArbitraryArgs, |
| 80 | + RunE: func(cmd *cobra.Command, args []string) error { |
| 81 | + text, err := readTokInput(args) |
| 82 | + if err != nil { |
| 83 | + return err |
| 84 | + } |
| 85 | + |
| 86 | + // --budget runs the token-budget output pipeline, unless --prompt forces |
| 87 | + // prose/prompt compression. |
| 88 | + if tokBudget > 0 && !tokPrompt { |
| 89 | + out, stats := tok.Compress(text, tok.WithBudget(tokBudget)) |
| 90 | + return emitCompress(cmd, out, &stats, nil) |
| 91 | + } |
| 92 | + |
| 93 | + intensity, err := parseIntensity(tokIntensity) |
| 94 | + if err != nil { |
| 95 | + return err |
| 96 | + } |
| 97 | + out, pstats := tok.PromptCompress(text, intensity) |
| 98 | + return emitCompress(cmd, out, nil, &pstats) |
| 99 | + }, |
| 100 | +} |
| 101 | + |
| 102 | +func parseIntensity(s string) (tok.Intensity, error) { |
| 103 | + switch strings.ToLower(strings.TrimSpace(s)) { |
| 104 | + case "", "full": |
| 105 | + return tok.IntensityFull, nil |
| 106 | + case "lite": |
| 107 | + return tok.IntensityLite, nil |
| 108 | + case "ultra": |
| 109 | + return tok.IntensityUltra, nil |
| 110 | + default: |
| 111 | + return tok.IntensityFull, fmt.Errorf("invalid --intensity %q: use lite, full, or ultra", s) |
| 112 | + } |
| 113 | +} |
| 114 | + |
| 115 | +// emitCompress prints compressed output, optionally with stats. Exactly one of |
| 116 | +// stats / pstats is non-nil depending on which compression path ran. |
| 117 | +func emitCompress(cmd *cobra.Command, out string, stats *tok.Stats, pstats *tok.PromptStats) error { |
| 118 | + if tokFormat == "json" { |
| 119 | + payload := map[string]any{"compressed": out} |
| 120 | + switch { |
| 121 | + case stats != nil: |
| 122 | + payload["stats"] = stats |
| 123 | + case pstats != nil: |
| 124 | + payload["stats"] = pstats |
| 125 | + } |
| 126 | + return writeJSON(cmd, payload) |
| 127 | + } |
| 128 | + |
| 129 | + cmd.Println(out) |
| 130 | + if tokStats { |
| 131 | + switch { |
| 132 | + case stats != nil: |
| 133 | + cmd.Println() |
| 134 | + cmd.Print(tok.FormatStats(*stats)) |
| 135 | + case pstats != nil: |
| 136 | + cmd.Println() |
| 137 | + cmd.Printf("intensity=%v bytes %d → %d (%.1f%% off)\n", |
| 138 | + pstats.Intensity, pstats.OriginalBytes, pstats.CompressedBytes, pstats.PercentOff) |
| 139 | + } |
| 140 | + } |
| 141 | + return nil |
| 142 | +} |
| 143 | + |
| 144 | +var tokEstimateCmd = &cobra.Command{ |
| 145 | + Use: "estimate [text]", |
| 146 | + Short: "Estimate token count and cost for a model", |
| 147 | + Args: cobra.ArbitraryArgs, |
| 148 | + RunE: func(cmd *cobra.Command, args []string) error { |
| 149 | + text, err := readTokInput(args) |
| 150 | + if err != nil { |
| 151 | + return err |
| 152 | + } |
| 153 | + |
| 154 | + tokens := tok.EstimateTokensForModel(text, tokModel) |
| 155 | + |
| 156 | + var costUSD float64 |
| 157 | + var priced bool |
| 158 | + if pricing, ok := tok.GetModelPricing(tokModel); ok { |
| 159 | + priced = true |
| 160 | + costUSD = float64(tokens) / 1000.0 * pricing.InputPricePer1K |
| 161 | + } |
| 162 | + |
| 163 | + if tokFormat == "json" { |
| 164 | + payload := map[string]any{"tokens": tokens, "model": tokModel} |
| 165 | + if priced { |
| 166 | + payload["input_cost_usd"] = costUSD |
| 167 | + } |
| 168 | + return writeJSON(cmd, payload) |
| 169 | + } |
| 170 | + |
| 171 | + cmd.Printf("%d tokens (model: %s)\n", tokens, tokModel) |
| 172 | + if priced { |
| 173 | + cmd.Printf("≈ $%.6f input cost\n", costUSD) |
| 174 | + } else { |
| 175 | + cmd.Printf("(no pricing registered for %q — token count only)\n", tokModel) |
| 176 | + } |
| 177 | + return nil |
| 178 | + }, |
| 179 | +} |
| 180 | + |
| 181 | +var tokScanCmd = &cobra.Command{ |
| 182 | + Use: "scan [text]", |
| 183 | + Short: "Detect (and optionally redact) secrets in text", |
| 184 | + Long: "Scan input for credentials, keys, and other secrets. Use --redact to print the input with secrets masked.", |
| 185 | + Args: cobra.ArbitraryArgs, |
| 186 | + RunE: func(cmd *cobra.Command, args []string) error { |
| 187 | + text, err := readTokInput(args) |
| 188 | + if err != nil { |
| 189 | + return err |
| 190 | + } |
| 191 | + |
| 192 | + detector := tok.NewSecretDetector() |
| 193 | + |
| 194 | + if tokRedact { |
| 195 | + redacted := detector.RedactSecrets(text) |
| 196 | + if tokFormat == "json" { |
| 197 | + return writeJSON(cmd, map[string]any{"redacted": redacted}) |
| 198 | + } |
| 199 | + cmd.Print(redacted) |
| 200 | + if !strings.HasSuffix(redacted, "\n") { |
| 201 | + cmd.Println() |
| 202 | + } |
| 203 | + return nil |
| 204 | + } |
| 205 | + |
| 206 | + findings := detector.DetectSecrets(text) |
| 207 | + |
| 208 | + if tokFormat == "json" { |
| 209 | + return writeJSON(cmd, map[string]any{ |
| 210 | + "count": len(findings), |
| 211 | + "secrets": findings, |
| 212 | + }) |
| 213 | + } |
| 214 | + |
| 215 | + if len(findings) == 0 { |
| 216 | + cmd.Println("no secrets detected") |
| 217 | + return nil |
| 218 | + } |
| 219 | + cmd.Printf("%d secret(s) detected:\n", len(findings)) |
| 220 | + for _, f := range findings { |
| 221 | + cmd.Printf(" - %s: %s\n", f.Type, f.Masked) |
| 222 | + } |
| 223 | + // Non-zero exit so callers (CI, hooks) can gate on detection. |
| 224 | + return fmt.Errorf("tok scan: %d secret(s) detected", len(findings)) |
| 225 | + }, |
| 226 | +} |
| 227 | + |
| 228 | +// writeJSON encodes v as indented JSON to stdout. |
| 229 | +func writeJSON(cmd *cobra.Command, v any) error { |
| 230 | + enc := json.NewEncoder(cmd.OutOrStdout()) |
| 231 | + enc.SetIndent("", " ") |
| 232 | + return enc.Encode(v) |
| 233 | +} |
| 234 | + |
| 235 | +func init() { |
| 236 | + // Shared input/output flags on each subcommand. |
| 237 | + for _, c := range []*cobra.Command{tokCompressCmd, tokEstimateCmd, tokScanCmd} { |
| 238 | + c.Flags().StringVar(&tokInput, "input", "", "read input from this file instead of stdin/args") |
| 239 | + c.Flags().StringVar(&tokFormat, "format", "text", "output format: text, json") |
| 240 | + } |
| 241 | + |
| 242 | + tokCompressCmd.Flags().StringVar(&tokIntensity, "intensity", "full", "prompt compression intensity: lite, full, ultra") |
| 243 | + tokCompressCmd.Flags().IntVar(&tokBudget, "budget", 0, "compress to fit within this many tokens (uses the output pipeline)") |
| 244 | + tokCompressCmd.Flags().BoolVar(&tokPrompt, "prompt", false, "force prompt/prose compression even when --budget is set") |
| 245 | + tokCompressCmd.Flags().BoolVar(&tokStats, "stats", false, "print a compression savings summary") |
| 246 | + |
| 247 | + tokEstimateCmd.Flags().StringVar(&tokModel, "model", "gpt-4o", "model to estimate tokens/cost for") |
| 248 | + |
| 249 | + tokScanCmd.Flags().BoolVar(&tokRedact, "redact", false, "print input with secrets masked instead of listing them") |
| 250 | + |
| 251 | + tokCmd.AddCommand(tokCompressCmd, tokEstimateCmd, tokScanCmd) |
| 252 | + rootCmd.AddCommand(tokCmd) |
| 253 | +} |
0 commit comments