Skip to content

Commit 8c914f9

Browse files
Patel230 and claude committed
feat(tok): add hawk tok compress/estimate/scan subcommand
Expose the embedded tok library through the hawk CLI so token compression, estimation, and secret scanning work via `hawk tok` with no standalone tok binary — matching how every other ecosystem tool is surfaced (hawk yaad, hawk models, hawk inspect/sight/trace). - compress: prose/prompt compression (--intensity lite/full/ultra) or token-budget pipeline (--budget), with optional --stats - estimate: token count + per-model cost (--model) - scan: detect secrets (non-zero exit for CI gating) or --redact - input via --input <file>, trailing arg, or stdin; --format text|json Wraps existing tok.* functions directly (no bridge package needed — hawk already imports tok in internal/engine/token). Makes the "provided via Hawk" references in tok's docs accurate. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
1 parent 0104401 commit 8c914f9

1 file changed

Lines changed: 253 additions & 0 deletions

File tree

cmd/tok.go

Lines changed: 253 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,253 @@
1+
package cmd
2+
3+
import (
4+
"encoding/json"
5+
"fmt"
6+
"io"
7+
"os"
8+
"strings"
9+
10+
"github.com/GrayCodeAI/tok"
11+
"github.com/spf13/cobra"
12+
)
13+
14+
// hawk embeds the tok library directly (see internal/engine/token/tokenizer.go).
15+
// These verbs expose tok's compression, token-estimation, and secret-scanning
16+
// surface through the hawk CLI — tok ships no standalone binary.
17+
18+
// Flag values for the `hawk tok` subcommands. Each variable is the storage
// target of one command-line flag.
var (
	// Backing storage for --input and --format.
	tokInput, tokFormat string

	// Backing storage for the compress flags: --intensity, --budget, --prompt,
	// and --stats.
	tokIntensity        string
	tokBudget           int
	tokPrompt, tokStats bool

	// Backing storage for the estimate flag --model.
	tokModel string

	// Backing storage for the scan flag --redact.
	tokRedact bool
)
34+
35+
var tokCmd = &cobra.Command{
36+
Use: "tok",
37+
Short: "Token compression, estimation, and secret scanning (embedded tok library)",
38+
Long: `tok exposes hawk's embedded token-efficiency library:
39+
40+
hawk tok compress shrink prose/prompts or fit text to a token budget
41+
hawk tok estimate count tokens and estimate cost for a model
42+
hawk tok scan detect (and optionally redact) secrets in text
43+
44+
Input is read from --input <file>, a trailing argument, or stdin.
45+
tok has no standalone binary — these verbs run the library in-process.`,
46+
}
47+
48+
// readTokInput resolves input from --input <file>, the first positional arg,
49+
// or stdin (in that order).
50+
func readTokInput(args []string) (string, error) {
51+
if tokInput != "" {
52+
b, err := os.ReadFile(tokInput)
53+
if err != nil {
54+
return "", fmt.Errorf("read --input %q: %w", tokInput, err)
55+
}
56+
return string(b), nil
57+
}
58+
if len(args) > 0 {
59+
return strings.Join(args, " "), nil
60+
}
61+
b, err := io.ReadAll(os.Stdin)
62+
if err != nil {
63+
return "", fmt.Errorf("read stdin: %w", err)
64+
}
65+
if len(b) == 0 {
66+
return "", fmt.Errorf("no input: provide text as an argument, --input <file>, or via stdin")
67+
}
68+
return string(b), nil
69+
}
70+
71+
var tokCompressCmd = &cobra.Command{
72+
Use: "compress [text]",
73+
Short: "Compress prompts/prose or fit text to a token budget",
74+
Long: `Compress text with the tok pipeline.
75+
76+
By default uses prompt/prose compression at the chosen --intensity (lite, full,
77+
ultra). Pass --budget to instead run the full output pipeline targeting a token
78+
budget. Use --stats for a savings summary.`,
79+
Args: cobra.ArbitraryArgs,
80+
RunE: func(cmd *cobra.Command, args []string) error {
81+
text, err := readTokInput(args)
82+
if err != nil {
83+
return err
84+
}
85+
86+
// --budget runs the token-budget output pipeline, unless --prompt forces
87+
// prose/prompt compression.
88+
if tokBudget > 0 && !tokPrompt {
89+
out, stats := tok.Compress(text, tok.WithBudget(tokBudget))
90+
return emitCompress(cmd, out, &stats, nil)
91+
}
92+
93+
intensity, err := parseIntensity(tokIntensity)
94+
if err != nil {
95+
return err
96+
}
97+
out, pstats := tok.PromptCompress(text, intensity)
98+
return emitCompress(cmd, out, nil, &pstats)
99+
},
100+
}
101+
102+
func parseIntensity(s string) (tok.Intensity, error) {
103+
switch strings.ToLower(strings.TrimSpace(s)) {
104+
case "", "full":
105+
return tok.IntensityFull, nil
106+
case "lite":
107+
return tok.IntensityLite, nil
108+
case "ultra":
109+
return tok.IntensityUltra, nil
110+
default:
111+
return tok.IntensityFull, fmt.Errorf("invalid --intensity %q: use lite, full, or ultra", s)
112+
}
113+
}
114+
115+
// emitCompress prints compressed output, optionally with stats. Exactly one of
116+
// stats / pstats is non-nil depending on which compression path ran.
117+
func emitCompress(cmd *cobra.Command, out string, stats *tok.Stats, pstats *tok.PromptStats) error {
118+
if tokFormat == "json" {
119+
payload := map[string]any{"compressed": out}
120+
switch {
121+
case stats != nil:
122+
payload["stats"] = stats
123+
case pstats != nil:
124+
payload["stats"] = pstats
125+
}
126+
return writeJSON(cmd, payload)
127+
}
128+
129+
cmd.Println(out)
130+
if tokStats {
131+
switch {
132+
case stats != nil:
133+
cmd.Println()
134+
cmd.Print(tok.FormatStats(*stats))
135+
case pstats != nil:
136+
cmd.Println()
137+
cmd.Printf("intensity=%v bytes %d → %d (%.1f%% off)\n",
138+
pstats.Intensity, pstats.OriginalBytes, pstats.CompressedBytes, pstats.PercentOff)
139+
}
140+
}
141+
return nil
142+
}
143+
144+
var tokEstimateCmd = &cobra.Command{
145+
Use: "estimate [text]",
146+
Short: "Estimate token count and cost for a model",
147+
Args: cobra.ArbitraryArgs,
148+
RunE: func(cmd *cobra.Command, args []string) error {
149+
text, err := readTokInput(args)
150+
if err != nil {
151+
return err
152+
}
153+
154+
tokens := tok.EstimateTokensForModel(text, tokModel)
155+
156+
var costUSD float64
157+
var priced bool
158+
if pricing, ok := tok.GetModelPricing(tokModel); ok {
159+
priced = true
160+
costUSD = float64(tokens) / 1000.0 * pricing.InputPricePer1K
161+
}
162+
163+
if tokFormat == "json" {
164+
payload := map[string]any{"tokens": tokens, "model": tokModel}
165+
if priced {
166+
payload["input_cost_usd"] = costUSD
167+
}
168+
return writeJSON(cmd, payload)
169+
}
170+
171+
cmd.Printf("%d tokens (model: %s)\n", tokens, tokModel)
172+
if priced {
173+
cmd.Printf("≈ $%.6f input cost\n", costUSD)
174+
} else {
175+
cmd.Printf("(no pricing registered for %q — token count only)\n", tokModel)
176+
}
177+
return nil
178+
},
179+
}
180+
181+
var tokScanCmd = &cobra.Command{
182+
Use: "scan [text]",
183+
Short: "Detect (and optionally redact) secrets in text",
184+
Long: "Scan input for credentials, keys, and other secrets. Use --redact to print the input with secrets masked.",
185+
Args: cobra.ArbitraryArgs,
186+
RunE: func(cmd *cobra.Command, args []string) error {
187+
text, err := readTokInput(args)
188+
if err != nil {
189+
return err
190+
}
191+
192+
detector := tok.NewSecretDetector()
193+
194+
if tokRedact {
195+
redacted := detector.RedactSecrets(text)
196+
if tokFormat == "json" {
197+
return writeJSON(cmd, map[string]any{"redacted": redacted})
198+
}
199+
cmd.Print(redacted)
200+
if !strings.HasSuffix(redacted, "\n") {
201+
cmd.Println()
202+
}
203+
return nil
204+
}
205+
206+
findings := detector.DetectSecrets(text)
207+
208+
if tokFormat == "json" {
209+
return writeJSON(cmd, map[string]any{
210+
"count": len(findings),
211+
"secrets": findings,
212+
})
213+
}
214+
215+
if len(findings) == 0 {
216+
cmd.Println("no secrets detected")
217+
return nil
218+
}
219+
cmd.Printf("%d secret(s) detected:\n", len(findings))
220+
for _, f := range findings {
221+
cmd.Printf(" - %s: %s\n", f.Type, f.Masked)
222+
}
223+
// Non-zero exit so callers (CI, hooks) can gate on detection.
224+
return fmt.Errorf("tok scan: %d secret(s) detected", len(findings))
225+
},
226+
}
227+
228+
// writeJSON encodes v as indented JSON to stdout.
229+
func writeJSON(cmd *cobra.Command, v any) error {
230+
enc := json.NewEncoder(cmd.OutOrStdout())
231+
enc.SetIndent("", " ")
232+
return enc.Encode(v)
233+
}
234+
235+
func init() {
236+
// Shared input/output flags on each subcommand.
237+
for _, c := range []*cobra.Command{tokCompressCmd, tokEstimateCmd, tokScanCmd} {
238+
c.Flags().StringVar(&tokInput, "input", "", "read input from this file instead of stdin/args")
239+
c.Flags().StringVar(&tokFormat, "format", "text", "output format: text, json")
240+
}
241+
242+
tokCompressCmd.Flags().StringVar(&tokIntensity, "intensity", "full", "prompt compression intensity: lite, full, ultra")
243+
tokCompressCmd.Flags().IntVar(&tokBudget, "budget", 0, "compress to fit within this many tokens (uses the output pipeline)")
244+
tokCompressCmd.Flags().BoolVar(&tokPrompt, "prompt", false, "force prompt/prose compression even when --budget is set")
245+
tokCompressCmd.Flags().BoolVar(&tokStats, "stats", false, "print a compression savings summary")
246+
247+
tokEstimateCmd.Flags().StringVar(&tokModel, "model", "gpt-4o", "model to estimate tokens/cost for")
248+
249+
tokScanCmd.Flags().BoolVar(&tokRedact, "redact", false, "print input with secrets masked instead of listing them")
250+
251+
tokCmd.AddCommand(tokCompressCmd, tokEstimateCmd, tokScanCmd)
252+
rootCmd.AddCommand(tokCmd)
253+
}

0 commit comments

Comments
 (0)