|
5 | 5 | "context" |
6 | 6 | "fmt" |
7 | 7 | "os/exec" |
| 8 | + "strings" |
8 | 9 | "time" |
9 | 10 |
|
10 | 11 | "github.com/pastelocal/pastelocal/internal/clipboard" |
@@ -122,3 +123,130 @@ func (p *ProcessorPipeline) runCommand(ctx context.Context, name, command string |
122 | 123 | func (p *ProcessorPipeline) StepCount() int { |
123 | 124 | return len(p.chain) |
124 | 125 | } |
| 126 | + |
| 127 | +// --- VisionPaste analysis pipeline (v1) --- |
| 128 | + |
// AnalysisResult carries the text that the vision pipeline extracted from an
// image. Analyze fills each field from the steps whose names map to it.
type AnalysisResult struct {
	// OCRText collects the output of text-extraction steps, plus the output
	// of steps with unrecognised names (prefixed with the step name). Entries
	// are separated by newlines.
	OCRText string
	// Description collects the output of captioning/description steps.
	// Entries are separated by single spaces.
	Description string
}
| 134 | + |
| 135 | +// AnalysisPipeline runs configured external commands to produce OCR / descriptions |
| 136 | +// from screenshot images. Commands receive PNG bytes on stdin and must emit |
| 137 | +// useful text on stdout. Results are best-effort; failures are logged and ignored |
| 138 | +// (fail-open) so that clipboard flow is never blocked by analysis. |
| 139 | +type AnalysisPipeline struct { |
| 140 | + chain []analysisStep |
| 141 | + timeout time.Duration |
| 142 | + logger interface { |
| 143 | + Error(string, ...interface{}) |
| 144 | + Warn(string, ...interface{}) |
| 145 | + } |
| 146 | +} |
| 147 | + |
// analysisStep is one entry of the analysis chain: a label together with the
// shell command line that produces text for it.
type analysisStep struct {
	name    string // label; selects the result field in Analyze and appears in log and error messages
	command string // command line that runTextCommand passes to `sh -c`
}
| 152 | + |
| 153 | +// NewAnalysisPipeline builds from config (parallel to NewProcessorPipeline). |
| 154 | +func NewAnalysisPipeline(cfg *config.Config, logger interface { |
| 155 | + Error(string, ...interface{}) |
| 156 | + Warn(string, ...interface{}) |
| 157 | +}) *AnalysisPipeline { |
| 158 | + if !cfg.Vision.Enabled || len(cfg.Vision.Chain) == 0 { |
| 159 | + return &AnalysisPipeline{timeout: time.Duration(cfg.Vision.Timeout) * time.Second, logger: logger} |
| 160 | + } |
| 161 | + timeout := time.Duration(cfg.Vision.Timeout) * time.Second |
| 162 | + if timeout == 0 { |
| 163 | + timeout = 15 * time.Second |
| 164 | + } |
| 165 | + |
| 166 | + var steps []analysisStep |
| 167 | + for _, v := range cfg.Vision.Chain { |
| 168 | + steps = append(steps, analysisStep{ |
| 169 | + name: v.Name, |
| 170 | + command: v.Command, |
| 171 | + }) |
| 172 | + } |
| 173 | + return &AnalysisPipeline{ |
| 174 | + chain: steps, |
| 175 | + timeout: timeout, |
| 176 | + logger: logger, |
| 177 | + } |
| 178 | +} |
| 179 | + |
| 180 | +// Analyze runs the analysis chain on png content and returns collected results. |
| 181 | +// Returns nil if disabled, not an image, or no useful output produced. |
| 182 | +// Errors from individual steps are logged as warnings and skipped (fail-open). |
| 183 | +func (a *AnalysisPipeline) Analyze(ctx context.Context, content *clipboard.Content) *AnalysisResult { |
| 184 | + if len(a.chain) == 0 || content == nil || content.Format != "png" || len(content.Data) == 0 { |
| 185 | + return nil |
| 186 | + } |
| 187 | + |
| 188 | + res := &AnalysisResult{} |
| 189 | + for _, step := range a.chain { |
| 190 | + text, err := a.runTextCommand(ctx, step.name, step.command, content.Data) |
| 191 | + if err != nil { |
| 192 | + a.logger.Warn("analysis step failed, skipping", "step", step.name, "error", err) |
| 193 | + continue |
| 194 | + } |
| 195 | + text = strings.TrimSpace(text) |
| 196 | + if text == "" { |
| 197 | + continue |
| 198 | + } |
| 199 | + // Map step name to result field. Unknown names go to OCRText with prefix. |
| 200 | + switch strings.ToLower(step.name) { |
| 201 | + case "ocr", "tesseract", "text", "extract": |
| 202 | + if res.OCRText != "" { |
| 203 | + res.OCRText += "\n" |
| 204 | + } |
| 205 | + res.OCRText += text |
| 206 | + case "describe", "caption", "summary", "vision", "alt": |
| 207 | + if res.Description != "" { |
| 208 | + res.Description += " " |
| 209 | + } |
| 210 | + res.Description += text |
| 211 | + default: |
| 212 | + if res.OCRText != "" { |
| 213 | + res.OCRText += "\n" |
| 214 | + } |
| 215 | + res.OCRText += step.name + ": " + text |
| 216 | + } |
| 217 | + } |
| 218 | + |
| 219 | + if res.OCRText == "" && res.Description == "" { |
| 220 | + return nil |
| 221 | + } |
| 222 | + return res |
| 223 | +} |
| 224 | + |
| 225 | +// runTextCommand executes a shell command with image data on stdin and returns |
| 226 | +// the stdout as text (analysis output). Mirrors the processor runCommand pattern |
| 227 | +// but for text-producing commands and with slightly longer default timeout. |
| 228 | +func (a *AnalysisPipeline) runTextCommand(ctx context.Context, name, command string, data []byte) (string, error) { |
| 229 | + procCtx, cancel := context.WithTimeout(ctx, a.timeout) |
| 230 | + defer cancel() |
| 231 | + |
| 232 | + cmd := exec.CommandContext(procCtx, "sh", "-c", command) |
| 233 | + cmd.Stdin = bytes.NewReader(data) |
| 234 | + var stdout, stderr bytes.Buffer |
| 235 | + cmd.Stdout = &stdout |
| 236 | + cmd.Stderr = &stderr |
| 237 | + |
| 238 | + if err := cmd.Run(); err != nil { |
| 239 | + return "", fmt.Errorf("analysis %q failed: %w: %s", name, err, stderr.String()) |
| 240 | + } |
| 241 | + |
| 242 | + result := strings.TrimSpace(stdout.String()) |
| 243 | + if result == "" { |
| 244 | + return "", fmt.Errorf("analysis %q produced no output", name) |
| 245 | + } |
| 246 | + return result, nil |
| 247 | +} |
| 248 | + |
| 249 | +// AnalysisStepCount returns how many analysis steps are configured. |
| 250 | +func (a *AnalysisPipeline) AnalysisStepCount() int { |
| 251 | + return len(a.chain) |
| 252 | +} |
0 commit comments