feat(dispatch): detectar URLs de mídia e enviá-las no postback

DavidsonGomes · DavidsonGomes · commit d0b3f598fe4b · 2026-06-08T21:38:02.000-03:00
O dispatch enviava toda resposta como content_type text, então links de mídia
chegavam como texto. Agora extrai URLs de mídia (por extensão) do conteúdo ANTES
de segmentar, envia o texto residual segmentado normalmente, e despacha os
attachments num postback dedicado ao final (evita anexar mídia ao part de texto
errado em respostas multi-segmento).

- postbackRequest ganha campo Attachments []postbackAttachment {url, file_type}.
- extractMediaURLs/mediaFileType espelham o Ruby MediaTypeDetector (mesmas
  extensões, document→file, casa querystring/fragment). O CRM mantém detecção
  própria como fallback, então divergência Go/Ruby degrada sem perder mídia.

Realinhado dispatch_engine_test.go (órfão de refactor anterior do NewDispatchEngine)
e adicionado media_detection_test.go (table tests de detecção/extração).
Testes verdes.

DEPLOY: subir DEPOIS do evo-ai-crm-community (que processa os attachments).
diff --git a/pkg/dispatch/service/dispatch_engine.go b/pkg/dispatch/service/dispatch_engine.go
@@ -8,6 +8,7 @@ import (
 	"io"
 	"log/slog"
 	"net/http"
+	"regexp"
 	"strings"
 	"time"
 	"unicode/utf8"
@@ -32,9 +33,18 @@ type DispatchEngine interface {
 
 // postbackRequest is the JSON body for each HTTP POST to the postback endpoint.
 type postbackRequest struct {
-	Content     string `json:"content"`
-	MessageType string `json:"message_type"`
-	ContentType string `json:"content_type"`
+	Content     string               `json:"content"`
+	MessageType string               `json:"message_type"`
+	ContentType string               `json:"content_type"`
+	Attachments []postbackAttachment `json:"attachments,omitempty"` // omitempty: key is left out of the JSON when there are no attachments
+}
+
+// postbackAttachment is a media URL detected in the AI response, sent to the CRM
+// so it can download and render it as real media instead of a plain text link.
+// FileType matches the CRM Attachment enum: image / audio / video / file.
+type postbackAttachment struct {
+	URL      string `json:"url"`       // media URL as found in the response, trailing punctuation trimmed
+	FileType string `json:"file_type"` // "image", "audio", "video" or "file", chosen from the URL's extension
 }
 
 type dispatchEngineImpl struct {
@@ -63,7 +73,13 @@ func (d *dispatchEngineImpl) Dispatch(
 	cfg            model.BotConfig,
 	postbackURL    string,
 ) error {
-	parts := segmentContent(content, cfg)
+	// Pull media URLs out of the full response BEFORE segmenting, so media is
+	// not split across text parts. Media is delivered in a single dedicated
+	// postback after the text parts (see below). The CRM also re-detects media
+	// from the text as a fallback, so this is an optimization, not the only path.
+	residual, atts := extractMediaURLs(content)
+
+	parts := segmentContent(residual, cfg)
 
 	// Prepend signature to the first part (FR-21)
 	if cfg.MessageSignature != "" && len(parts) > 0 {
@@ -73,6 +89,12 @@ func (d *dispatchEngineImpl) Dispatch(
 	start := time.Now()
 
 	for i, part := range parts {
+		// Skip empty residual (e.g. response was only a media URL): the media
+		// postback below still runs.
+		if part == "" {
+			continue
+		}
+
 		// Check cancellation BEFORE sending this part
 		select {
 		case <-ctx.Done():
@@ -85,7 +107,7 @@ func (d *dispatchEngineImpl) Dispatch(
 		default:
 		}
 
-		if err := d.sendPart(ctx, postbackURL, part); err != nil {
+		if err := d.sendPart(ctx, postbackURL, part, nil); err != nil {
 			return fmt.Errorf("pipeline.dispatch.send[%d]: %w", i, err)
 		}
 
@@ -105,21 +127,36 @@ func (d *dispatchEngineImpl) Dispatch(
 		}
 	}
 
+	// Deliver media (if any) in a single dedicated postback after the text.
+	if len(atts) > 0 {
+		select {
+		case <-ctx.Done():
+			return brtErrors.ErrDispatchInterrupted
+		default:
+		}
+		if err := d.sendPart(ctx, postbackURL, "", atts); err != nil {
+			return fmt.Errorf("pipeline.dispatch.send[media]: %w", err)
+		}
+	}
+
 	slog.Info("pipeline.dispatch.completed",
 		"contact_id",      contactID,
 		"conversation_id", conversationID,
 		"duration_ms",     time.Since(start).Milliseconds(),
 		"parts_total",     len(parts),
+		"attachments",     len(atts),
 	)
 	return nil
 }
 
-// sendPart sends a single content part to the postback URL.
-func (d *dispatchEngineImpl) sendPart(ctx context.Context, postbackURL, content string) error {
+// sendPart sends a single content part (and optional media attachments) to the
+// postback URL.
+func (d *dispatchEngineImpl) sendPart(ctx context.Context, postbackURL, content string, atts []postbackAttachment) error {
 	body, err := json.Marshal(postbackRequest{
 		Content:     content,
 		MessageType: "outgoing",
 		ContentType: "text",
+		Attachments: atts,
 	})
 	if err != nil {
 		return fmt.Errorf("marshal: %w", err)
@@ -205,3 +242,92 @@ func segmentContent(content string, cfg model.BotConfig) []string {
 	}
 	return merged
 }
+
+// --- Media URL detection ---------------------------------------------------
+//
+// DRIFT WARNING: the extension table and URL matching below are meant to stay
+// in sync with the Ruby AgentBots::MediaTypeDetector
+// (app/services/agent_bots/media_type_detector.rb) in evo-ai-crm-community.
+// The CRM re-detects media from text as a fallback, so a divergence degrades
+// gracefully rather than losing media.
+
+// urlRegex matches an http(s) URL up to the next whitespace, angle bracket or
+// quote. Sentence punctuation glued to the URL is matched too and is stripped
+// afterwards with trailingPunct.
+var urlRegex = regexp.MustCompile(`https?://[^\s<>"']+`)
+
+// mediaExtToFileType maps a lower-case extension to its file_type (matches the
+// Attachment enum; document extensions map to "file").
+var mediaExtToFileType = func() map[string]string {
+	m := map[string]string{}
+	for _, e := range []string{"jpg", "jpeg", "png", "gif", "bmp", "webp", "svg", "tiff"} {
+		m[e] = "image"
+	}
+	for _, e := range []string{"mp3", "wav", "ogg", "m4a", "aac", "flac"} {
+		m[e] = "audio"
+	}
+	for _, e := range []string{"mp4", "avi", "mov", "wmv", "flv", "mkv", "webm"} {
+		m[e] = "video"
+	}
+	for _, e := range []string{"pdf", "doc", "docx", "xls", "xlsx", "ppt", "pptx", "txt", "rtf", "odt"} {
+		m[e] = "file"
+	}
+	return m
+}()
+
+// trailingPunct is punctuation often captured when a URL ends a sentence.
+const trailingPunct = ")].,!?;:"
+
+// extractMediaURLs pulls media URLs (by file extension) out of the response
+// text and returns the residual text plus the detected attachments, in the
+// order they appear.
+//
+// Each media URL is cut out at the exact offset where it matched. Removing it
+// by substring search instead would hit an earlier non-media URL that merely
+// starts with the same text (e.g. https://x/a.pdf.html before https://x/a.pdf),
+// corrupting that link and leaving the real media URL in the residual.
+func extractMediaURLs(content string) (residual string, atts []postbackAttachment) {
+	var kept strings.Builder
+	last := 0 // end offset of the most recently removed match
+	for _, loc := range urlRegex.FindAllStringIndex(content, -1) {
+		start, end := loc[0], loc[1]
+		url := strings.TrimRight(content[start:end], trailingPunct)
+		fileType := mediaFileType(url)
+		if fileType == "" {
+			continue // non-media URL stays in the text
+		}
+		atts = append(atts, postbackAttachment{URL: url, FileType: fileType})
+		// The whole raw match goes, trailing punctuation included.
+		kept.WriteString(content[last:start])
+		last = end
+	}
+	kept.WriteString(content[last:])
+	return strings.TrimSpace(kept.String()), atts
+}
+
+// mediaFileType returns the Attachment file_type for a URL, or "" if not media.
+// The extension is taken from the last PATH segment, ignoring query string and
+// fragment. A URL with a scheme but no path (https://example.mov) is never
+// media: its host must not be read as a file name. Input without a scheme is
+// treated as a bare path.
+func mediaFileType(url string) string {
+	path := url
+	if i := strings.IndexAny(path, "?#"); i >= 0 {
+		path = path[:i]
+	}
+	// Strip "scheme://host" so only the path is inspected.
+	if i := strings.Index(path, "://"); i >= 0 {
+		rest := path[i+3:]
+		slash := strings.Index(rest, "/")
+		if slash < 0 {
+			return "" // host only, no path
+		}
+		path = rest[slash:]
+	}
+	seg := path[strings.LastIndex(path, "/")+1:]
+	dot := strings.LastIndex(seg, ".")
+	if dot < 0 {
+		return ""
+	}
+	return mediaExtToFileType[strings.ToLower(seg[dot+1:])]
+}
diff --git a/pkg/dispatch/service/dispatch_engine_test.go b/pkg/dispatch/service/dispatch_engine_test.go
@@ -42,7 +42,7 @@ func collectParts(t *testing.T) (*httptest.Server, *[]string, *sync.Mutex) {
 func TestDispatch_MultiPart_SignatureOnFirstOnly(t *testing.T) {
 	server, partsPtr, mu := collectParts(t)
 
-	eng := service.NewDispatchEngine()
+	eng := service.NewDispatchEngine("")
 	cfg := model.BotConfig{
 		TextSegmentationEnabled: true,
 		TextSegmentationLimit:   15, // forces multiple parts
@@ -79,7 +79,7 @@ func TestDispatch_MultiPart_SignatureOnFirstOnly(t *testing.T) {
 func TestDispatch_NoSegmentation_SinglePart(t *testing.T) {
 	server, partsPtr, mu := collectParts(t)
 
-	eng := service.NewDispatchEngine()
+	eng := service.NewDispatchEngine("")
 	cfg := model.BotConfig{
 		TextSegmentationEnabled: false,
 		MessageSignature:        "—signature ",
@@ -118,7 +118,7 @@ func TestDispatch_Cancellation_ReturnsInterrupted(t *testing.T) {
 
 	ctx, cancel := context.WithCancel(context.Background())
 
-	eng := service.NewDispatchEngine()
+	eng := service.NewDispatchEngine("")
 	cfg := model.BotConfig{
 		TextSegmentationEnabled: true,
 		TextSegmentationLimit:   5,  // small limit → many parts
@@ -149,7 +149,7 @@ func TestDispatch_Cancellation_ReturnsInterrupted(t *testing.T) {
 func TestDispatch_EmptySignature_NoSuffix(t *testing.T) {
 	server, partsPtr, mu := collectParts(t)
 
-	eng := service.NewDispatchEngine()
+	eng := service.NewDispatchEngine("")
 	cfg := model.BotConfig{
 		TextSegmentationEnabled: false,
 		MessageSignature:        "", // empty — no suffix
@@ -178,7 +178,7 @@ func TestDispatch_NonOKResponse_ReturnsError(t *testing.T) {
 	}))
 	defer server.Close()
 
-	eng := service.NewDispatchEngine()
+	eng := service.NewDispatchEngine("")
 	cfg := model.BotConfig{TextSegmentationEnabled: false}
 
 	err := eng.Dispatch(context.Background(), 8, 8, "some content", cfg, server.URL)
@@ -192,7 +192,7 @@ func TestSegmentContent_MergeDoesNotExceedLimit(t *testing.T) {
 	// would produce "hello world test"(16 runes) > limit=11 → must NOT merge.
 	server, partsPtr, mu := collectParts(t)
 
-	eng := service.NewDispatchEngine()
+	eng := service.NewDispatchEngine("")
 	cfg := model.BotConfig{
 		TextSegmentationEnabled: true,
 		TextSegmentationLimit:   11,
@@ -226,7 +226,7 @@ func TestSegmentContent_RuneAwareLimits(t *testing.T) {
 	// A byte-counting bug would compute 10 bytes > 9 and wrongly split into 2 parts.
 	server, partsPtr, mu := collectParts(t)
 
-	eng := service.NewDispatchEngine()
+	eng := service.NewDispatchEngine("")
 	cfg := model.BotConfig{
 		TextSegmentationEnabled: true,
 		TextSegmentationLimit:   9, // rune limit — "olá mundo" is exactly 9 runes
@@ -262,7 +262,7 @@ func TestDispatch_ValidatesPostBody(t *testing.T) {
 	}))
 	defer server.Close()
 
-	eng := service.NewDispatchEngine()
+	eng := service.NewDispatchEngine("")
 	cfg := model.BotConfig{TextSegmentationEnabled: false}
 
 	if err := eng.Dispatch(context.Background(), 5, 5, "test content", cfg, server.URL); err != nil {
diff --git a/pkg/dispatch/service/media_detection_test.go b/pkg/dispatch/service/media_detection_test.go
@@ -0,0 +1,108 @@
+package service
+
+import (
+	"reflect"
+	"testing"
+)
+
+// TestMediaFileType checks the extension to file_type mapping, including
+// upper-case extensions and URLs carrying a query string or fragment.
+func TestMediaFileType(t *testing.T) {
+	cases := map[string]string{
+		"https://x/v.mp4":            "video",
+		"https://x/v.MP4":            "video",
+		"https://x/v.mp4?token=abc":  "video", // querystring ignored
+		"https://x/v.mov#t=10":       "video",
+		"https://x/pic.jpg":          "image",
+		"https://x/pic.png":          "image",
+		"https://x/song.mp3":         "audio",
+		"https://x/doc.pdf":          "file", // document -> file
+		"https://x/sheet.xlsx":       "file",
+		"https://site.com/page":      "",     // no extension
+		"https://site.com/page.html": "",     // not media
+		"https://x/no-dot/segment":   "",
+	}
+	for url, want := range cases {
+		if got := mediaFileType(url); got != want {
+			t.Errorf("mediaFileType(%q) = %q, want %q", url, got, want)
+		}
+	}
+}
+
+// TestExtractMediaURLs checks that media URLs are moved out of the text into
+// attachments while non-media URLs and plain text are left in place.
+func TestExtractMediaURLs(t *testing.T) {
+	t.Run("text with trailing media url", func(t *testing.T) {
+		text, atts := extractMediaURLs("Assiste aí https://pedrofelixtreinador.com.br/x/VLS_Atleta.mp4")
+		if text != "Assiste aí" {
+			t.Errorf("text = %q, want %q", text, "Assiste aí")
+		}
+		want := []postbackAttachment{{URL: "https://pedrofelixtreinador.com.br/x/VLS_Atleta.mp4", FileType: "video"}}
+		if !reflect.DeepEqual(atts, want) {
+			t.Errorf("atts = %+v, want %+v", atts, want)
+		}
+	})
+
+	t.Run("media only (residual empty)", func(t *testing.T) {
+		text, atts := extractMediaURLs("https://x/pic.jpg")
+		if text != "" {
+			t.Errorf("text = %q, want empty", text)
+		}
+		if len(atts) != 1 || atts[0].FileType != "image" {
+			t.Errorf("atts = %+v, want one image", atts)
+		}
+	})
+
+	t.Run("non-media url stays in text", func(t *testing.T) {
+		text, atts := extractMediaURLs("veja em https://site.com/pagina")
+		if text != "veja em https://site.com/pagina" {
+			t.Errorf("text = %q, should keep non-media url", text)
+		}
+		if len(atts) != 0 {
+			t.Errorf("atts = %+v, want none", atts)
+		}
+	})
+
+	t.Run("no urls", func(t *testing.T) {
+		text, atts := extractMediaURLs("plain text only")
+		if text != "plain text only" || len(atts) != 0 {
+			t.Errorf("got text=%q atts=%+v", text, atts)
+		}
+	})
+
+	t.Run("trailing punctuation trimmed", func(t *testing.T) {
+		_, atts := extractMediaURLs("olha (https://x/v.mp4).")
+		if len(atts) != 1 || atts[0].URL != "https://x/v.mp4" {
+			t.Errorf("atts = %+v, want trimmed url", atts)
+		}
+	})
+
+	t.Run("multiple media urls keep their order", func(t *testing.T) {
+		_, atts := extractMediaURLs("foto https://x/a.jpg e vídeo https://x/b.mp4")
+		want := []postbackAttachment{
+			{URL: "https://x/a.jpg", FileType: "image"},
+			{URL: "https://x/b.mp4", FileType: "video"},
+		}
+		if !reflect.DeepEqual(atts, want) {
+			t.Errorf("atts = %+v, want %+v", atts, want)
+		}
+	})
+
+	t.Run("query string kept on extracted url", func(t *testing.T) {
+		_, atts := extractMediaURLs("baixe https://x/doc.pdf?token=abc agora")
+		want := []postbackAttachment{{URL: "https://x/doc.pdf?token=abc", FileType: "file"}}
+		if !reflect.DeepEqual(atts, want) {
+			t.Errorf("atts = %+v, want %+v", atts, want)
+		}
+	})
+
+	t.Run("media extracted, non-media url kept", func(t *testing.T) {
+		text, atts := extractMediaURLs("site https://site.com/pagina e foto https://x/a.jpg")
+		if text != "site https://site.com/pagina e foto" {
+			t.Errorf("text = %q, should keep only the non-media url", text)
+		}
+		if len(atts) != 1 || atts[0].URL != "https://x/a.jpg" {
+			t.Errorf("atts = %+v, want one image", atts)
+		}
+	})
+}