From 301e627e6fdd62461575f77237265009aa4a0ea0 Mon Sep 17 00:00:00 2001 From: "fengboyun.evan" Date: Tue, 3 Mar 2026 17:59:38 +0800 Subject: [PATCH 1/2] feat(backend): fix export detail field mismatch --- .../task/service/taskexe/processor/utils.go | 59 +++---------------- .../domain/trace/entity/loop_span/span.go | 16 ++++- .../trace/entity/loop_span/span_test.go | 53 +++++++++++++++++ .../trace/service/trace_export_service.go | 7 ++- 4 files changed, 80 insertions(+), 55 deletions(-) diff --git a/backend/modules/observability/domain/task/service/taskexe/processor/utils.go b/backend/modules/observability/domain/task/service/taskexe/processor/utils.go index aa29bb0c7..f790387f8 100644 --- a/backend/modules/observability/domain/task/service/taskexe/processor/utils.go +++ b/backend/modules/observability/domain/task/service/taskexe/processor/utils.go @@ -24,7 +24,6 @@ import ( "github.com/coze-dev/coze-loop/backend/modules/observability/domain/trace/entity/loop_span" "github.com/coze-dev/coze-loop/backend/pkg/json" "github.com/coze-dev/coze-loop/backend/pkg/logs" - "github.com/coze-dev/cozeloop-go/spec/tracespec" ) func getCategory(taskType task.TaskType) entity.DatasetCategory { @@ -194,7 +193,13 @@ func buildItem(ctx context.Context, span *loop_span.Span, fieldMappings []*task_ logs.CtxInfo(ctx, "Evaluator field key is empty, name:%v", fieldSchema.Name) continue } - value, err := span.ExtractByJsonpath(ctx, mapping.TraceFieldKey, mapping.TraceFieldJsonpath) + var value string + var err error + if fieldSchema.ContentType == entity.ContentType_MultiPart { + value, err = span.ExtractByJsonpathRaw(ctx, mapping.TraceFieldKey, mapping.TraceFieldJsonpath) + } else { + value, err = span.ExtractByJsonpath(ctx, mapping.TraceFieldKey, mapping.TraceFieldJsonpath) + } if err != nil { logs.CtxInfo(ctx, "Extract field failed, err:%v", err) continue @@ -214,53 +219,3 @@ func buildItem(ctx context.Context, span *loop_span.Span, fieldMappings []*task_ } return fieldDatas } - -// Deprecated: use common function entity.GetContentInfo instead -// GetContentInfo todo:[xun]和手动回流的代码逻辑一样,需要抽取公共代码 -func GetContentInfo(ctx context.Context, contentType entity.ContentType, value string) (*common.Content, error) { - var content *common.Content - switch contentType { - case entity.ContentType_MultiPart: - var parts []tracespec.ModelMessagePart - err := json.Unmarshal([]byte(value), &parts) - if err != nil { - logs.CtxInfo(ctx, "Unmarshal multi part failed, err:%v", err) - return nil, err - } - var multiPart []*common.Content - for _, part := range parts { - // 本期仅支持回流图片的多模态数据,非ImageURL信息的,打包放进text - switch part.Type { - case tracespec.ModelMessagePartTypeImage: - if part.ImageURL == nil { - continue - } - multiPart = append(multiPart, &common.Content{ - ContentType: gptr.Of(common.ContentTypeImage), - Image: &common.Image{ - Name: gptr.Of(part.ImageURL.Name), - URL: gptr.Of(part.ImageURL.URL), - }, - }) - case tracespec.ModelMessagePartTypeText, tracespec.ModelMessagePartTypeFile: - multiPart = append(multiPart, &common.Content{ - ContentType: gptr.Of(common.ContentTypeText), - Text: gptr.Of(part.Text), - }) - default: - logs.CtxWarn(ctx, "Unsupported part type: %s", part.Type) - return nil, err - } - } - content = &common.Content{ - ContentType: gptr.Of(common.ContentTypeMultiPart), - MultiPart: multiPart, - } - default: - content = &common.Content{ - ContentType: gptr.Of(common.ContentTypeText), - Text: gptr.Of(value), - } - } - return content, nil -} diff --git a/backend/modules/observability/domain/trace/entity/loop_span/span.go b/backend/modules/observability/domain/trace/entity/loop_span/span.go index ed3a9bfc0..ad80053c2 100644 --- a/backend/modules/observability/domain/trace/entity/loop_span/span.go +++ b/backend/modules/observability/domain/trace/entity/loop_span/span.go @@ -589,7 +589,18 @@ func (s *Span) AddAutoEvalAnnotation(taskID, evaluatorRecordID, evaluatorVersion } // ExtractByJsonpath 从Span的Input/Output/Tags中提取数据,根据jsonpath返回结果。时间戳按毫秒返回。 +// 会递归解析嵌套的 JSON 字符串。 func (s *Span) ExtractByJsonpath(ctx context.Context, key string, jsonpath string) (string, error) { + return s.extractByJsonpath(ctx, key, jsonpath, true) +} + +// ExtractByJsonpathRaw 从Span的Input/Output/Tags中提取数据,根据jsonpath返回结果。时间戳按毫秒返回。 +// 不会递归解析嵌套的 JSON 字符串,保持原始格式。适用于 MultiPart 类型数据提取。 +func (s *Span) ExtractByJsonpathRaw(ctx context.Context, key string, jsonpath string) (string, error) { + return s.extractByJsonpath(ctx, key, jsonpath, false) +} + +func (s *Span) extractByJsonpath(ctx context.Context, key string, jsonpath string, recursive bool) (string, error) { jsonpath = strings.TrimPrefix(jsonpath, key) jsonpath = strings.TrimPrefix(jsonpath, ".") data := "" @@ -618,7 +629,10 @@ func (s *Span) ExtractByJsonpath(ctx context.Context, key string, jsonpath strin return data, nil } - return json.GetStringByJSONPathRecursively(data, jsonpath) + if recursive { + return json.GetStringByJSONPathRecursively(data, jsonpath) + } + return json.GetStringByJSONPath(data, jsonpath) } func validField(clipFields *[]string, key, value string) string { diff --git a/backend/modules/observability/domain/trace/entity/loop_span/span_test.go b/backend/modules/observability/domain/trace/entity/loop_span/span_test.go index e5d2bffd0..fa8395afc 100644 --- a/backend/modules/observability/domain/trace/entity/loop_span/span_test.go +++ b/backend/modules/observability/domain/trace/entity/loop_span/span_test.go @@ -5,6 +5,7 @@ package loop_span import ( "context" + "github.com/coze-dev/cozeloop-go/spec/tracespec" "reflect" "strconv" "strings" @@ -1061,3 +1062,55 @@ func TestSpanList_FilterModelSpans(t *testing.T) { }) } } + +func TestSpan_ExtractByJsonpathRaw(t *testing.T) { + t.Parallel() + ctx := context.Background() + + t.Run("multipart data preserved as original format", func(t *testing.T) { + multipartData := `[{"text":"# Input Data\n\n","type":"text"},{"text":"[图片-1]\n","type":"text"},{"image_url":{"detail":{"image_resolution":"auto"},"url":"http://example.com/img.jpg"},"type":"image_url"}]` + span := &Span{ + Input: `{"content":` + multipartData + `}`, + } + + resultRaw, err := span.ExtractByJsonpathRaw(ctx, "Input", "content") + assert.NoError(t, err) + assert.Contains(t, resultRaw, `"detail":{"image_resolution":"auto"}`) + assert.Contains(t, resultRaw, `"type":"text"`) + assert.Contains(t, resultRaw, `"type":"image_url"`) + var parts []tracespec.ModelMessagePart + err = json.Unmarshal([]byte(resultRaw), &parts) + assert.Error(t, err) + }) + + t.Run("simple json should work same for both methods", func(t *testing.T) { + span := &Span{ + Input: `{"name": "test", "value": 123}`, + } + + resultRaw, err := span.ExtractByJsonpathRaw(ctx, "Input", "name") + assert.NoError(t, err) + assert.Equal(t, "test", resultRaw) + + resultRecursive, err := span.ExtractByJsonpath(ctx, "Input", "name") + assert.NoError(t, err) + assert.Equal(t, "test", resultRecursive) + }) + + t.Run("real multipart use case from dataset import", func(t *testing.T) { + multipartJSON := `[{"text":"# Input Data\n\u003cInput_Image\u003e\n","type":"text"},{"text":"[图片-1]\n","type":"text"},{"image_url":{"detail":"{\"image_resolution\":\"auto\"}","url":""},"type":"image_url"}]` + span := &Span{ + Input: `{"messages":[{"role":"user","content":` + multipartJSON + `}]}`, + } + + result, err := span.ExtractByJsonpathRaw(ctx, "Input", "messages[0].content") + assert.NoError(t, err) + + assert.Contains(t, result, `"type":"text"`) + assert.Contains(t, result, `"type":"image_url"`) + assert.Contains(t, result, `\u003cInput_Image\u003e`) + var parts []tracespec.ModelMessagePart + err = json.Unmarshal([]byte(result), &parts) + assert.NoError(t, err) + }) +} diff --git a/backend/modules/observability/domain/trace/service/trace_export_service.go b/backend/modules/observability/domain/trace/service/trace_export_service.go index 072a5b2c8..74bb029cb 100644 --- a/backend/modules/observability/domain/trace/service/trace_export_service.go +++ b/backend/modules/observability/domain/trace/service/trace_export_service.go @@ -499,9 +499,12 @@ func (r *TraceExportServiceImpl) buildItem(ctx context.Context, span *loop_span. } } } else { - value, err = span.ExtractByJsonpath(ctx, mapping.TraceFieldKey, mapping.TraceFieldJsonpath) + if mapping.FieldSchema.ContentType == entity.ContentType_MultiPart { + value, err = span.ExtractByJsonpathRaw(ctx, mapping.TraceFieldKey, mapping.TraceFieldJsonpath) + } else { + value, err = span.ExtractByJsonpath(ctx, mapping.TraceFieldKey, mapping.TraceFieldJsonpath) + } if err != nil { - // 非json但使用了jsonpath,也不报错,置空 logs.CtxInfo(ctx, "Extract field failed, err:%v", err) } } From df353369fba57ba0e5b6772d9908b58d16113ef7 Mon Sep 17 00:00:00 2001 From: "fengboyun.evan" Date: Tue, 3 Mar 2026 18:48:43 +0800 Subject: [PATCH 2/2] feat(backend): fix export detail field mismatch --- .../observability/domain/trace/entity/loop_span/span_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/modules/observability/domain/trace/entity/loop_span/span_test.go b/backend/modules/observability/domain/trace/entity/loop_span/span_test.go index fa8395afc..29d307317 100644 --- a/backend/modules/observability/domain/trace/entity/loop_span/span_test.go +++ b/backend/modules/observability/domain/trace/entity/loop_span/span_test.go @@ -5,13 +5,14 @@ package loop_span import ( "context" - "github.com/coze-dev/cozeloop-go/spec/tracespec" "reflect" "strconv" "strings" "testing" "time" + "github.com/coze-dev/cozeloop-go/spec/tracespec" + "github.com/coze-dev/coze-loop/backend/pkg/json" "github.com/stretchr/testify/assert"