Skip to content

Commit 53efda2

Browse files
Ev4nFengCeloria
authored and committed
[feat][backend] Trace 2 dataset support video audio (#374)
* feat: video&audio
* chore: update audio definition
* feat: openapi
* fix(backend): add some logs
* feat(backend): support video & audio
* chore: gen code
* chore: gen code
* feat: max_file_size_by_type & supported_formats_by_type
* feat: multi modal spec dto cvt
* feat(backend): support video & audio
* feat(backend): support video & audio
* feat(backend): support video & audio
* feat(backend): remove unused function
* feat(backend): UT
* feat: convert
* feat: convert
* feat(backend): UT
* feat(backend): mod
* feat(backend): UT
* feat(backend): UT
* feat(backend): remove
* feat(backend): converter func move from ev 2 ob
* feat(backend): comment deal
---------
Co-authored-by: zhuangjiaqi.777 <zhuangjiaqi.777@bytedance.com>
1 parent 04a38d8 commit 53efda2

11 files changed

Lines changed: 446 additions & 118 deletions

File tree

backend/modules/observability/application/convertor/trace/trace_export.go

Lines changed: 6 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -5,14 +5,13 @@ package trace
55

66
import (
77
"github.com/bytedance/gg/gptr"
8-
98
"github.com/coze-dev/coze-loop/backend/kitex_gen/coze/loop/data/domain/dataset"
10-
eval_common "github.com/coze-dev/coze-loop/backend/kitex_gen/coze/loop/evaluation/domain/common"
119
dataset0 "github.com/coze-dev/coze-loop/backend/kitex_gen/coze/loop/observability/domain/dataset"
1210
"github.com/coze-dev/coze-loop/backend/kitex_gen/coze/loop/observability/trace"
1311
"github.com/coze-dev/coze-loop/backend/modules/observability/domain/trace/entity"
1412
"github.com/coze-dev/coze-loop/backend/modules/observability/domain/trace/entity/loop_span"
1513
"github.com/coze-dev/coze-loop/backend/modules/observability/domain/trace/service"
14+
"github.com/coze-dev/coze-loop/backend/modules/observability/infra/rpc/evaluationset"
1615
)
1716

1817
// ExportRequestDTO2DO 将导出请求从 DTO 转换为 DO
@@ -151,14 +150,14 @@ func convertDatasetConfigDTO2DO(config *trace.DatasetConfig) service.DatasetConf
151150
result.DatasetName = config.DatasetName
152151
}
153152
if config.IsSetDatasetSchema() {
154-
result.DatasetSchema = convertDatasetSchemaDTO2DO(config.GetDatasetSchema())
153+
result.DatasetSchema = ConvertDatasetSchemaDTO2DO(config.GetDatasetSchema())
155154
}
156155

157156
return result
158157
}
159158

160-
// convertDatasetSchemaDTO2DO 转换数据集模式
161-
func convertDatasetSchemaDTO2DO(schema *dataset0.DatasetSchema) entity.DatasetSchema {
159+
// ConvertDatasetSchemaDTO2DO 转换数据集模式
160+
func ConvertDatasetSchemaDTO2DO(schema *dataset0.DatasetSchema) entity.DatasetSchema {
162161
if schema == nil {
163162
return entity.DatasetSchema{}
164163
}
@@ -177,7 +176,7 @@ func convertDatasetSchemaDTO2DO(schema *dataset0.DatasetSchema) entity.DatasetSc
177176
Key: &key,
178177
Name: name,
179178
Description: description,
180-
ContentType: convertContentTypeDTO2DO(fs.GetContentType()),
179+
ContentType: evaluationset.ConvertContentTypeDTO2DO(fs.GetContentType()),
181180
TextSchema: textSchema,
182181
SchemaKey: entity.SchemaKey(fs.GetSchemaKey()),
183182
}
@@ -200,7 +199,7 @@ func ConvertFieldMappingsDTO2DO(mappings []*dataset0.FieldMapping) []entity.Fiel
200199
Key: mapping.GetFieldSchema().Key,
201200
Name: mapping.GetFieldSchema().GetName(),
202201
Description: mapping.GetFieldSchema().GetDescription(),
203-
ContentType: convertContentTypeDTO2DO(mapping.GetFieldSchema().GetContentType()),
202+
ContentType: evaluationset.ConvertContentTypeDTO2DO(mapping.GetFieldSchema().GetContentType()),
204203
SchemaKey: entity.SchemaKey(mapping.GetFieldSchema().GetSchemaKey()),
205204
TextSchema: mapping.GetFieldSchema().GetTextSchema(),
206205
},
@@ -350,19 +349,3 @@ func convertSpanIdsDTO2DO(spanIDs []*trace.SpanID) []service.SpanID {
350349
}
351350
return result
352351
}
353-
354-
// convertContentTypeDTO2DO 转换内容类型
355-
func convertContentTypeDTO2DO(contentType eval_common.ContentType) entity.ContentType {
356-
switch contentType {
357-
case eval_common.ContentTypeText:
358-
return entity.ContentType_Text
359-
case eval_common.ContentTypeImage:
360-
return entity.ContentType_Image
361-
case eval_common.ContentTypeAudio:
362-
return entity.ContentType_Audio
363-
case eval_common.ContentTypeMultiPart:
364-
return entity.ContentType_MultiPart
365-
default:
366-
return entity.ContentType_Text
367-
}
368-
}

backend/modules/observability/application/convertor/trace/trace_export_test.go

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@ package trace
55
import (
66
"testing"
77

8+
"github.com/coze-dev/coze-loop/backend/modules/observability/infra/rpc/evaluationset"
9+
810
"github.com/bytedance/gg/gptr"
911
"github.com/coze-dev/coze-loop/backend/kitex_gen/coze/loop/data/domain/dataset"
1012
eval_common "github.com/coze-dev/coze-loop/backend/kitex_gen/coze/loop/evaluation/domain/common"
@@ -498,7 +500,7 @@ func TestConvertDatasetSchemaDTO2DO(t *testing.T) {
498500

499501
for _, tt := range tests {
500502
t.Run(tt.name, func(t *testing.T) {
501-
got := convertDatasetSchemaDTO2DO(tt.schema)
503+
got := ConvertDatasetSchemaDTO2DO(tt.schema)
502504
assert.Equal(t, tt.want, got)
503505
})
504506
}
@@ -980,7 +982,7 @@ func TestConvertContentTypeDTO2DO(t *testing.T) {
980982

981983
for _, tt := range tests {
982984
t.Run(tt.name, func(t *testing.T) {
983-
got := convertContentTypeDTO2DO(tt.contentType)
985+
got := evaluationset.ConvertContentTypeDTO2DO(tt.contentType)
984986
assert.Equal(t, tt.want, got)
985987
})
986988
}

backend/modules/observability/domain/task/service/taskexe/processor/utils.go

Lines changed: 14 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,10 @@ import (
77
"context"
88
"time"
99

10+
"github.com/coze-dev/coze-loop/backend/modules/observability/infra/rpc/evaluationset"
11+
12+
"github.com/coze-dev/coze-loop/backend/modules/observability/application/convertor/trace"
13+
1014
"github.com/bytedance/gg/gptr"
1115
"github.com/bytedance/sonic"
1216
"github.com/coze-dev/coze-loop/backend/kitex_gen/coze/loop/evaluation/domain/common"
@@ -15,6 +19,7 @@ import (
1519
dataset0 "github.com/coze-dev/coze-loop/backend/kitex_gen/coze/loop/observability/domain/dataset"
1620
"github.com/coze-dev/coze-loop/backend/kitex_gen/coze/loop/observability/domain/task"
1721
task_entity "github.com/coze-dev/coze-loop/backend/modules/observability/domain/task/entity"
22+
1823
"github.com/coze-dev/coze-loop/backend/modules/observability/domain/trace/entity"
1924
"github.com/coze-dev/coze-loop/backend/modules/observability/domain/trace/entity/loop_span"
2025
"github.com/coze-dev/coze-loop/backend/pkg/json"
@@ -100,58 +105,26 @@ func getBasicEvaluationSetSchema(basicColumns []string) (*dataset0.DatasetSchema
100105
return evaluationSetSchema, fromEvalSet
101106
}
102107

103-
// todo:[xun]和手动回流的代码逻辑一样,需要抽取公共代码
104-
// convertDatasetSchemaDTO2DO 转换数据集模式
105108
func convertDatasetSchemaDTO2DO(schema *dataset0.DatasetSchema) entity.DatasetSchema {
106109
if schema == nil {
107110
return entity.DatasetSchema{}
108111
}
109-
110-
result := entity.DatasetSchema{}
111-
112+
result := trace.ConvertDatasetSchemaDTO2DO(schema)
112113
if schema.IsSetFieldSchemas() {
113114
fieldSchemas := schema.GetFieldSchemas()
114-
result.FieldSchemas = make([]entity.FieldSchema, len(fieldSchemas))
115+
// result.FieldSchemas = make([]entity.FieldSchema, len(fieldSchemas))
115116
for i, fs := range fieldSchemas {
116117
key := fs.GetKey()
117118
if key == "" {
118119
key = fs.GetName()
119120
}
120-
name := fs.GetName()
121-
description := fs.GetDescription()
122-
textSchema := fs.GetTextSchema()
123-
schemaKey := fs.GetSchemaKey()
124-
result.FieldSchemas[i] = entity.FieldSchema{
125-
Key: &key,
126-
Name: name,
127-
Description: description,
128-
ContentType: convertContentTypeDTO2DO(fs.GetContentType()),
129-
TextSchema: textSchema,
130-
SchemaKey: entity.SchemaKey(schemaKey),
131-
}
121+
result.FieldSchemas[i].Key = &key
132122
}
133123
}
134124

135125
return result
136126
}
137127

138-
// todo:[xun]和手动回流的代码逻辑一样,需要抽取公共代码
139-
// convertContentTypeDTO2DO 转换内容类型
140-
func convertContentTypeDTO2DO(contentType common.ContentType) entity.ContentType {
141-
switch contentType {
142-
case common.ContentTypeText:
143-
return entity.ContentType_Text
144-
case common.ContentTypeImage:
145-
return entity.ContentType_Image
146-
case common.ContentTypeAudio:
147-
return entity.ContentType_Audio
148-
case common.ContentTypeMultiPart:
149-
return entity.ContentType_MultiPart
150-
default:
151-
return entity.ContentType_Text
152-
}
153-
}
154-
155128
// todo:[xun]和手动回流的代码逻辑一样,需要抽取公共代码
156129
func buildItems(ctx context.Context, spans []*loop_span.Span, fieldMappings []*task_entity.EvaluateFieldMapping,
157130
evaluationSetSchema string, taskRunID string,
@@ -226,23 +199,24 @@ func buildItem(ctx context.Context, span *loop_span.Span, fieldMappings []*task_
226199
logs.CtxInfo(ctx, "Extract field failed, err:%v", err)
227200
continue
228201
}
229-
content, err := GetContentInfo(ctx, fieldSchema.ContentType, value)
230-
if err != nil {
231-
logs.CtxInfo(ctx, "GetContentInfo failed, err:%v", err)
202+
content, errCode := entity.GetContentInfo(ctx, fieldSchema.ContentType, value)
203+
if errCode == entity.DatasetErrorType_MismatchSchema {
204+
logs.CtxInfo(ctx, "GetContentInfo failed")
232205
return nil
233206
}
234207
fieldDatas = append(fieldDatas, &eval_set.FieldData{
235208
Key: key,
236209
Name: gptr.Of(fieldSchema.Name),
237-
Content: content,
210+
Content: evaluationset.ConvertContentDO2DTO(content),
238211
})
239212
}
240213
}
241214
}
242215
return fieldDatas
243216
}
244217

245-
// todo:[xun]和手动回流的代码逻辑一样,需要抽取公共代码
218+
// Deprecated: use common function entity.GetContentInfo instead
219+
// GetContentInfo todo:[xun]和手动回流的代码逻辑一样,需要抽取公共代码
246220
func GetContentInfo(ctx context.Context, contentType entity.ContentType, value string) (*common.Content, error) {
247221
var content *common.Content
248222
switch contentType {

backend/modules/observability/domain/task/service/taskexe/processor/utils_test.go

Lines changed: 19 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,8 @@ import (
88
"testing"
99
"time"
1010

11+
"github.com/coze-dev/coze-loop/backend/modules/observability/infra/rpc/evaluationset"
12+
1113
"github.com/bytedance/gg/gptr"
1214
"github.com/stretchr/testify/assert"
1315

@@ -187,14 +189,15 @@ func TestConvertContentTypeDTO2DO(t *testing.T) {
187189
{common.ContentTypeImage, entity.ContentType_Image},
188190
{common.ContentTypeAudio, entity.ContentType_Audio},
189191
{common.ContentTypeMultiPart, entity.ContentType_MultiPart},
192+
{common.ContentTypeVideo, entity.ContentType_Video},
190193
{"unknown", entity.ContentType_Text},
191194
}
192195

193196
for _, tt := range cases {
194197
tt := tt
195198
t.Run(tt.input, func(t *testing.T) {
196199
t.Parallel()
197-
assert.Equal(t, tt.expected, convertContentTypeDTO2DO(tt.input))
200+
assert.Equal(t, tt.expected, evaluationset.ConvertContentTypeDTO2DO(tt.input))
198201
})
199202
}
200203
}
@@ -312,10 +315,10 @@ func TestGetContentInfo(t *testing.T) {
312315
t.Parallel()
313316
ctx := context.Background()
314317

315-
content, err := GetContentInfo(ctx, common.ContentTypeText, "plain-text")
316-
assert.NoError(t, err)
317-
assert.Equal(t, common.ContentTypeText, content.GetContentType())
318-
assert.Equal(t, "plain-text", content.GetText())
318+
c, code := entity.GetContentInfo(ctx, entity.ContentType_Text, "plain-text")
319+
assert.Equal(t, int64(0), code)
320+
assert.Equal(t, entity.ContentType_Text, c.ContentType)
321+
assert.Equal(t, "plain-text", c.Text)
319322

320323
parts := []tracespec.ModelMessagePart{
321324
{
@@ -337,21 +340,20 @@ func TestGetContentInfo(t *testing.T) {
337340
payload, err := json.Marshal(parts)
338341
assert.NoError(t, err)
339342

340-
content, err = GetContentInfo(ctx, common.ContentTypeMultiPart, string(payload))
341-
assert.NoError(t, err)
342-
assert.Equal(t, common.ContentTypeMultiPart, content.GetContentType())
343-
assert.Len(t, content.GetMultiPart(), 3)
344-
assert.Equal(t, common.ContentTypeImage, content.GetMultiPart()[0].GetContentType())
345-
assert.Equal(t, common.ContentTypeText, content.GetMultiPart()[1].GetContentType())
343+
c, code = entity.GetContentInfo(ctx, entity.ContentType_MultiPart, string(payload))
344+
assert.Equal(t, int64(0), code)
345+
assert.Equal(t, entity.ContentType_MultiPart, c.ContentType)
346+
assert.Len(t, c.MultiPart, 3)
347+
assert.Equal(t, entity.ContentType_Image, c.MultiPart[0].ContentType)
348+
assert.Equal(t, entity.ContentType_Text, c.MultiPart[1].ContentType)
346349

347-
_, err = GetContentInfo(ctx, common.ContentTypeMultiPart, "invalid json")
348-
assert.Error(t, err)
350+
_, code = entity.GetContentInfo(ctx, entity.ContentType_MultiPart, "invalid json")
351+
assert.Equal(t, entity.DatasetErrorType_MismatchSchema, code)
349352

350-
// unsupported part type should return nil content without error
351353
parts = []tracespec.ModelMessagePart{{Type: "unsupported"}}
352354
payload, err = json.Marshal(parts)
353355
assert.NoError(t, err)
354-
content, err = GetContentInfo(ctx, common.ContentTypeMultiPart, string(payload))
355-
assert.NoError(t, err)
356-
assert.Nil(t, content)
356+
c, code = entity.GetContentInfo(ctx, entity.ContentType_MultiPart, string(payload))
357+
assert.Equal(t, entity.DatasetErrorType_MismatchSchema, code)
358+
assert.Nil(t, c)
357359
}

0 commit comments

Comments
 (0)