Skip to content

Commit 5fdb6dc

Browse files
committed
feat: add video message handling and improve video context processing
1 parent 57b3aa1 commit 5fdb6dc

File tree

8 files changed

+106
-50
lines changed

8 files changed

+106
-50
lines changed

apps/application/flow/step_node/image_understand_step_node/impl/base_image_understand_node.py

Lines changed: 31 additions & 23 deletions
Original file line number · Diff line number · Diff line change
@@ -77,8 +77,6 @@ def execute(self, model_id, system, prompt, dialogue_number, dialogue_type, hist
7777
image,
7878
**kwargs) -> NodeResult:
7979
# 处理不正确的参数
80-
if image is None or not isinstance(image, list):
81-
image = []
8280
workspace_id = self.workflow_manage.get_body().get('workspace_id')
8381
image_model = get_model_instance_by_model_workspace_id(model_id, workspace_id,
8482
**model_params_setting)
@@ -91,7 +89,7 @@ def execute(self, model_id, system, prompt, dialogue_number, dialogue_type, hist
9189
message_list = self.generate_message_list(image_model, system, prompt,
9290
self.get_history_message(history_chat_record, dialogue_number), image)
9391
self.context['message_list'] = message_list
94-
self.context['image_list'] = image
92+
self.generate_context_image(image)
9593
self.context['dialogue_type'] = dialogue_type
9694
if stream:
9795
r = image_model.stream(message_list)
@@ -104,6 +102,12 @@ def execute(self, model_id, system, prompt, dialogue_number, dialogue_type, hist
104102
'history_message': history_message, 'question': question.content}, {},
105103
_write_context=write_context)
106104

105+
def generate_context_image(self, image):
106+
if isinstance(image, str) and image.startswith('http'):
107+
self.context['image_list'] = [{'url': image}]
108+
elif image is not None and len(image) > 0:
109+
self.context['image_list'] = image
110+
107111
def get_history_message_for_details(self, history_chat_record, dialogue_number):
108112
start_index = len(history_chat_record) - dialogue_number
109113
history_message = reduce(lambda x, y: [*x, *y], [
@@ -164,28 +168,32 @@ def generate_history_human_message(self, chat_record):
164168
def generate_prompt_question(self, prompt):
165169
return HumanMessage(self.workflow_manage.generate_prompt(prompt))
166170

167-
def generate_message_list(self, image_model, system: str, prompt: str, history_message, image):
168-
if image is not None and len(image) > 0:
169-
# 处理多张图片
170-
images = []
171+
def _process_images(self, image):
172+
"""
173+
处理图像数据,转换为模型可识别的格式
174+
"""
175+
images = []
176+
if isinstance(image, str) and image.startswith('http'):
177+
images.append({'type': 'image_url', 'image_url': {'url': image}})
178+
elif image is not None and len(image) > 0:
171179
for img in image:
172-
if isinstance(img, str) and img.startswith('http'):
173-
images.append({'type': 'image_url', 'image_url': {'url': img}})
174-
else:
175-
file_id = img['file_id']
176-
file = QuerySet(File).filter(id=file_id).first()
177-
image_bytes = file.get_bytes()
178-
base64_image = base64.b64encode(image_bytes).decode("utf-8")
179-
image_format = what(None, image_bytes)
180-
images.append(
181-
{'type': 'image_url', 'image_url': {'url': f'data:image/{image_format};base64,{base64_image}'}})
182-
messages = [HumanMessage(
183-
content=[
184-
{'type': 'text', 'text': self.workflow_manage.generate_prompt(prompt)},
185-
*images
186-
])]
180+
file_id = img['file_id']
181+
file = QuerySet(File).filter(id=file_id).first()
182+
image_bytes = file.get_bytes()
183+
base64_image = base64.b64encode(image_bytes).decode("utf-8")
184+
image_format = what(None, image_bytes)
185+
images.append(
186+
{'type': 'image_url', 'image_url': {'url': f'data:image/{image_format};base64,{base64_image}'}})
187+
return images
188+
189+
def generate_message_list(self, image_model, system: str, prompt: str, history_message, image):
190+
prompt_text = self.workflow_manage.generate_prompt(prompt)
191+
images = self._process_images(image)
192+
193+
if images:
194+
messages = [HumanMessage(content=[{'type': 'text', 'text': prompt_text}, *images])]
187195
else:
188-
messages = [HumanMessage(self.workflow_manage.generate_prompt(prompt))]
196+
messages = [HumanMessage(prompt_text)]
189197

190198
if system is not None and len(system) > 0:
191199
return [

apps/application/flow/step_node/video_understand_step_node/impl/base_video_understand_node.py

Lines changed: 29 additions & 24 deletions
Original file line number · Diff line number · Diff line change
@@ -1,5 +1,6 @@
11
# coding=utf-8
22
import base64
3+
import mimetypes
34
import time
45
from functools import reduce
56
from imghdr import what
@@ -76,9 +77,6 @@ def execute(self, model_id, system, prompt, dialogue_number, dialogue_type, hist
7677
chat_record_id,
7778
video,
7879
**kwargs) -> NodeResult:
79-
# 处理不正确的参数
80-
if video is None or not isinstance(video, list):
81-
video = []
8280
workspace_id = self.workflow_manage.get_body().get('workspace_id')
8381
video_model = get_model_instance_by_model_workspace_id(model_id, workspace_id,
8482
**model_params_setting)
@@ -91,7 +89,7 @@ def execute(self, model_id, system, prompt, dialogue_number, dialogue_type, hist
9189
message_list = self.generate_message_list(video_model, system, prompt,
9290
self.get_history_message(history_chat_record, dialogue_number), video)
9391
self.context['message_list'] = message_list
94-
self.context['video_list'] = video
92+
self.generate_context_video(video)
9593
self.context['dialogue_type'] = dialogue_type
9694
if stream:
9795
r = video_model.stream(message_list)
@@ -104,6 +102,12 @@ def execute(self, model_id, system, prompt, dialogue_number, dialogue_type, hist
104102
'history_message': history_message, 'question': question.content}, {},
105103
_write_context=write_context)
106104

105+
def generate_context_video(self, video):
106+
if isinstance(video, str) and video.startswith('http'):
107+
self.context['video_list'] = [{'url': video}]
108+
elif video is not None and len(video) > 0:
109+
self.context['video_list'] = video
110+
107111
def get_history_message_for_details(self, history_chat_record, dialogue_number):
108112
start_index = len(history_chat_record) - dialogue_number
109113
history_message = reduce(lambda x, y: [*x, *y], [
@@ -164,28 +168,29 @@ def generate_history_human_message(self, chat_record):
164168
def generate_prompt_question(self, prompt):
165169
return HumanMessage(self.workflow_manage.generate_prompt(prompt))
166170

171+
def _process_videos(self, image):
172+
videos = []
173+
if isinstance(image, str) and image.startswith('http'):
174+
videos.append({'type': 'video_url', 'video_url': {'url': image}})
175+
elif image is not None and len(image) > 0:
176+
for img in image:
177+
file_id = img['file_id']
178+
file = QuerySet(File).filter(id=file_id).first()
179+
video_bytes = file.get_bytes()
180+
base64_video = base64.b64encode(video_bytes).decode("utf-8")
181+
video_format = mimetypes.guess_type(file.file_name)[0] # 获取MIME类型
182+
videos.append(
183+
{'type': 'video_url', 'video_url': {'url': f'data:{video_format};base64,{base64_video}'}})
184+
return videos
185+
167186
def generate_message_list(self, video_model, system: str, prompt: str, history_message, video):
168-
if video is not None and len(video) > 0:
169-
# 处理多张图片
170-
videos = []
171-
for img in video:
172-
if isinstance(img, str) and img.startswith('http'):
173-
videos.append({'type': 'video_url', 'video_url': {'url': img}})
174-
else:
175-
file_id = img['file_id']
176-
file = QuerySet(File).filter(id=file_id).first()
177-
video_bytes = file.get_bytes()
178-
base64_video = base64.b64encode(video_bytes).decode("utf-8")
179-
video_format = what(None, video_bytes)
180-
videos.append(
181-
{'type': 'video_url', 'video_url': {'url': f'data:video/{video_format};base64,{base64_video}'}})
182-
messages = [HumanMessage(
183-
content=[
184-
{'type': 'text', 'text': self.workflow_manage.generate_prompt(prompt)},
185-
*videos
186-
])]
187+
prompt_text = self.workflow_manage.generate_prompt(prompt)
188+
videos = self._process_videos(video)
189+
190+
if videos:
191+
messages = [HumanMessage(content=[{'type': 'text', 'text': prompt_text}, *videos])]
187192
else:
188-
messages = [HumanMessage(self.workflow_manage.generate_prompt(prompt))]
193+
messages = [HumanMessage(prompt_text)]
189194

190195
if system is not None and len(system) > 0:
191196
return [

ui/src/components/ai-chat/component/chat-input-operate/index.vue

Lines changed: 3 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -807,6 +807,7 @@ const getQuestion = () => {
807807
uploadImageList.value.length > 0,
808808
uploadDocumentList.value.length > 0,
809809
uploadAudioList.value.length > 0,
810+
uploadVideoList.value.length > 0,
810811
uploadOtherList.value.length > 0,
811812
]
812813
if (fileLength.filter((f) => f).length > 1) {
@@ -818,6 +819,8 @@ const getQuestion = () => {
818819
} else if (fileLength[2]) {
819820
return t('chat.uploadFile.audioMessage')
820821
} else if (fileLength[3]) {
822+
return t('chat.uploadFile.videoMessage')
823+
} else if (fileLength[4]) {
821824
return t('chat.uploadFile.otherMessage')
822825
}
823826
}

ui/src/components/ai-chat/component/knowledge-source-component/ExecutionDetailCard.vue

Lines changed: 15 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -102,6 +102,21 @@
102102
</template>
103103
</el-space>
104104
</div>
105+
<div v-if="data.video_list?.length > 0">
106+
<p class="mb-8 color-secondary">{{ $t('common.fileUpload.image') }}:</p>
107+
108+
<el-space wrap>
109+
<template v-for="(f, i) in data.video_list" :key="i">
110+
<video
111+
:src="f.url"
112+
style="width: 170px; display: block"
113+
controls
114+
autoplay
115+
class="border-r-6"
116+
/>
117+
</template>
118+
</el-space>
119+
</div>
105120
<div v-if="data.other_list?.length > 0">
106121
<p class="mb-8 color-secondary">{{ $t('common.fileUpload.document') }}:</p>
107122

@@ -581,8 +596,6 @@
581596
<video
582597
v-if="h.type === 'video_url'"
583598
:src="h.video_url.url"
584-
alt=""
585-
fit="cover"
586599
style="width: 40px; height: 40px; display: inline-block"
587600
class="border-r-6 mr-8"
588601
/>

ui/src/locales/lang/en-US/ai-chat.ts

Lines changed: 1 addition & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -76,6 +76,7 @@ export default {
7676
imageMessage: 'Please process the image content',
7777
documentMessage: 'Please understand the content of the document',
7878
audioMessage: 'Please understand the audio content',
79+
videoMessage: 'Please understand the video content',
7980
otherMessage: 'Please understand the file content',
8081
errorMessage: 'Upload Failed',
8182
fileMessage: 'Please process the file content',

ui/src/locales/lang/zh-CN/ai-chat.ts

Lines changed: 1 addition & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -74,6 +74,7 @@ export default {
7474
imageMessage: '请解析图片内容',
7575
documentMessage: '请理解文档内容',
7676
audioMessage: '请理解音频内容',
77+
videoMessage: '请理解视频内容',
7778
otherMessage: '请理解文件内容',
7879
errorMessage: '上传失败',
7980
fileMessage: '请解析文件内容',

ui/src/locales/lang/zh-Hant/ai-chat.ts

Lines changed: 1 addition & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -74,6 +74,7 @@ export default {
7474
imageMessage: '請解析圖片內容',
7575
documentMessage: '請理解檔案內容',
7676
audioMessage: '請理解音訊內容',
77+
videoMessage: '請理解視頻內容',
7778
otherMessage: '請理解檔案內容',
7879
fileMessage: '請解析文件內容',
7980
errorMessage: '上傳失敗',

ui/src/workflow/nodes/application-node/index.vue

Lines changed: 25 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -92,6 +92,28 @@
9292
v-model="form_data.audio_list"
9393
/>
9494
</el-form-item>
95+
<el-form-item
96+
v-if="form_data.hasOwnProperty('video_list') || 'video_list' in form_data"
97+
:label="$t('views.applicationWorkflow.nodes.videoUnderstandNode.video.label')"
98+
prop="video_list"
99+
:rules="{
100+
message: $t(
101+
'views.applicationWorkflow.nodes.videoUnderstandNode.video.requiredMessage',
102+
),
103+
trigger: 'blur',
104+
required: false,
105+
}"
106+
>
107+
<NodeCascader
108+
ref="nodeCascaderRef"
109+
:nodeModel="nodeModel"
110+
class="w-full"
111+
:placeholder="
112+
$t('views.applicationWorkflow.nodes.videoUnderstandNode.video.requiredMessage')
113+
"
114+
v-model="form_data.video_list"
115+
/>
116+
</el-form-item>
95117
<div v-for="(field, index) in form_data.api_input_field_list" :key="'api-input-' + index">
96118
<el-form-item
97119
:label="field.variable"
@@ -191,6 +213,7 @@ const form = {
191213
document_list: ['start-node', 'document'],
192214
image_list: ['start-node', 'image'],
193215
audio_list: ['start-node', 'audio'],
216+
video_list: ['start-node', 'video'],
194217
}
195218
196219
const applicationNodeFormRef = ref<FormInstance>()
@@ -294,8 +317,9 @@ const update_field = () => {
294317
handleFileUpload('document', fileUploadSetting.document)
295318
handleFileUpload('image', fileUploadSetting.image)
296319
handleFileUpload('audio', fileUploadSetting.audio)
320+
handleFileUpload('video', fileUploadSetting.video)
297321
} else {
298-
;['document_list', 'image_list', 'audio_list'].forEach((list) => {
322+
;['document_list', 'image_list', 'audio_list', 'video_list'].forEach((list) => {
299323
// eslint-disable-next-line vue/no-mutating-props
300324
delete props.nodeModel.properties.node_data[list]
301325
})

0 commit comments

Comments (0)