Skip to content

Commit 9dd8d69

Browse files
authored
fix: Enhance synthesis task management and export functionality (#465)
* feat: add export functionality for synthesis tasks with format selection
* fix: adjust chunk size and overlap parameters for synthesis tasks
* refactor: modularize generation service and optimize concurrency settings
* feat: enhance archive functionality with format selection for synthesis tasks
* feat: add synthesis data management functions including delete and update operations
* feat: add synthesis data management functions including delete and update operations
* feat: add synthesis data management functions including delete and update operations
* feat: implement task executor for background processing and enhance task management
* feat: implement task executor for background processing and enhance task management
* feat: implement task executor for background processing and enhance task management
1 parent 4083f92 commit 9dd8d69

21 files changed

+2294
-970
lines changed

frontend/src/pages/SynthesisTask/CreateTask.tsx

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -53,8 +53,8 @@ export default function SynthesisTaskCreate() {
5353
| "PARAGRAPH_CHUNK"
5454
| "FIXED_LENGTH_CHUNK"
5555
| "CUSTOM_SEPARATOR_CHUNK",
56-
chunkSize: 3000,
57-
overlapSize: 100,
56+
chunkSize: 500,
57+
overlapSize: 50,
5858
delimiter: "",
5959
});
6060

frontend/src/pages/SynthesisTask/SynthFileTask.tsx

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -290,7 +290,7 @@ export default function SynthFileTask() {
290290
danger: true,
291291
confirm: {
292292
title: t('synthesisTask.fileTask.confirm.deleteTitle'),
293-
description: t('synthesisTask.fileTask.confirm.deleteDescription', { name: synthesisTask?.name || '' }),
293+
description: t('synthesisTask.fileTask.confirm.deleteDescription', { name: taskInfo?.name || '' }),
294294
okText: t('dataManagement.confirm.deleteConfirm'),
295295
cancelText: t('dataManagement.confirm.deleteCancel'),
296296
onConfirm: handleDelete,

frontend/src/pages/SynthesisTask/components/SynthesisTaskTab.tsx

Lines changed: 50 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,9 @@ export default function SynthesisTaskTab() {
6868
const [evalLoading, setEvalLoading] = useState(false);
6969
const [models, setModels] = useState<ModelI[]>([]);
7070
const [modelLoading, setModelLoading] = useState(false);
71+
const [archiveModalVisible, setArchiveModalVisible] = useState(false);
72+
const [currentArchiveTask, setCurrentArchiveTask] = useState<SynthesisTask | null>(null);
73+
const [archiveFormat, setArchiveFormat] = useState<string>("alpaca");
7174

7275
const [evalForm] = Form.useForm();
7376

@@ -210,7 +213,7 @@ export default function SynthesisTaskTab() {
210213
title: t('synthesisTask.home.columns.actions'),
211214
key: "actions",
212215
fixed: "right" as const,
213-
width: 120,
216+
width: 160,
214217
render: (_: unknown, task: SynthesisTask) => (
215218
<div className="flex items-center justify-start gap-1">
216219
<Tooltip title={t('synthesisTask.actions.viewDetail')}>
@@ -234,15 +237,7 @@ export default function SynthesisTaskTab() {
234237
type="text"
235238
className="hover:bg-green-50 p-1 h-7 w-7 flex items-center justify-center text-green-600"
236239
icon={<FolderOpenOutlined />}
237-
onClick={() => {
238-
Modal.confirm({
239-
title: t('synthesisTask.home.confirm.archiveTitle'),
240-
content: t('synthesisTask.home.confirm.archiveContent', { name: task.name }),
241-
okText: t('synthesisTask.actions.archive'),
242-
cancelText: t('synthesisTask.actions.cancel'),
243-
onOk: () => handleArchiveTask(task),
244-
});
245-
}}
240+
onClick={() => openArchiveModal(task)}
246241
/>
247242
</Tooltip>
248243
<Tooltip title={t('synthesisTask.actions.delete')}>
@@ -276,9 +271,8 @@ export default function SynthesisTaskTab() {
276271
},
277272
];
278273

279-
const handleArchiveTask = async (task: SynthesisTask) => {
274+
const handleArchiveTask = async (task: SynthesisTask, format: string = "alpaca") => {
280275
try {
281-
// 1. 创建目标数据集(使用简单的默认命名 + 随机后缀,可后续扩展为弹窗自定义)
282276
const randomSuffix = Math.random().toString(36).slice(2, 8);
283277
const datasetReq: {
284278
name: string;
@@ -302,18 +296,28 @@ export default function SynthesisTaskTab() {
302296
return;
303297
}
304298

305-
// 2. 调用后端归档接口,将合成数据写入该数据集
306-
await archiveSynthesisTaskToDatasetUsingPost(task.id, datasetId);
299+
await archiveSynthesisTaskToDatasetUsingPost(task.id, datasetId, format);
307300

308301
message.success(t('synthesisTask.home.archive.success'));
309-
// 3. 可选:跳转到数据集详情页
310302
navigate(`/data/management/detail/${datasetId}`);
311303
} catch (e) {
312304
console.error(e);
313305
message.error(t('synthesisTask.home.archive.failed'));
314306
}
315307
};
316308

309+
const openArchiveModal = (task: SynthesisTask) => {
310+
setCurrentArchiveTask(task);
311+
setArchiveFormat("alpaca");
312+
setArchiveModalVisible(true);
313+
};
314+
315+
const handleArchiveConfirm = async () => {
316+
if (!currentArchiveTask) return;
317+
setArchiveModalVisible(false);
318+
await handleArchiveTask(currentArchiveTask, archiveFormat);
319+
};
320+
317321
const openEvalModal = (task: SynthesisTask) => {
318322
setCurrentEvalTask(task);
319323
setEvalModalVisible(true);
@@ -464,6 +468,37 @@ export default function SynthesisTaskTab() {
464468
/>
465469
</Card>
466470

471+
<Modal
472+
title={t('synthesisTask.home.confirm.archiveTitle')}
473+
open={archiveModalVisible}
474+
onCancel={() => {
475+
setArchiveModalVisible(false);
476+
setCurrentArchiveTask(null);
477+
}}
478+
onOk={handleArchiveConfirm}
479+
okText={t('synthesisTask.actions.archive')}
480+
cancelText={t('synthesisTask.actions.cancel')}
481+
>
482+
<div>
483+
<p>{t('synthesisTask.home.confirm.archiveContent', { name: currentArchiveTask?.name })}</p>
484+
<div style={{ marginTop: 16 }}>
485+
<span style={{ marginRight: 8 }}>导出格式:</span>
486+
<Select
487+
value={archiveFormat}
488+
style={{ width: 120 }}
489+
options={[
490+
{ label: "Alpaca", value: "alpaca" },
491+
{ label: "ShareGPT", value: "sharegpt" },
492+
{ label: "原始格式", value: "raw" },
493+
]}
494+
onChange={(value) => {
495+
setArchiveFormat(value);
496+
}}
497+
/>
498+
</div>
499+
</div>
500+
</Modal>
501+
467502
<Modal
468503
title={t('synthesisTask.home.modal.evalTitle')}
469504
open={evalModalVisible}

frontend/src/pages/SynthesisTask/synthesis-api.ts

Lines changed: 14 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import { get, post, del } from "@/utils/request";
1+
import { get, post, del, patch } from "@/utils/request";
22

33
// 创建数据合成任务
44
export function createSynthesisTaskUsingPost(data: Record<string, unknown>) {
@@ -65,28 +65,25 @@ export function getPromptByTypeUsingGet(synthType: string) {
6565
}
6666

6767
// 将合成任务数据归档到已存在的数据集中
68-
export function archiveSynthesisTaskToDatasetUsingPost(taskId: string, datasetId: string) {
69-
return post(`/api/synthesis/gen/task/${taskId}/export-dataset/${datasetId}`);
68+
export function archiveSynthesisTaskToDatasetUsingPost(
69+
taskId: string,
70+
datasetId: string,
71+
format: string = "alpaca"
72+
) {
73+
return post(`/api/synthesis/gen/task/${taskId}/export-dataset/${datasetId}?format=${format}`);
7074
}
7175

72-
// ---------------- 数据记录级别:chunk 与 synthesis data ----------------
73-
74-
// 根据 chunkId 删除单个 chunk 及其下所有合成数据
76+
// 删除 chunk 及其关联的合成数据
7577
export function deleteChunkWithDataUsingDelete(chunkId: string) {
7678
return del(`/api/synthesis/gen/chunk/${chunkId}`);
7779
}
7880

79-
// 删除某个 chunk 下的所有合成数据,返回删除条数
80-
export function deleteSynthesisDataByChunkUsingDelete(chunkId: string) {
81-
return del(`/api/synthesis/gen/chunk/${chunkId}/data`);
82-
}
83-
84-
// 批量删除合成数据记录
85-
export function batchDeleteSynthesisDataUsingDelete(body: { ids: string[] }) {
86-
return del(`/api/synthesis/gen/data/batch`, null, { body: JSON.stringify(body) });
81+
// 批量删除合成数据
82+
export function batchDeleteSynthesisDataUsingDelete(data: { ids: string[] }) {
83+
return del("/api/synthesis/gen/data/batch", data as unknown as Record<string, never>);
8784
}
8885

89-
// 更新单条合成数据的完整 JSON 内容
90-
export function updateSynthesisDataUsingPatch(dataId: string, body: { data: Record<string, unknown> }) {
91-
return post(`/api/synthesis/gen/data/${dataId}`, body, { method: "PATCH" });
86+
// 更新合成数据
87+
export function updateSynthesisDataUsingPatch(dataId: string, data: { data: Record<string, unknown> }) {
88+
return patch(`/api/synthesis/gen/data/${dataId}`, data as unknown as Record<string, never>);
9289
}

frontend/src/utils/request.ts

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -405,6 +405,28 @@ class Request {
405405
return this.request(fullURL, config);
406406
}
407407

408+
/**
409+
* PATCH请求
410+
* @param {string} url - 请求URL
411+
* @param {object} data - 请求体数据
412+
* @param {object} options - 额外的fetch选项,包括showLoading, onUploadProgress, onDownloadProgress
413+
*/
414+
async patch(url, data = null, options = {}) {
415+
const config = {
416+
method: "PATCH",
417+
credentials: "include",
418+
mode: "cors",
419+
headers: {
420+
...this.defaultHeaders,
421+
...options.headers,
422+
},
423+
body: data ? JSON.stringify(data) : undefined,
424+
...options,
425+
};
426+
427+
return this.request(this.baseURL + url, config);
428+
}
429+
408430
/**
409431
* 从 Content-Disposition 头中解析文件名
410432
*/
@@ -661,6 +683,7 @@ export const get = request.get.bind(request);
661683
export const post = request.post.bind(request);
662684
export const put = request.put.bind(request);
663685
export const del = request.delete.bind(request);
686+
export const patch = request.patch.bind(request);
664687
export const download = request.download.bind(request);
665688
export const upload = request.upload.bind(request);
666689

frontend/vite.config.ts

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ export default defineConfig({
1515
host: "0.0.0.0",
1616
proxy: (() => {
1717
const pythonProxyConfig = {
18-
target: "http://localhost:32033",
18+
target: "http://localhost:18000",
1919
changeOrigin: true,
2020
secure: false,
2121
configure: (proxy: { on: (event: string, handler: (arg: unknown) => void) => void }) => {
@@ -32,7 +32,7 @@ export default defineConfig({
3232
};
3333

3434
const javaProxyConfig = {
35-
target: "http://localhost:32033",
35+
target: "http://localhost:8080",
3636
changeOrigin: true,
3737
secure: false,
3838
configure: (proxy: { on: (event: string, handler: (arg: unknown) => void) => void }) => {

runtime/datamate-python/app/core/exception/middleware.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ async def dispatch(self, request: Request, call_next):
6969
except Exception as exc:
7070
# 捕获所有未处理的异常
7171
logger.error(
72-
f"Unhandled exception occurred at {request.method} {request.url.path}", exc,
72+
f"Unhandled exception occurred at {request.method} {request.url.path}",
7373
exc_info=True
7474
)
7575
return self._error_response(

runtime/datamate-python/app/main.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
set_collection_scheduler,
2626
)
2727
from app.module.shared.schedule import Scheduler
28+
from app.module.generation.service.task_executor import init_executor, shutdown_executor
2829

2930
setup_logging()
3031
logger = get_logger(__name__)
@@ -68,10 +69,15 @@ def mask_db_url(url: str) -> Literal[b""] | str:
6869
set_collection_scheduler(collection_scheduler)
6970
await load_scheduled_collection_tasks()
7071

72+
# Initialize generation task executor
73+
init_executor(max_workers=10, max_concurrent_tasks=5)
74+
logger.info("Generation task executor initialized")
75+
7176
yield
7277

7378
# @shutdown
7479
collection_scheduler.shutdown()
80+
shutdown_executor()
7581
logger.info("DataMate Python Backend shutting down ...\n\n")
7682

7783

0 commit comments

Comments
 (0)