Skip to content

Commit 97e5992

Browse files
committed
Merge remote-tracking branch 'origin/develop_930' into develop_930
2 parents 6efcb66 + f0d9e20 commit 97e5992

5 files changed

Lines changed: 152 additions & 33 deletions

File tree

frontend/src/mock/mock-seed/data-cleansing.cjs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,16 @@ function operatorItem() {
1010
inputs: Mock.Random.integer(1, 5),
1111
outputs: Mock.Random.integer(1, 5),
1212
settings: JSON.stringify({
13+
fileLength: {
14+
name: "文档字数",
15+
description:
16+
"过滤字数不在指定范围内的文档,如[10,10000000]。若输入为空,则不对字数上/下限做限制。",
17+
type: "range",
18+
defaultVal: [10, 10000000],
19+
min: 0,
20+
max: 10000000000000000,
21+
step: 1,
22+
},
1323
host: { type: "input", name: "主机地址", defaultVal: "localhost" },
1424
limit: {
1525
type: "range",
@@ -26,6 +36,12 @@ function operatorItem() {
2636
defaultVal: "utf-8",
2737
options: ["utf-8", "gbk", "ascii"],
2838
},
39+
radio: {
40+
type: "radio",
41+
name: "radio",
42+
defaultVal: "utf-8",
43+
options: ["utf-8", "gbk", "ascii"],
44+
},
2945
features: {
3046
type: "checkbox",
3147
name: "特征列",

frontend/src/pages/DataCleansing/Create/components/ParamConfig.tsx

Lines changed: 111 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,14 @@ import {
77
Form,
88
InputNumber,
99
Slider,
10+
Space,
1011
} from "antd";
11-
import { OperatorI } from "@/pages/OperatorMarket/operator.model";
12+
import { ConfigI, OperatorI } from "@/pages/OperatorMarket/operator.model";
1213

1314
interface ParamConfigProps {
1415
operator: OperatorI;
1516
paramKey: string;
16-
param: any;
17+
param: ConfigI;
1718
onParamChange?: (operatorId: string, paramKey: string, value: any) => void;
1819
}
1920

@@ -23,15 +24,21 @@ const ParamConfig: React.FC<ParamConfigProps> = ({
2324
param,
2425
onParamChange,
2526
}) => {
27+
if (!param) return null;
2628
const [value, setValue] = React.useState(param.value || param.defaultVal);
2729
const updateValue = (newValue: any) => {
2830
setValue(newValue);
2931
return onParamChange && onParamChange(operator.id, paramKey, newValue);
3032
};
33+
3134
switch (param.type) {
3235
case "input":
3336
return (
34-
<Form.Item label={param.name} tooltip={param.description} key={paramKey}>
37+
<Form.Item
38+
label={param.name}
39+
tooltip={param.description}
40+
key={paramKey}
41+
>
3542
<Input
3643
value={value}
3744
onChange={(e) => updateValue(e.target.value)}
@@ -42,7 +49,11 @@ const ParamConfig: React.FC<ParamConfigProps> = ({
4249
);
4350
case "select":
4451
return (
45-
<Form.Item label={param.name} tooltip={param.description} key={paramKey}>
52+
<Form.Item
53+
label={param.name}
54+
tooltip={param.description}
55+
key={paramKey}
56+
>
4657
<Select
4758
value={value}
4859
onChange={updateValue}
@@ -58,7 +69,11 @@ const ParamConfig: React.FC<ParamConfigProps> = ({
5869
);
5970
case "radio":
6071
return (
61-
<Form.Item label={param.name} tooltip={param.description} key={paramKey}>
72+
<Form.Item
73+
label={param.name}
74+
tooltip={param.description}
75+
key={paramKey}
76+
>
6277
<Radio.Group
6378
value={value}
6479
onChange={(e) => updateValue(e.target.value)}
@@ -76,7 +91,11 @@ const ParamConfig: React.FC<ParamConfigProps> = ({
7691
);
7792
case "checkbox":
7893
return (
79-
<Form.Item label={param.name} tooltip={param.description} key={paramKey}>
94+
<Form.Item
95+
label={param.name}
96+
tooltip={param.description}
97+
key={paramKey}
98+
>
8099
<Checkbox.Group
81100
value={value}
82101
onChange={updateValue}
@@ -86,17 +105,22 @@ const ParamConfig: React.FC<ParamConfigProps> = ({
86105
);
87106
case "slider":
88107
return (
89-
<Form.Item label={param.name} tooltip={param.description} key={paramKey}>
108+
<Form.Item
109+
label={param.name}
110+
tooltip={param.description}
111+
key={paramKey}
112+
>
90113
<div className="flex items-center gap-1">
91114
<Slider
92115
value={value}
93116
onChange={updateValue}
94117
tooltip={{ open: true }}
95118
marks={{
96-
[param.min || 0]: param.minLabel || `${param.min || 0}`,
97-
[param.min + (param.max - param.min) / 2]:
98-
param.midLabel || `${(param.min + param.max) / 2}`,
99-
[param.max || 100]: param.maxLabel || `${param.max || 100}`,
119+
[param.min || 0]: `${param.min || 0}`,
120+
[param.min + (param.max - param.min) / 2]: `${
121+
(param.min + param.max) / 2
122+
}`,
123+
[param.max || 100]: `${param.max || 100}`,
100124
}}
101125
min={param.min || 0}
102126
max={param.max || 100}
@@ -114,28 +138,94 @@ const ParamConfig: React.FC<ParamConfigProps> = ({
114138
</div>
115139
</Form.Item>
116140
);
117-
case "range":
141+
case "range": {
142+
const min = param.min || 0;
143+
const max = param.max || 100;
118144
return (
119-
<Form.Item label={param.name} tooltip={param.description} key={paramKey}>
145+
<Form.Item
146+
label={param.name}
147+
tooltip={param.description}
148+
key={paramKey}
149+
>
120150
<Slider
121151
value={Array.isArray(value) ? value : [value, value]}
122152
onChange={(val) =>
123153
updateValue(Array.isArray(val) ? val : [val, val])
124154
}
125155
range
126-
marks={{
127-
[param.min || 0]: param.minLabel || `${param.min || 0}`,
128-
[param.min + (param.max - param.min) / 2]:
129-
param.midLabel || `${(param.min + param.max) / 2}`,
130-
[param.max || 100]: param.maxLabel || `${param.max || 100}`,
131-
}}
132-
min={param.min || 0}
133-
max={param.max || 100}
156+
min={min}
157+
max={max}
134158
step={param.step || 1}
135159
className="w-full"
136160
/>
161+
<Space>
162+
<InputNumber
163+
min={min}
164+
max={max}
165+
value={value[0]}
166+
onChange={(val1) => updateValue([val1, value[1]])}
167+
changeOnWheel
168+
/>
169+
~
170+
<InputNumber
171+
min={min}
172+
max={max}
173+
value={value[1]}
174+
onChange={(val2) => updateValue([value[0], val2])}
175+
changeOnWheel
176+
/>
177+
</Space>
137178
</Form.Item>
138179
);
180+
}
181+
case "inputNumber":
182+
return (
183+
<Form.Item
184+
label={param.name}
185+
tooltip={param.description}
186+
key={paramKey}
187+
>
188+
<InputNumber
189+
value={value}
190+
onChange={(val) => updateValue(val)}
191+
placeholder={`请输入${param.name}`}
192+
className="w-full"
193+
min={param.min}
194+
max={param.max}
195+
step={param.step || 1}
196+
/>
197+
</Form.Item>
198+
);
199+
200+
case "switch":
201+
return (
202+
<Form.Item
203+
label={param.name}
204+
tooltip={param.description}
205+
key={paramKey}
206+
>
207+
<Checkbox
208+
checked={value as boolean}
209+
onChange={(e) => updateValue(e.target.checked)}
210+
>
211+
{param.name}
212+
</Checkbox>
213+
</Form.Item>
214+
);
215+
case "multiple":
216+
return (
217+
<div className="pl-4 border-l border-gray-300">
218+
{param.properties.map((subParam) => (
219+
<Config
220+
key={subParam.key}
221+
operator={operator}
222+
paramKey={subParam.key}
223+
param={subParam}
224+
onParamChange={onParamChange}
225+
/>
226+
))}
227+
</div>
228+
);
139229
default:
140230
return null;
141231
}

frontend/src/pages/DataCleansing/Create/hooks/useOperatorOperations.ts

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,6 @@ export function useOperatorOperations() {
106106
paramKey: string,
107107
value: any
108108
) => {
109-
console.log(operatorId, paramKey, value);
110-
111109
setSelectedOperators((prev) =>
112110
prev.map((op) =>
113111
op.id === operatorId

frontend/src/pages/OperatorMarket/operator.model.ts

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,26 @@
/**
 * Schema of a single configurable operator parameter.
 *
 * The parameter form switches on `type` to pick the control to render, shows
 * `name` as the field label and `description` as its tooltip, and seeds the
 * control with `value`, falling back to `defaultVal`.
 */
export interface ConfigI {
  /** Selects which form control renders this parameter. */
  type:
    | "input"
    | "select"
    | "radio"
    | "checkbox"
    | "range"
    | "slider"
    | "inputNumber"
    | "switch"
    | "multiple";
  /**
   * Display label of the parameter. Optional so that objects built against
   * the previous shape of this interface keep type-checking.
   */
  name?: string;
  /** Current value; when absent the form falls back to `defaultVal`. */
  value?: number | string | boolean | string[] | number[];
  required?: boolean;
  /** Help text shown as the form item's tooltip. */
  description?: string;
  key: string;
  /**
   * Initial value. `number[]` is allowed because "range" parameters default
   * to a `[lower, upper]` pair, e.g. `[10, 10000000]`.
   */
  defaultVal: number | string | boolean | string[] | number[];
  options?: string[] | { label: string; value: string }[];
  /** Lower bound passed to numeric controls (slider / number input). */
  min?: number;
  /** Upper bound passed to numeric controls (slider / number input). */
  max?: number;
  /** Increment passed to numeric controls. */
  step?: number;
  properties?: ConfigI[]; // used for nested configuration ("multiple" type)
}
23+
124
export interface OperatorI {
225
id: string;
326
name: string;
@@ -12,15 +35,7 @@ export interface OperatorI {
1235
overrides?: { [key: string]: any }; // 用户配置的参数
1336
defaultParams?: { [key: string]: any }; // 默认参数
1437
configs: {
15-
[key: string]: {
16-
type: "input" | "select" | "radio" | "checkbox" | "range";
17-
label: string;
18-
value: any;
19-
options?: string[] | { label: string; value: any }[];
20-
min?: number;
21-
max?: number;
22-
step?: number;
23-
};
38+
[key: string]: ConfigI;
2439
};
2540
}
2641

scripts/db/data-engine-operator-init.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ VALUES ('TextFormatter', 'TXT文本抽取', '抽取TXT中的文本。', '1.0.0',
7474
('FileWithHighSpecialCharRateFilter', '文档特殊字符率检查', '去除特殊字符过多的文档。', '1.0.0', 'text', 'text', null, '{"specialCharRatio": {"name": "文档特殊字符率", "description": "特殊字符的统计数/文档总字数 > 设定值,该文档被去除。", "type": "slider", "defaultVal": 0.3, "min": 0, "max": 1, "step": 0.1}}', '', 'false'),
7575
('DuplicateFilesFilter', '相似文档去除', '相似文档去除。', '1.0.0', 'text', 'text', null, '{"fileDuplicateThreshold": {"name": "文档相似度", "description": "基于MinHash算法和Jaccard相似度,计算当前文档与数据集中其它文档相似性,超过设定值,该文档被去除。", "type": "slider", "defaultVal": 0.5, "min": 0, "max": 1, "step": 0.1}}', '', 'false'),
7676
('FileWithManySensitiveWordsFilter', '文档敏感词率检查', '去除敏感词过多的文档。', '1.0.0', 'text', 'text', null, '{"sensitiveWordsRate": {"name": "文档敏感词率", "description": "敏感词的字数/文档总字数 > 设定值,该文档被去除。", "type": "slider", "defaultVal": 0.01, "min": 0, "max": 1, "step": 0.01}}', '', 'false'),
77-
('FileWithShortOrLongLengthFilter', '文档字数检查', '字数不在指定范围会被过滤掉。', '1.0.0', 'text', 'text', null, '{"fileLength": {"name": "文档字数", "description": "过滤字数不在指定范围内的文档,如[10,10000000]。若输入为空,则不对字数上/下限做限制。", "type": "range", "properties": [{"name": "fileMinimumLength", "type": "inputNumber", "defaultVal": 10, "min": 0, "max": 10000000000000000, "step": 1}, {"name": "fileMaximumLength", "type": "inputNumber", "defaultVal": 10000000, "min": 0, "max": 10000000000000000, "step": 1}]}}', '', 'false'),
77+
('FileWithShortOrLongLengthFilter', '文档字数检查', '字数不在指定范围会被过滤掉。', '1.0.0', 'text', 'text', null, '{"fileLength": {"name": "文档字数", "description": "过滤字数不在指定范围内的文档,如[10,10000000]。若输入为空,则不对字数上/下限做限制。", "type": "range", "defaultVal": [10, 10000000], "min": 0, "max": 10000000000000000, "step": 1}}', '', 'false'),
7878
('ContentCleaner', '文档目录去除', '去除文档中的目录。', '1.0.0', 'text', 'text', null, null, '', 'false'),
7979
('AnonymizedCreditCardNumber', '信用卡号匿名化', '信用卡号匿名化', '1.0.0', 'text', 'text', null, null, '', 'false'),
8080
('EmailNumberCleaner', '邮件地址匿名化', '邮件地址匿名化', '1.0.0', 'text', 'text', null, null, '', 'false'),

0 commit comments

Comments
 (0)