Skip to content

Commit 3557c0c

Browse files
authored
fix: dataset updeletion bugs (#424)
* ⚡ delete huge recursive folder once for all * ⚡ latent upload folder error * 🔒 Uncontrolled data used in path expression #98
1 parent c01bfd6 commit 3557c0c

File tree

10 files changed

+356
-101
lines changed

10 files changed

+356
-101
lines changed

backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java

Lines changed: 123 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetRepository;
2525
import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
2626
import com.datamate.datamanagement.interfaces.dto.AddFilesRequest;
27+
import com.datamate.datamanagement.interfaces.dto.BatchDeleteFilesRequest;
2728
import com.datamate.datamanagement.interfaces.dto.CreateDirectoryRequest;
2829
import com.datamate.datamanagement.interfaces.dto.UploadFileRequest;
2930
import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest;
@@ -239,21 +240,89 @@ public void deleteDatasetFile(String datasetId, String fileId, String prefix) {
239240
// 删除文件时,上传到数据集中的文件会同时删除数据库中的记录和文件系统中的文件,归集过来的文件仅删除数据库中的记录
240241
if (file.getFilePath().startsWith(dataset.getPath())) {
241242
try {
242-
Path filePath = Paths.get(file.getFilePath());
243+
Path filePath = validateAndResolvePath(file.getFilePath(), dataset.getPath());
243244
Files.deleteIfExists(filePath);
244245
} catch (IOException ex) {
245246
throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
246247
}
247248
}
248249
}
249250

251+
/**
252+
* 批量删除文件
253+
*/
254+
@Transactional
255+
public void batchDeleteFiles(String datasetId, BatchDeleteFilesRequest request) {
256+
Dataset dataset = datasetRepository.getById(datasetId);
257+
if (dataset == null) {
258+
throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND);
259+
}
260+
261+
List<String> fileIds = request.getFileIds();
262+
if (fileIds == null || fileIds.isEmpty()) {
263+
throw BusinessException.of(CommonErrorCode.PARAM_ERROR);
264+
}
265+
266+
List<DatasetFile> filesToDelete = new ArrayList<>();
267+
List<String> failedFileIds = new ArrayList<>();
268+
269+
for (String fileId : fileIds) {
270+
try {
271+
DatasetFile file = getDatasetFile(dataset, fileId, request.getPrefix());
272+
filesToDelete.add(file);
273+
datasetFileRepository.removeById(fileId);
274+
} catch (Exception e) {
275+
log.error("Failed to delete file with id: {}", fileId, e);
276+
failedFileIds.add(fileId);
277+
}
278+
}
279+
280+
// 更新数据集(避免 ConcurrentModificationException)
281+
List<DatasetFile> datasetFiles = dataset.getFiles();
282+
if (datasetFiles != null) {
283+
// 创建一个新的列表来存储要保留的文件
284+
List<DatasetFile> remainingFiles = new ArrayList<>(datasetFiles);
285+
// 移除要删除的文件
286+
remainingFiles.removeAll(filesToDelete);
287+
dataset.setFiles(remainingFiles);
288+
}
289+
datasetRepository.updateById(dataset);
290+
291+
// 删除文件系统中的文件
292+
for (DatasetFile file : filesToDelete) {
293+
// 上传到数据集中的文件会同时删除数据库中的记录和文件系统中的文件,归集过来的文件仅删除数据库中的记录
294+
if (file.getFilePath().startsWith(dataset.getPath())) {
295+
try {
296+
Path filePath = validateAndResolvePath(file.getFilePath(), dataset.getPath());
297+
Files.deleteIfExists(filePath);
298+
} catch (IllegalArgumentException ex) {
299+
log.warn("Invalid file path detected, skipping deletion: {}", file.getFilePath());
300+
} catch (IOException ex) {
301+
log.error("Failed to delete file from filesystem: {}", file.getFilePath(), ex);
302+
}
303+
}
304+
}
305+
306+
// 如果有失败的文件,记录日志但不抛出异常
307+
if (!failedFileIds.isEmpty()) {
308+
log.warn("Failed to delete {} files out of {}", failedFileIds.size(), fileIds.size());
309+
}
310+
}
311+
250312
/**
251313
* 下载文件
252314
*/
253315
@Transactional(readOnly = true)
254316
public Resource downloadFile(DatasetFile file) {
255317
try {
256-
Path filePath = Paths.get(file.getFilePath()).normalize();
318+
// 获取对应的数据集以验证路径安全性
319+
Dataset dataset = datasetRepository.getById(file.getDatasetId());
320+
if (dataset == null) {
321+
throw new RuntimeException("Dataset not found for file: " + file.getFileName());
322+
}
323+
324+
// 验证路径安全性,防止路径遍历攻击
325+
Path filePath = validateAndResolvePath(file.getFilePath(), dataset.getPath());
257326
log.info("start download file {}", file.getFilePath());
258327
Resource resource = new UrlResource(filePath.toUri());
259328
if (resource.exists()) {
@@ -637,10 +706,14 @@ public void deleteDirectory(String datasetId, String prefix) {
637706
throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
638707
}
639708

640-
// 更新数据集
641-
dataset.setFiles(filesToDelete);
642-
for (DatasetFile file : filesToDelete) {
643-
dataset.removeFile(file);
709+
// 更新数据集(避免 ConcurrentModificationException,先获取文件列表再删除)
710+
List<DatasetFile> datasetFiles = dataset.getFiles();
711+
if (datasetFiles != null) {
712+
// 创建一个新的列表来存储要保留的文件
713+
List<DatasetFile> remainingFiles = new ArrayList<>(datasetFiles);
714+
// 移除要删除的文件
715+
remainingFiles.removeAll(filesToDelete);
716+
dataset.setFiles(remainingFiles);
644717
}
645718
datasetRepository.updateById(dataset);
646719
}
@@ -867,8 +940,24 @@ private void addFile(String sourPath, String targetPath, boolean softAdd) {
867940
if (StringUtils.isBlank(sourPath) || StringUtils.isBlank(targetPath)) {
868941
return;
869942
}
870-
Path source = Paths.get(sourPath).normalize();
871-
Path target = Paths.get(targetPath).normalize();
943+
944+
// 规范化并验证源文件路径
945+
Path source;
946+
try {
947+
source = Paths.get(sourPath).normalize();
948+
} catch (Exception e) {
949+
log.warn("Invalid source file path: {}", sourPath);
950+
throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
951+
}
952+
953+
// 规范化并验证目标文件路径
954+
Path target;
955+
try {
956+
target = Paths.get(targetPath).normalize();
957+
} catch (Exception e) {
958+
log.warn("Invalid target file path: {}", targetPath);
959+
throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
960+
}
872961

873962
// 检查源文件是否存在且为普通文件
874963
if (!Files.exists(source) || !Files.isRegularFile(source)) {
@@ -926,4 +1015,30 @@ private static DatasetFile getDatasetFileForAdd(AddFilesRequest req, AddFilesReq
9261015
.metadata(objectMapper.writeValueAsString(file.getMetadata()))
9271016
.build();
9281017
}
1018+
1019+
/**
1020+
* 安全地验证并获取文件路径,防止路径遍历攻击
1021+
*
1022+
* @param filePath 用户提供的文件路径
1023+
* @param basePath 允许的基础路径(数据集路径)
1024+
* @return 规范化后的绝对路径
1025+
* @throws IllegalArgumentException 如果路径不在基础路径内
1026+
*/
1027+
private Path validateAndResolvePath(String filePath, String basePath) {
1028+
if (StringUtils.isEmpty(filePath)) {
1029+
throw new IllegalArgumentException("File path cannot be empty");
1030+
}
1031+
1032+
Path normalizedPath = Paths.get(filePath).normalize();
1033+
Path normalizedBasePath = Paths.get(basePath).normalize();
1034+
1035+
// 验证规范化后的路径是否在基础路径内
1036+
if (!normalizedPath.startsWith(normalizedBasePath)) {
1037+
throw new IllegalArgumentException(
1038+
"File path is outside the allowed directory: " + filePath
1039+
);
1040+
}
1041+
1042+
return normalizedPath;
1043+
}
9291044
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
package com.datamate.datamanagement.interfaces.dto;
2+
3+
import jakarta.validation.constraints.NotEmpty;
4+
import lombok.AllArgsConstructor;
5+
import lombok.Data;
6+
import lombok.NoArgsConstructor;
7+
8+
import java.util.List;
9+
10+
/**
11+
* 批量删除文件请求
12+
*/
13+
@Data
14+
@NoArgsConstructor
15+
@AllArgsConstructor
16+
public class BatchDeleteFilesRequest {
17+
18+
/**
19+
* 要删除的文件ID列表
20+
*/
21+
@NotEmpty(message = "文件ID列表不能为空")
22+
private List<String> fileIds;
23+
24+
/**
25+
* 文件路径前缀(用于处理子目录中的文件)
26+
*/
27+
private String prefix = "";
28+
}

backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UploadFileRequest.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
package com.datamate.datamanagement.interfaces.dto;
22

3-
import com.datamate.datamanagement.interfaces.validation.ValidFileName;
3+
import com.datamate.datamanagement.interfaces.validation.ValidFilePath;
44
import jakarta.validation.constraints.Min;
55
import jakarta.validation.constraints.NotBlank;
66
import jakarta.validation.constraints.NotNull;
@@ -25,9 +25,9 @@ public class UploadFileRequest {
2525
@Min(value = 0, message = "文件编号必须为非负整数")
2626
private int fileNo;
2727

28-
/** 文件名称 */
28+
/** 文件名称(支持相对路径,用于文件夹上传) */
2929
@NotBlank(message = "文件名称不能为空")
30-
@ValidFileName
30+
@ValidFilePath
3131
@Size(max = 255, message = "文件名称长度不能超过255个字符")
3232
private String fileName;
3333

backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/rest/DatasetFileController.java

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
1212
import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
1313
import com.datamate.datamanagement.interfaces.dto.AddFilesRequest;
14+
import com.datamate.datamanagement.interfaces.dto.BatchDeleteFilesRequest;
1415
import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest;
1516
import com.datamate.datamanagement.interfaces.dto.CreateDirectoryRequest;
1617
import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse;
@@ -88,6 +89,21 @@ public ResponseEntity<Response<Void>> deleteDatasetFile(
8889
}
8990
}
9091

92+
/**
93+
* 批量删除文件
94+
*/
95+
@DeleteMapping("/batch")
96+
public ResponseEntity<Response<Void>> batchDeleteFiles(
97+
@PathVariable("datasetId") String datasetId,
98+
@RequestBody @Valid BatchDeleteFilesRequest request) {
99+
try {
100+
datasetFileApplicationService.batchDeleteFiles(datasetId, request);
101+
return ResponseEntity.ok().build();
102+
} catch (IllegalArgumentException e) {
103+
return ResponseEntity.status(HttpStatus.NOT_FOUND).body(Response.error(SystemErrorCode.UNKNOWN_ERROR, null));
104+
}
105+
}
106+
91107
@IgnoreResponseWrap
92108
@GetMapping(value = "/{fileId}/download", produces = MediaType.APPLICATION_OCTET_STREAM_VALUE + ";charset=UTF-8")
93109
public ResponseEntity<Resource> downloadDatasetFileById(@PathVariable("datasetId") String datasetId,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package com.datamate.datamanagement.interfaces.validation;
2+
3+
import jakarta.validation.Constraint;
4+
import jakarta.validation.Payload;
5+
6+
import java.lang.annotation.*;
7+
8+
/**
9+
* 文件路径校验注解
10+
* 验证文件路径不包含非法字符(允许 / 用于支持文件夹上传)
11+
*
12+
* @author DataMate
13+
* @since 2026/03/12
14+
*/
15+
@Documented
16+
@Constraint(validatedBy = ValidFilePathValidator.class)
17+
@Target({ElementType.FIELD, ElementType.PARAMETER})
18+
@Retention(RetentionPolicy.RUNTIME)
19+
public @interface ValidFilePath {
20+
21+
String message() default "文件路径包含非法字符";
22+
23+
Class<?>[] groups() default {};
24+
25+
Class<? extends Payload>[] payload() default {};
26+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
package com.datamate.datamanagement.interfaces.validation;
2+
3+
import com.datamate.datamanagement.infrastructure.exception.DataManagementErrorCode;
4+
import jakarta.validation.ConstraintValidator;
5+
import jakarta.validation.ConstraintValidatorContext;
6+
7+
import java.util.regex.Pattern;
8+
9+
/**
10+
* 文件路径校验器
11+
* 允许路径分隔符 / 用于支持文件夹上传
12+
*
13+
* @author DataMate
14+
* @since 2026/03/12
15+
*/
16+
public class ValidFilePathValidator implements ConstraintValidator<ValidFilePath, String> {
17+
18+
/**
19+
* 文件路径正则表达式
20+
* 不允许包含特殊字符: \ : * ? " < > | \0
21+
* 允许字母、数字、中文、常见符号(- _ . space /)
22+
* 注意:允许 / 是为了支持文件夹上传的相对路径
23+
*/
24+
private static final Pattern FILE_PATH_PATTERN = Pattern.compile(
25+
"^[^\\\\:*?\"<>|\\x00]+$"
26+
);
27+
28+
@Override
29+
public boolean isValid(String value, ConstraintValidatorContext context) {
30+
if (value == null || value.isEmpty()) {
31+
return true; // 空值由 @NotBlank 等其他注解处理
32+
}
33+
34+
boolean isValid = FILE_PATH_PATTERN.matcher(value).matches();
35+
36+
if (!isValid) {
37+
context.disableDefaultConstraintViolation();
38+
context.buildConstraintViolationWithTemplate(
39+
DataManagementErrorCode.FILE_NAME_INVALID.getMessage()
40+
).addConstraintViolation();
41+
}
42+
43+
return isValid;
44+
}
45+
}

backend/shared/domain-common/src/main/java/com/datamate/common/domain/utils/ChunksSaver.java

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,24 @@ public static Optional<File> save(ChunkUploadRequest fileUploadRequest, ChunkUpl
4949
}
5050

5151
File finalFile = new File(preUploadReq.getUploadPath(), fileUploadRequest.getFileName());
52+
// 确保父目录存在(处理嵌套文件夹上传的情况)
53+
File parentDir = finalFile.getParentFile();
54+
if (parentDir != null && !parentDir.exists()) {
55+
try {
56+
boolean created = parentDir.mkdirs();
57+
if (!created && !parentDir.exists()) {
58+
// mkdirs 返回 false 且目录仍不存在,才是真正的失败
59+
log.error("failed to create parent directory for file:{}, req Id:{}", finalFile.getPath(), fileUploadRequest.getReqId());
60+
throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
61+
}
62+
} catch (Exception e) {
63+
log.error("failed to create parent directory for file:{}, req Id:{}, error:{}", finalFile.getPath(), fileUploadRequest.getReqId(), e.getMessage(), e);
64+
throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
65+
}
66+
}
5267
if (!targetFile.renameTo(finalFile)) {
5368
log.error("failed to mv file:{}, req Id:{}", targetFile.getName(), fileUploadRequest.getReqId());
54-
throw new IllegalArgumentException("failed to move file to target dir");
69+
throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
5570
}
5671
log.debug("save chunk {} cost {}", fileUploadRequest.getChunkNo(),
5772
ChronoUnit.MILLIS.between(startTime, LocalDateTime.now()));
@@ -76,6 +91,21 @@ private static InputStream getFileInputStream(MultipartFile file) {
7691
public static File saveFile(ChunkUploadRequest fileUploadRequest, ChunkUploadPreRequest preUploadReq) {
7792
// 保存文件
7893
File targetFile = new File(preUploadReq.getUploadPath(), fileUploadRequest.getFileName());
94+
// 确保父目录存在(处理嵌套文件夹上传的情况)
95+
File parentDir = targetFile.getParentFile();
96+
if (parentDir != null && !parentDir.exists()) {
97+
try {
98+
boolean created = parentDir.mkdirs();
99+
if (!created && !parentDir.exists()) {
100+
// mkdirs 返回 false 且目录仍不存在,才是真正的失败
101+
log.error("failed to create parent directory for file:{}", targetFile.getPath());
102+
throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
103+
}
104+
} catch (Exception e) {
105+
log.error("failed to create parent directory for file:{}, error:{}", targetFile.getPath(), e.getMessage(), e);
106+
throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
107+
}
108+
}
79109
try {
80110
log.info("file path {}, file size {}", targetFile.toPath(), targetFile.getTotalSpace());
81111
FileUtils.copyInputStreamToFile(getFileInputStream(fileUploadRequest.getFile()), targetFile);

0 commit comments

Comments
 (0)