diff --git a/app-builder/plugins/aipp-file-extract-excel/pom.xml b/app-builder/plugins/aipp-file-extract-excel/pom.xml new file mode 100644 index 0000000000..174e17b214 --- /dev/null +++ b/app-builder/plugins/aipp-file-extract-excel/pom.xml @@ -0,0 +1,102 @@ + + + 4.0.0 + + + modelengine.fit.jade + app-builder-plugin-parent + 1.0.0-SNAPSHOT + + + modelengine.fit.jade.plugin + aipp-file-extract-excel + + + + + org.fitframework + fit-api + + + org.fitframework + fit-util + + + + + cn.idev.excel + fastexcel + + + + + modelengine.fit.jade + aipp-file-extract-service + + + modelengine.fit.jade + aipp-service + + + + + org.junit.jupiter + junit-jupiter + test + + + org.fitframework + fit-test-framework + test + + + org.assertj + assertj-core + test + + + + + + + org.fitframework + fit-build-maven-plugin + ${fit.version} + + + build-plugin + + build-plugin + + + + package-plugin + + package-plugin + + + + + + org.apache.maven.plugins + maven-antrun-plugin + ${maven.antrun.version} + + + install + + + + + + + run + + + + + + + \ No newline at end of file diff --git a/app-builder/plugins/aipp-file-extract-excel/src/main/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractor.java b/app-builder/plugins/aipp-file-extract-excel/src/main/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractor.java new file mode 100644 index 0000000000..07cf4d9c81 --- /dev/null +++ b/app-builder/plugins/aipp-file-extract-excel/src/main/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractor.java @@ -0,0 +1,189 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * This file is a part of the ModelEngine Project. + * Licensed under the MIT License. See License.txt in the project root for license information. 
+ *--------------------------------------------------------------------------------------------*/ + +package modelengine.fit.jade.aipp.file.extract; + +import cn.idev.excel.ExcelReader; +import cn.idev.excel.FastExcel; +import cn.idev.excel.context.AnalysisContext; +import cn.idev.excel.converters.Converter; +import cn.idev.excel.enums.CellDataTypeEnum; +import cn.idev.excel.metadata.GlobalConfiguration; +import cn.idev.excel.metadata.data.DataFormatData; +import cn.idev.excel.metadata.data.ReadCellData; +import cn.idev.excel.metadata.property.ExcelContentProperty; +import cn.idev.excel.read.listener.ReadListener; +import cn.idev.excel.read.metadata.ReadSheet; +import cn.idev.excel.util.DateUtils; +import cn.idev.excel.util.StringUtils; +import lombok.NonNull; +import modelengine.fit.jober.aipp.service.OperatorService; +import modelengine.fitframework.annotation.Component; +import modelengine.fitframework.annotation.Fitable; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.math.BigDecimal; +import java.nio.file.Files; +import java.nio.file.InvalidPathException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.text.SimpleDateFormat; +import java.util.Arrays; +import java.util.Date; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * Extractor for Excel files. + * + * @author 黄政炫 + * @since 2025-09-06 + */ +@Component +public class ExcelFileExtractor implements FileExtractor { + /** + * Renders a cell as a formatted string. + * + * @param cell the cell data {@link ReadCellData}. + * @param use1904windowing whether the workbook counts date serials from 1904 instead of 1900. + * @return the rendered content {@link String}. + */ + private static String getCellValueAsString(@NonNull ReadCellData<?> cell, boolean use1904windowing) { + switch (cell.getType()) { + case STRING: + return cell.getStringValue(); + case NUMBER: + DataFormatData fmt = cell.getDataFormatData(); + if (fmt != null && DateUtils.isADateFormat(fmt.getIndex(), fmt.getFormat())) { + Date date = 
DateUtils.getJavaDate(cell.getNumberValue().doubleValue(), use1904windowing); + return new SimpleDateFormat("yyyy-MM-dd").format(date); + } + BigDecimal num = cell.getNumberValue(); + return num.stripTrailingZeros().toPlainString(); + case BOOLEAN: + return Boolean.toString(cell.getBooleanValue()); + default: + return ""; + } + } + + /** + * This extractor supports the EXCEL and CSV file types. + * + * @return the names of the supported file-type enum constants {@link List}{@code <}{@link String}{@code >}. + */ + @Override + @Fitable(id = "get-fileType-excel") + public List<String> supportedFileTypes() { + return Arrays.asList(OperatorService.FileType.EXCEL.toString(), OperatorService.FileType.CSV.toString()); + } + + /** + * Checks whether the file path is valid. + * + * @param fileUrl the file path {@link String}. + * @return whether the path names an existing regular file {@code boolean}. + */ + private boolean isValidPath(String fileUrl) { + try { + Path path = Paths.get(fileUrl); + return Files.exists(path) && Files.isRegularFile(path); + } catch (InvalidPathException e) { + return false; + } + } + + /** + * Extracts the content of the Excel file at the given path and returns it as a string. + * + * @param fileUrl the file path {@link String}. + * @return the file content {@link String}. + */ + @Override + @Fitable(id = "extract-file-excel") + public String extractFile(String fileUrl) { + if (fileUrl == null || !isValidPath(fileUrl)) { + throw new IllegalArgumentException(String.format("Invalid FilePath. 
[fileUrl=%s]", fileUrl)); + } + File file = Paths.get(fileUrl).toFile(); + StringBuilder excelContent = new StringBuilder(); + ExcelReadListener listener = new ExcelReadListener(excelContent); + ExcelReader reader = null; + try (InputStream is = new BufferedInputStream(Files.newInputStream(file.toPath()))) { + reader = FastExcel.read(is, listener) + .registerConverter(new CustomCellStringConverter()) + .headRowNumber(0) + .build(); + + List<ReadSheet> sheets = reader.excelExecutor().sheetList(); + for (ReadSheet meta : sheets) { + excelContent.append("Sheet ").append(meta.getSheetNo() + 1).append(':').append('\n'); + ReadSheet readSheet = FastExcel.readSheet(meta.getSheetNo()).headRowNumber(0).build(); + reader.read(readSheet); + excelContent.append('\n'); // blank line closes each sheet section + } + } catch (IOException e) { + throw new IllegalStateException(String.format("Fail to extract excel file. [exception=%s]", e.getMessage()), + e); + } finally { + if (reader != null) { + reader.finish(); // release the reader's resources + } + } + return excelContent.toString(); + } + + /** + * Read listener that appends every row to the shared output buffer. + */ + private static class ExcelReadListener implements ReadListener<Map<Integer, String>> { + private final StringBuilder excelContent; + + ExcelReadListener(StringBuilder excelContent) { + this.excelContent = excelContent; + } + + @Override + public void invoke(Map<Integer, String> data, AnalysisContext context) { + String line = data.entrySet() + .stream() + .sorted(Map.Entry.comparingByKey()) + .map(e -> e.getValue() == null ? 
"" : e.getValue()) + .collect(Collectors.joining("\t")); + this.excelContent.append(line).append('\n'); + } + + @Override + public void doAfterAllAnalysed(AnalysisContext context) {} + } + + /** + * Custom cell-data converter. + * Hands every non-null cell to {@code getCellValueAsString} so that it is read as a string. + */ + public static class CustomCellStringConverter implements Converter<String> { + @Override + public Class<?> supportJavaTypeKey() { + return String.class; + } + + @Override + public CellDataTypeEnum supportExcelTypeKey() { + return null; + } + + @Override + public String convertToJavaData(ReadCellData<?> cellData, ExcelContentProperty contentProperty, + GlobalConfiguration globalConfiguration) { + boolean use1904 = Boolean.TRUE.equals(globalConfiguration.getUse1904windowing()); + return (cellData != null) ? getCellValueAsString(cellData, use1904) : StringUtils.EMPTY; + } + } +} diff --git a/app-builder/plugins/aipp-file-extract-excel/src/main/resources/application.yml b/app-builder/plugins/aipp-file-extract-excel/src/main/resources/application.yml new file mode 100644 index 0000000000..bcb7e72b91 --- /dev/null +++ b/app-builder/plugins/aipp-file-extract-excel/src/main/resources/application.yml @@ -0,0 +1,4 @@ +fit: + beans: + packages: + - 'modelengine.fit.jade.aipp.file.extract' \ No newline at end of file diff --git a/app-builder/plugins/aipp-file-extract-excel/src/test/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractorTest.java b/app-builder/plugins/aipp-file-extract-excel/src/test/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractorTest.java new file mode 100644 index 0000000000..ca977c1e73 --- /dev/null +++ b/app-builder/plugins/aipp-file-extract-excel/src/test/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractorTest.java @@ -0,0 +1,72 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * This file is a part of the ModelEngine Project. + * Licensed under the MIT License. See License.txt in the project root for license information. 
+ *--------------------------------------------------------------------------------------------*/ + +package modelengine.fit.jade.aipp.file.extract; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import modelengine.fit.jober.aipp.service.OperatorService; +import modelengine.fitframework.annotation.Fit; +import modelengine.fitframework.test.annotation.FitTestWithJunit; + +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +import java.io.File; +import java.util.Arrays; +import java.util.List; + +/** + * Test suite for {@link ExcelFileExtractor}. + * + * @author 黄政炫 + * @since 2025-09-06 + */ +@FitTestWithJunit(includeClasses = ExcelFileExtractor.class) +class ExcelFileExtractorTest { + @Fit + ExcelFileExtractor excelFileExtractor; + + @Test + @DisplayName("测试获取支持文件类型") + void supportedFileType() { + List<String> supportedTypes = + Arrays.asList(OperatorService.FileType.EXCEL.toString(), OperatorService.FileType.CSV.toString()); + assertThat(this.excelFileExtractor.supportedFileTypes()).isEqualTo(supportedTypes); + } + + @Test + @DisplayName("测试能否捕获错误路径") + void validPath() { + assertThrows(IllegalArgumentException.class, () -> { + this.excelFileExtractor.extractFile("invalidPath.csv"); + }); + } + + @Test + @DisplayName("测试 excel 文件提取成功") + void extractFile() { + File file = new File(this.getClass().getClassLoader().getResource("file/content.csv").getFile()); + String expected = """ + Sheet 1: + This is an excel test + ID\tName\tAge\tJoinDate\tActive\tSalary\tDepartment\tNotes + 1\tJohn Doe\t25\t2023-01-15\tTRUE\t8000.50\tIT\tRegular employee + 2\tJane Smith\t30\t2022-05-20\tTRUE\t12000.00\tMarketing\tTeam leader + 3\tBob Johnson\t28\t2023-03-10\tFALSE\t7500.00\tSales\tLeft company + 4\tAlice Brown\t35\t2020-12-01\tTRUE\t15000.75\tIT\tSenior engineer + 5\tTom Wilson\t22\t2023-08-25\tTRUE\t6000.00\tHR\tIntern + 6\t\t40\t2019-06-15\tTRUE\t18000.00\tFinance\tDepartment manager + 
7\tLucy Davis\t27\t2023-02-28\tFALSE\t7000.00\tOperations\tContract ended + 8\tMike Miller\t32\t2021-09-10\tTRUE\t13500.50\tIT\tProject lead + 9\tSarah Lee\t29\t2022-11-05\tTRUE\t9500.00\tMarketing\tMarketing specialist + 10\tDavid Zhang\t26\t2023-07-12\tTRUE\t8500.25\tSales\tSales representative + + """; + assertThat(this.excelFileExtractor.extractFile(file.getAbsolutePath())).isEqualTo(expected); + } +} \ No newline at end of file diff --git a/app-builder/plugins/aipp-file-extract-excel/src/test/resources/file/content.csv b/app-builder/plugins/aipp-file-extract-excel/src/test/resources/file/content.csv new file mode 100644 index 0000000000..4e03d459f0 --- /dev/null +++ b/app-builder/plugins/aipp-file-extract-excel/src/test/resources/file/content.csv @@ -0,0 +1,12 @@ +This is an excel test +ID,Name,Age,JoinDate,Active,Salary,Department,Notes +1,John Doe,25,2023-01-15,TRUE,8000.50,IT,"Regular employee" +2,Jane Smith,30,2022-05-20,TRUE,12000.00,Marketing,"Team leader" +3,Bob Johnson,28,2023-03-10,FALSE,7500.00,Sales,"Left company" +4,Alice Brown,35,2020-12-01,TRUE,15000.75,IT,"Senior engineer" +5,Tom Wilson,22,2023-08-25,TRUE,6000.00,HR,"Intern" +6,,40,2019-06-15,TRUE,18000.00,Finance,"Department manager" +7,Lucy Davis,27,2023-02-28,FALSE,7000.00,Operations,"Contract ended" +8,Mike Miller,32,2021-09-10,TRUE,13500.50,IT,"Project lead" +9,Sarah Lee,29,2022-11-05,TRUE,9500.00,Marketing,"Marketing specialist" +10,David Zhang,26,2023-07-12,TRUE,8500.25,Sales,"Sales representative" \ No newline at end of file diff --git a/app-builder/plugins/aipp-plugin/pom.xml b/app-builder/plugins/aipp-plugin/pom.xml index 53718801f6..8241a16855 100644 --- a/app-builder/plugins/aipp-plugin/pom.xml +++ b/app-builder/plugins/aipp-plugin/pom.xml @@ -143,6 +143,16 @@ org.redisson redisson + + + modelengine.fit.jade + aipp-file-extract-service + + + modelengine.fit.jade.plugin + aipp-file-extract-excel + test + diff --git 
a/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/service/impl/OperatorServiceImpl.java b/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/service/impl/OperatorServiceImpl.java index 93f14c6659..2095244114 100644 --- a/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/service/impl/OperatorServiceImpl.java +++ b/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/service/impl/OperatorServiceImpl.java @@ -10,6 +10,7 @@ import modelengine.fit.jober.aipp.common.exception.AippException; import modelengine.fit.jober.aipp.service.LLMService; import modelengine.fit.jober.aipp.service.OperatorService; +import modelengine.fit.jober.aipp.tool.FileExtractorContainer; import modelengine.fit.jober.aipp.util.AippFileUtils; import modelengine.fit.jober.aipp.util.AippStringUtils; import modelengine.fitframework.annotation.Component; @@ -20,12 +21,6 @@ import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.text.PDFTextStripper; import org.apache.poi.poifs.filesystem.FileMagic; -import org.apache.poi.ss.usermodel.Cell; -import org.apache.poi.ss.usermodel.DateUtil; -import org.apache.poi.ss.usermodel.Row; -import org.apache.poi.ss.usermodel.Sheet; -import org.apache.poi.ss.usermodel.Workbook; -import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.poi.xwpf.extractor.XWPFWordExtractor; import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.apache.poi.xwpf.usermodel.XWPFParagraph; @@ -41,13 +36,10 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Paths; -import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.Date; import java.util.EnumMap; -import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Objects; @@ -97,8 +89,8 @@ public class OperatorServiceImpl implements OperatorService { private final 
LLMService llmService; private final BrokerClient client; + private final FileExtractorContainer fileExtractorContainer; private final Function pdfExtractor = this::extractPdfFile; - private final Function excelExtractor = this::extractExcelFile; private final Function wordExtractor = this::extractWordFile; private final Function textExtractor = this::extractTextFile; private final EnumMap> outlineOperatorMap = @@ -113,7 +105,6 @@ public class OperatorServiceImpl implements OperatorService { { put(FileType.PDF, pdfExtractor); put(FileType.WORD, wordExtractor); - put(FileType.EXCEL, excelExtractor); put(FileType.TXT, textExtractor); put(FileType.HTML, textExtractor); put(FileType.MARKDOWN, textExtractor); @@ -121,30 +112,11 @@ public class OperatorServiceImpl implements OperatorService { } }; - public OperatorServiceImpl(LLMService llmService, BrokerClient client) { + public OperatorServiceImpl(LLMService llmService, BrokerClient client, + FileExtractorContainer fileExtractorContainer) { this.llmService = llmService; this.client = client; - } - - private static String getCellValueAsString(Cell cell) { - switch (cell.getCellType()) { - case STRING: - return cell.getStringCellValue(); - case NUMERIC: - if (DateUtil.isCellDateFormatted(cell)) { - Date dateCellValue = cell.getDateCellValue(); - SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); - return dateFormat.format(dateCellValue); - } else { - return Double.toString(cell.getNumericCellValue()); - } - case BOOLEAN: - return Boolean.toString(cell.getBooleanCellValue()); - case FORMULA: - return cell.getCellFormula(); - default: - return ""; - } + this.fileExtractorContainer = fileExtractorContainer; } private static String extractDocHandle(InputStream fis, String fileName) throws IOException { @@ -248,42 +220,12 @@ public File createDoc(String instanceId, String fileName, String txt) throws IOE public String fileExtractor(String fileUrl, Optional optionalFileType) { if (optionalFileType.isPresent()) { 
Function function = this.fileOperatorMap.get(optionalFileType.get()); - return Optional.ofNullable(function).map(f -> f.apply(fileUrl)).orElse(StringUtils.EMPTY); - } - return this.extractTextFile(fileUrl); - } + return fileExtractorContainer.extract(fileUrl, optionalFileType.get()) + .or(() -> Optional.ofNullable(function).map(f -> f.apply(fileUrl))) + .orElse(StringUtils.EMPTY); - private String iterExcel(Workbook workbook) { - StringBuilder excelContent = new StringBuilder(); - for (int sheetIndex = 0; sheetIndex < workbook.getNumberOfSheets(); sheetIndex++) { - Sheet sheet = workbook.getSheetAt(sheetIndex); - StringBuilder sheetContent = new StringBuilder(); - for (Row row : sheet) { - StringBuilder rowContent = new StringBuilder(); - Iterator cellIterator = row.cellIterator(); - while (cellIterator.hasNext()) { - Cell cell = cellIterator.next(); - String cellValue = getCellValueAsString(cell); - rowContent.append(cellValue).append("\t"); - } - sheetContent.append(rowContent.toString().trim()).append("\n"); - } - excelContent.append("Sheet ").append(sheetIndex + 1).append(":\n").append(sheetContent).append("\n"); - } - return excelContent.toString(); - } - - private String extractExcelFile(String fileUrl) { - File file = Paths.get(fileUrl).toFile(); - String excelContent = ""; - try (InputStream fis = new BufferedInputStream(Files.newInputStream(file.toPath()))) { - Workbook workbook = new XSSFWorkbook(fis); - excelContent = this.iterExcel(workbook); - } catch (IOException e) { - log.error("read excel fail.", e); - throw new AippException(AippErrCode.EXTRACT_FILE_FAILED); } - return excelContent; + return this.extractTextFile(fileUrl); } private String iterPdf(PDDocument doc) throws IOException { diff --git a/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java b/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java new file mode 100644 index 
0000000000..d205b5c07a --- /dev/null +++ b/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java @@ -0,0 +1,72 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * This file is a part of the ModelEngine Project. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +package modelengine.fit.jober.aipp.tool; + +import modelengine.fit.jade.aipp.file.extract.FileExtractor; +import modelengine.fit.jober.aipp.service.OperatorService; +import modelengine.fitframework.annotation.Component; +import modelengine.fitframework.log.Logger; +import modelengine.fitframework.util.CollectionUtils; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +/** + * Container that manages the available file extractors. + * + * @author 黄政炫 + * @since 2025-09-06 + */ +@Component +public class FileExtractorContainer { + private static final Logger log = Logger.get(FileExtractorContainer.class); + + /** + * Maps each file-type name to the extractors that declare support for it. + */ + private final Map<String, List<FileExtractor>> fileExtractorMap; + + /** + * Builds the lookup table from the extractors supplied to the constructor. + * + * @param extractors the file extractors {@link FileExtractor}. + */ + public FileExtractorContainer(List<FileExtractor> extractors) { + this.fileExtractorMap = new HashMap<>(); + for (FileExtractor fileExtractor : extractors) { + for (String supportedFileType : fileExtractor.supportedFileTypes()) { + this.fileExtractorMap.computeIfAbsent(supportedFileType, k -> new ArrayList<>()).add(fileExtractor); + } + } + } + + /** + * Finds an extractor that supports the file type and extracts the file with it. + * + * @param fileUrl the file path {@link String}. + * @param fileType the file type enum {@link OperatorService.FileType}. + * @return the extracted text, or empty when no extractor is registered {@link Optional}{@code <}{@link String}{@code >}. + */ + public Optional<String> extract(String fileUrl, 
OperatorService.FileType fileType) { + if (fileType == null) { + log.warn("File type cannot be null."); + return Optional.empty(); + } + List<FileExtractor> extractors = this.fileExtractorMap.get(fileType.toString()); + if (CollectionUtils.isEmpty(extractors)) { + return Optional.empty(); + } + if (extractors.size() > 1) { + log.warn("Multiple extractors found, using the first one instead. [name={}]", + extractors.get(0).getClass().getSimpleName()); + } + return Optional.ofNullable(extractors.get(0)).map(extractor -> extractor.extractFile(fileUrl)); + } +} diff --git a/app-builder/plugins/aipp-plugin/src/test/java/modelengine/fit/jober/aipp/service/OperatorServiceImplTest.java b/app-builder/plugins/aipp-plugin/src/test/java/modelengine/fit/jober/aipp/service/OperatorServiceImplTest.java index a26b08114f..73b28ed9f3 100644 --- a/app-builder/plugins/aipp-plugin/src/test/java/modelengine/fit/jober/aipp/service/OperatorServiceImplTest.java +++ b/app-builder/plugins/aipp-plugin/src/test/java/modelengine/fit/jober/aipp/service/OperatorServiceImplTest.java @@ -8,7 +8,9 @@ import static org.assertj.core.api.Assertions.assertThat; +import modelengine.fit.jade.aipp.file.extract.ExcelFileExtractor; import modelengine.fit.jober.aipp.service.impl.OperatorServiceImpl; +import modelengine.fit.jober.aipp.tool.FileExtractorContainer; import modelengine.fitframework.annotation.Fit; import modelengine.fitframework.test.annotation.FitTestWithJunit; import modelengine.fitframework.test.annotation.Mock; @@ -26,7 +28,7 @@ * @author 兰宇晨 * @since 2025-01-15 */ -@FitTestWithJunit(includeClasses = OperatorServiceImpl.class) +@FitTestWithJunit(includeClasses = {OperatorServiceImpl.class, FileExtractorContainer.class, ExcelFileExtractor.class}) @Disabled public class OperatorServiceImplTest { @Fit @@ -64,8 +66,7 @@ void shouldOkWhenExtractExcelFile() { } private String getContent(String filePath, OperatorService.FileType fileType) { - String fileUrl = "/path/mockurl.mock"; File file = new 
File(this.getClass().getClassLoader().getResource(filePath).getFile()); - return this.operatorService.fileExtractor(fileUrl, Optional.of(fileType)); + return this.operatorService.fileExtractor(file.getAbsolutePath(), Optional.of(fileType)); } } diff --git a/app-builder/plugins/pom.xml b/app-builder/plugins/pom.xml index dd7180c795..4a0ab34250 100644 --- a/app-builder/plugins/pom.xml +++ b/app-builder/plugins/pom.xml @@ -18,6 +18,7 @@ aipp-custom-model-center aipp-document-extract-node aipp-extractor + aipp-file-extract-excel aipp-http-call aipp-loop-tool aipp-memory diff --git a/app-builder/services/aipp-file-extract-service/pom.xml b/app-builder/services/aipp-file-extract-service/pom.xml new file mode 100644 index 0000000000..87757a5187 --- /dev/null +++ b/app-builder/services/aipp-file-extract-service/pom.xml @@ -0,0 +1,62 @@ + + + 4.0.0 + + + modelengine.fit.jade + app-builder-service-parent + 1.0.0-SNAPSHOT + + + aipp-file-extract-service + + + + + org.fitframework + fit-api + + + org.fitframework + fit-util + + + + + modelengine.fit.jade + aipp-service + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + ${maven.compiler.version} + + ${java.version} + ${java.version} + ${project.build.sourceEncoding} + + -parameters + + + + + org.fitframework + fit-build-maven-plugin + ${fit.version} + + + build-service + + build-service + + + + + + + \ No newline at end of file diff --git a/app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileExtractor.java b/app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileExtractor.java new file mode 100644 index 0000000000..c3b8bb1049 --- /dev/null +++ b/app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileExtractor.java @@ -0,0 +1,36 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) 2025 Huawei 
Technologies Co., Ltd. All rights reserved. + * This file is a part of the ModelEngine Project. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +package modelengine.fit.jade.aipp.file.extract; + +import modelengine.fitframework.annotation.Genericable; + +import java.util.List; + +/** + * Abstract interface for file extractors. + * + * @author 黄政炫 + * @since 2025-09-06 + */ +public interface FileExtractor { + /** + * Extracts the content of a file. + * + * @param fileUrl the file path {@link String}. + * @return the extracted file content {@link String}. + */ + @Genericable(id = "modelengine.fit.jade.file.extractFile") + String extractFile(String fileUrl); + + /** + * Returns the file types this extractor supports. + * + * @return the names of the supported file-type enum constants {@link List}{@code <}{@link String}{@code >}. + */ + @Genericable(id = "modelengine.fit.jade.file.getFileTypes") + List<String> supportedFileTypes(); +} diff --git a/app-builder/services/pom.xml b/app-builder/services/pom.xml index 952383ab08..46a9cd5419 100644 --- a/app-builder/services/pom.xml +++ b/app-builder/services/pom.xml @@ -16,6 +16,7 @@ aipp-classify-question aipp-code aipp-extractor + aipp-file-extract-service aipp-genericable aipp-http-call aipp-memory diff --git a/common/dependency/pom.xml b/common/dependency/pom.xml index b700cd44dd..72ea911cf5 100644 --- a/common/dependency/pom.xml +++ b/common/dependency/pom.xml @@ -58,6 +58,7 @@ 2.24 1.12.468 5.7.1 + 1.1.0 3.22.0 @@ -68,6 +69,7 @@ 2.16.1 + 1.0.0-SNAPSHOT @@ -370,6 +372,11 @@ aipp-extractor-service 1.0.0-SNAPSHOT + + modelengine.fit.jade + aipp-file-extract-service + 1.0.0-SNAPSHOT + @@ -559,6 +566,11 @@ opencsv ${opencsv.version} + + cn.idev.excel + fastexcel + ${fastexcel.version} + @@ -586,6 +598,11 @@ poi-scratchpad ${poi.version} + + modelengine.fit.jade.plugin + aipp-file-extract-excel + ${file-extract.version} +