Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 102 additions & 0 deletions app-builder/plugins/aipp-file-extract-excel/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the aipp-file-extract-excel plugin module. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<parent>
<groupId>modelengine.fit.jade</groupId>
<artifactId>app-builder-plugin-parent</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>

<groupId>modelengine.fit.jade.plugin</groupId>
<artifactId>aipp-file-extract-excel</artifactId>

<!-- No dependency below declares a version; presumably they are managed by the parent POM
     (TODO confirm against app-builder-plugin-parent). -->
<dependencies>
<!-- FIT -->
<dependency>
<groupId>org.fitframework</groupId>
<artifactId>fit-api</artifactId>
</dependency>
<dependency>
<groupId>org.fitframework</groupId>
<artifactId>fit-util</artifactId>
</dependency>

<!-- fast excel -->
<dependency>
<groupId>cn.idev.excel</groupId>
<artifactId>fastexcel</artifactId>
</dependency>

<!-- Services -->
<dependency>
<groupId>modelengine.fit.jade</groupId>
<artifactId>aipp-file-extract-service</artifactId>
</dependency>
<dependency>
<groupId>modelengine.fit.jade</groupId>
<artifactId>aipp-service</artifactId>
</dependency>

<!-- Tests -->
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.fitframework</groupId>
<artifactId>fit-test-framework</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<scope>test</scope>
</dependency>
</dependencies>

<build>
<plugins>
<!-- FIT build plugin: runs its build-plugin and package-plugin goals for this module. -->
<plugin>
<groupId>org.fitframework</groupId>
<artifactId>fit-build-maven-plugin</artifactId>
<version>${fit.version}</version>
<executions>
<execution>
<id>build-plugin</id>
<goals>
<goal>build-plugin</goal>
</goals>
</execution>
<execution>
<id>package-plugin</id>
<goals>
<goal>package-plugin</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- During the install phase, copy the built jar into ../../../build/plugins
     (path is relative to this module's directory). -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<version>${maven.antrun.version}</version>
<executions>
<execution>
<phase>install</phase>
<configuration>
<target>
<copy file="${project.build.directory}/${project.build.finalName}.jar"
todir="../../../build/plugins"/>
</target>
</configuration>
<goals>
<goal>run</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
* This file is a part of the ModelEngine Project.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/

package modelengine.fit.jade.aipp.file.extract;

import cn.idev.excel.ExcelReader;
import cn.idev.excel.FastExcel;
import cn.idev.excel.context.AnalysisContext;
import cn.idev.excel.converters.Converter;
import cn.idev.excel.enums.CellDataTypeEnum;
import cn.idev.excel.metadata.GlobalConfiguration;
import cn.idev.excel.metadata.data.DataFormatData;
import cn.idev.excel.metadata.data.ReadCellData;
import cn.idev.excel.metadata.property.ExcelContentProperty;
import cn.idev.excel.read.listener.ReadListener;
import cn.idev.excel.read.metadata.ReadSheet;
import cn.idev.excel.util.DateUtils;
import cn.idev.excel.util.StringUtils;
import lombok.NonNull;
import modelengine.fit.jober.aipp.service.OperatorService;
import modelengine.fitframework.annotation.Component;
import modelengine.fitframework.annotation.Fitable;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.nio.file.Files;
import java.nio.file.InvalidPathException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * File extractor for Excel and CSV files.
 *
 * <p>Every sheet of the file is rendered as a {@code "Sheet N:"} header line followed by one line per
 * row, with the cell values of a row joined by tab characters.
 *
 * @author 黄政炫
 * @since 2025-09-06
 */
@Component
public class ExcelFileExtractor implements FileExtractor {
    /** Pattern used to render cells whose number format is a date format. */
    private static final String DATE_PATTERN = "yyyy-MM-dd";

    /**
     * Converts a cell into its formatted string representation.
     *
     * <p>String cells are returned as-is, numeric cells with a date format are rendered as
     * {@code yyyy-MM-dd}, other numeric cells are rendered as plain decimal text without trailing
     * zeros, and boolean cells as {@code "true"}/{@code "false"}. Any other (or missing) cell type
     * and any missing cell value yields an empty string.
     *
     * @param cell the cell data to convert, as a {@link ReadCellData}.
     * @param use1904windowing {@code true} if the workbook uses the 1904 date system, {@code false}
     * for the (far more common) 1900 date system.
     * @return the converted content, as a {@link String}.
     */
    private static String getCellValueAsString(@NonNull ReadCellData<?> cell, boolean use1904windowing) {
        if (cell.getType() == null) {
            // Switching on a null enum would throw a NullPointerException.
            return "";
        }
        switch (cell.getType()) {
            case STRING:
                return cell.getStringValue();
            case NUMBER:
                BigDecimal number = cell.getNumberValue();
                if (number == null) {
                    return "";
                }
                DataFormatData fmt = cell.getDataFormatData();
                // The format data may be absent; such a cell is treated as a plain number.
                if (fmt != null && DateUtils.isADateFormat(fmt.getIndex(), fmt.getFormat())) {
                    Date date = DateUtils.getJavaDate(number.doubleValue(), use1904windowing);
                    // SimpleDateFormat is not thread-safe, so a fresh instance is created per call.
                    return new SimpleDateFormat(DATE_PATTERN).format(date);
                }
                return number.stripTrailingZeros().toPlainString();
            case BOOLEAN:
                Boolean flag = cell.getBooleanValue();
                return flag == null ? "" : flag.toString();
            default:
                return "";
        }
    }

    /**
     * Returns the file types supported by this extractor: EXCEL and CSV.
     *
     * @return the names of the supported file type constants, as a
     * {@link List}{@code <}{@link String}{@code >}.
     */
    @Override
    @Fitable(id = "get-fileType-excel")
    public List<String> supportedFileTypes() {
        return Arrays.asList(OperatorService.FileType.EXCEL.toString(), OperatorService.FileType.CSV.toString());
    }

    /**
     * Checks whether the given file path points to an existing regular file.
     *
     * @param fileUrl the file path, as a {@link String}; may be {@code null}.
     * @return {@code true} if the path is syntactically valid and refers to an existing regular
     * file, {@code false} otherwise (including for {@code null}).
     */
    private boolean isValidPath(String fileUrl) {
        if (fileUrl == null) {
            // Paths.get(null) would throw a NullPointerException instead of reporting "invalid".
            return false;
        }
        try {
            Path path = Paths.get(fileUrl);
            return Files.exists(path) && Files.isRegularFile(path);
        } catch (InvalidPathException e) {
            return false;
        }
    }

    /**
     * Extracts the content of the Excel file at the given path and returns it as a string.
     *
     * @param fileUrl the path of the file, as a {@link String}.
     * @return the content of the file, as a {@link String}.
     * @throws IllegalArgumentException if {@code fileUrl} is not the path of an existing regular file.
     * @throws IllegalStateException if the file cannot be opened or read.
     */
    @Override
    @Fitable(id = "extract-file-excel")
    public String extractFile(String fileUrl) {
        if (!isValidPath(fileUrl)) {
            throw new IllegalArgumentException(String.format("Invalid FilePath. [fileUrl=%s]", fileUrl));
        }
        File file = Paths.get(fileUrl).toFile();
        StringBuilder excelContent = new StringBuilder();
        ExcelReadListener listener = new ExcelReadListener(excelContent);
        try (InputStream is = new BufferedInputStream(Files.newInputStream(file.toPath()))) {
            ExcelReader reader = FastExcel.read(is, listener)
                    .registerConverter(new CustomCellStringConverter())
                    .headRowNumber(0)
                    .build();
            try {
                List<ReadSheet> sheets = reader.excelExecutor().sheetList();
                for (ReadSheet meta : sheets) {
                    // Sheet numbers are zero-based; the header shown to the user is one-based.
                    excelContent.append("Sheet ").append(meta.getSheetNo() + 1).append(':').append('\n');
                    ReadSheet readSheet = FastExcel.readSheet(meta.getSheetNo()).headRowNumber(0).build();
                    reader.read(readSheet);
                }
            } finally {
                // Release the reader while the underlying stream is still open.
                reader.finish();
            }
            excelContent.append('\n');
        } catch (IOException e) {
            throw new IllegalStateException(String.format("Fail to extract excel file. [exception=%s]", e.getMessage()),
                    e);
        }
        return excelContent.toString();
    }

    /**
     * Read listener that appends every row to a shared {@link StringBuilder} as one tab-separated line.
     */
    private static class ExcelReadListener implements ReadListener<Map<Integer, String>> {
        private final StringBuilder excelContent;

        ExcelReadListener(StringBuilder excelContent) {
            this.excelContent = excelContent;
        }

        @Override
        public void invoke(Map<Integer, String> data, AnalysisContext context) {
            // Order the cells by their map key and render missing values as empty strings.
            String line = data.entrySet()
                    .stream()
                    .sorted(Map.Entry.comparingByKey())
                    .map(e -> e.getValue() == null ? "" : e.getValue())
                    .collect(Collectors.joining("\t"));
            this.excelContent.append(line).append('\n');
        }

        @Override
        public void doAfterAllAnalysed(AnalysisContext context) {}
    }

    /**
     * Custom cell converter that turns cell data into its string form.
     */
    public static class CustomCellStringConverter implements Converter<String> {
        @Override
        public Class<String> supportJavaTypeKey() {
            return String.class;
        }

        @Override
        public CellDataTypeEnum supportExcelTypeKey() {
            // NOTE(review): presumably null is meant to make this converter apply to every cell type;
            // confirm that the library actually selects a converter registered with a null key.
            return null;
        }

        @Override
        public String convertToJavaData(ReadCellData<?> cellData, ExcelContentProperty contentProperty,
                GlobalConfiguration globalConfiguration) {
            if (cellData == null) {
                return StringUtils.EMPTY;
            }
            // Use the workbook's own date system instead of assuming 1904; default to 1900 when unknown.
            boolean use1904windowing =
                    globalConfiguration != null && Boolean.TRUE.equals(globalConfiguration.getUse1904windowing());
            return getCellValueAsString(cellData, use1904windowing);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# FIT bean configuration for this plugin: lists the plugin's Java package under fit.beans.packages
# (presumably the packages scanned for components - confirm against the FIT framework docs).
fit:
beans:
packages:
- 'modelengine.fit.jade.aipp.file.extract'
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
* This file is a part of the ModelEngine Project.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/

package modelengine.fit.jade.aipp.file.extract;

import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertThrows;

import modelengine.fit.jober.aipp.service.OperatorService;
import modelengine.fitframework.annotation.Fit;
import modelengine.fitframework.test.annotation.FitTestWithJunit;

import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;

import java.io.File;
import java.util.Arrays;
import java.util.List;

/**
 * Test suite for {@link ExcelFileExtractor}.
 *
 * @author 黄政炫
 * @since 2025-09-06
 */
@FitTestWithJunit(includeClasses = ExcelFileExtractor.class)
class ExcelFileExtractorTest {
    @Fit
    ExcelFileExtractor excelFileExtractor;

    /** The extractor must report exactly the EXCEL and CSV file types, in that order. */
    @Test
    @DisplayName("测试获取支持文件类型")
    void supportedFileType() {
        List<String> supportedTypes =
                Arrays.asList(OperatorService.FileType.EXCEL.toString(), OperatorService.FileType.CSV.toString());
        assertThat(this.excelFileExtractor.supportedFileTypes()).isEqualTo(supportedTypes);
    }

    /** A path that does not point to an existing file must be rejected. */
    @Test
    @DisplayName("测试能否捕获错误路径")
    void validPath() {
        assertThrows(IllegalArgumentException.class, () -> {
            this.excelFileExtractor.extractFile("invalidPath.csv");
        });
    }

    /**
     * Extracting the bundled CSV fixture must yield the sheet header followed by tab-separated rows.
     *
     * @throws Exception if the resource URL cannot be converted to a URI.
     */
    @Test
    @DisplayName("测试 excel 文件提取成功")
    void extractFile() throws Exception {
        var resource = this.getClass().getClassLoader().getResource("file/content.csv");
        // Fail with a clear message instead of a NullPointerException when the fixture is missing.
        assertThat(resource).as("test resource file/content.csv").isNotNull();
        // URL.getFile() keeps percent-encoding (e.g. %20), which breaks on paths containing spaces;
        // going through the URI decodes the path correctly.
        File file = new File(resource.toURI());
        String expected = """
                Sheet 1:
                This is an excel test
                ID\tName\tAge\tJoinDate\tActive\tSalary\tDepartment\tNotes
                1\tJohn Doe\t25\t2023-01-15\tTRUE\t8000.50\tIT\tRegular employee
                2\tJane Smith\t30\t2022-05-20\tTRUE\t12000.00\tMarketing\tTeam leader
                3\tBob Johnson\t28\t2023-03-10\tFALSE\t7500.00\tSales\tLeft company
                4\tAlice Brown\t35\t2020-12-01\tTRUE\t15000.75\tIT\tSenior engineer
                5\tTom Wilson\t22\t2023-08-25\tTRUE\t6000.00\tHR\tIntern
                6\t\t40\t2019-06-15\tTRUE\t18000.00\tFinance\tDepartment manager
                7\tLucy Davis\t27\t2023-02-28\tFALSE\t7000.00\tOperations\tContract ended
                8\tMike Miller\t32\t2021-09-10\tTRUE\t13500.50\tIT\tProject lead
                9\tSarah Lee\t29\t2022-11-05\tTRUE\t9500.00\tMarketing\tMarketing specialist
                10\tDavid Zhang\t26\t2023-07-12\tTRUE\t8500.25\tSales\tSales representative

                """;
        assertThat(this.excelFileExtractor.extractFile(file.getAbsolutePath())).isEqualTo(expected);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
This is an excel test
ID,Name,Age,JoinDate,Active,Salary,Department,Notes
1,John Doe,25,2023-01-15,TRUE,8000.50,IT,"Regular employee"
2,Jane Smith,30,2022-05-20,TRUE,12000.00,Marketing,"Team leader"
3,Bob Johnson,28,2023-03-10,FALSE,7500.00,Sales,"Left company"
4,Alice Brown,35,2020-12-01,TRUE,15000.75,IT,"Senior engineer"
5,Tom Wilson,22,2023-08-25,TRUE,6000.00,HR,"Intern"
6,,40,2019-06-15,TRUE,18000.00,Finance,"Department manager"
7,Lucy Davis,27,2023-02-28,FALSE,7000.00,Operations,"Contract ended"
8,Mike Miller,32,2021-09-10,TRUE,13500.50,IT,"Project lead"
9,Sarah Lee,29,2022-11-05,TRUE,9500.00,Marketing,"Marketing specialist"
10,David Zhang,26,2023-07-12,TRUE,8500.25,Sales,"Sales representative"
Loading