From 381b95cadb9d45326e6f45f403443342eff50068 Mon Sep 17 00:00:00 2001 From: jsbjfkbsjk <2504892220@qq.com> Date: Fri, 5 Sep 2025 11:46:14 +0800 Subject: [PATCH 1/8] =?UTF-8?q?=E6=8A=8A=E6=8F=90=E5=8F=96=E5=88=86?= =?UTF-8?q?=E6=9E=90excel=E6=96=87=E4=BB=B6=E7=9A=84=E5=8A=9F=E8=83=BD?= =?UTF-8?q?=E5=81=9A=E4=BA=86=E6=8F=92=E4=BB=B6=E5=8C=96=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../plugins/aipp-file-extract-excel/pom.xml | 89 +++++++++++ .../aipp/file/extract/ExcelFileExtractor.java | 142 ++++++++++++++++++ .../src/main/resources/application.yml | 4 + .../file/extract/ExcelFileExtractorTest.java | 39 +++++ .../src/test/resources/file/content.xlsx | Bin 0 -> 9434 bytes .../plugins/aipp-file-extract-service/pom.xml | 57 +++++++ .../file/extract/AbstractFileExtractor.java | 27 ++++ .../aipp/file/extract/FileTypeConstant.java | 24 +++ app-builder/plugins/aipp-plugin/pom.xml | 10 ++ .../service/impl/OperatorServiceImpl.java | 76 ++-------- .../aipp/tool/FileExtractorContainer.java | 39 +++++ .../jober/aipp/tool/FileTypeConvertor.java | 29 ++++ .../aipp/service/OperatorServiceImplTest.java | 7 +- app-builder/plugins/pom.xml | 2 + common/dependency/pom.xml | 17 +++ 15 files changed, 492 insertions(+), 70 deletions(-) create mode 100644 app-builder/plugins/aipp-file-extract-excel/pom.xml create mode 100644 app-builder/plugins/aipp-file-extract-excel/src/main/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractor.java create mode 100644 app-builder/plugins/aipp-file-extract-excel/src/main/resources/application.yml create mode 100644 app-builder/plugins/aipp-file-extract-excel/src/test/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractorTest.java create mode 100644 app-builder/plugins/aipp-file-extract-excel/src/test/resources/file/content.xlsx create mode 100644 app-builder/plugins/aipp-file-extract-service/pom.xml create mode 100644 app-builder/plugins/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/AbstractFileExtractor.java create mode 100644 app-builder/plugins/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileTypeConstant.java create mode 100644 app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java create mode 100644 app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileTypeConvertor.java diff --git a/app-builder/plugins/aipp-file-extract-excel/pom.xml b/app-builder/plugins/aipp-file-extract-excel/pom.xml new file mode 100644 index 0000000000..5712a12111 --- /dev/null +++ b/app-builder/plugins/aipp-file-extract-excel/pom.xml @@ -0,0 +1,89 @@ + + + 4.0.0 + + modelengine.fit.jade + app-builder-plugin-parent + 1.0.0-SNAPSHOT + + + aipp-file-extract-excel + + + + + org.fitframework + fit-api + + + org.fitframework + fit-util + + + cn.idev.excel + fastexcel + + + modelengine.fit.jade + aipp-file-extract-service + + + org.junit.jupiter + junit-jupiter + + + org.fitframework + fit-test-framework + + + org.assertj + assertj-core + + + + + + + org.fitframework + fit-build-maven-plugin + ${fit.version} + + + build-plugin + + build-plugin + + + + package-plugin + + package-plugin + + + + + + org.apache.maven.plugins + maven-antrun-plugin + ${maven.antrun.version} + + + install + + + + + + + run + + + + + + + + \ No newline at end of file diff --git a/app-builder/plugins/aipp-file-extract-excel/src/main/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractor.java b/app-builder/plugins/aipp-file-extract-excel/src/main/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractor.java new file mode 100644 index 0000000000..ac0e783ce5 --- /dev/null +++ b/app-builder/plugins/aipp-file-extract-excel/src/main/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractor.java @@ -0,0 +1,142 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * This file is a part of the ModelEngine Project. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +package modelengine.fit.jade.aipp.file.extract; + +import cn.idev.excel.ExcelReader; +import cn.idev.excel.FastExcel; +import cn.idev.excel.context.AnalysisContext; +import cn.idev.excel.converters.Converter; +import cn.idev.excel.enums.CellDataTypeEnum; +import cn.idev.excel.metadata.GlobalConfiguration; +import cn.idev.excel.metadata.data.DataFormatData; +import cn.idev.excel.metadata.data.ReadCellData; +import cn.idev.excel.metadata.property.ExcelContentProperty; +import cn.idev.excel.read.listener.ReadListener; +import cn.idev.excel.read.metadata.ReadSheet; +import cn.idev.excel.util.DateUtils; +import modelengine.fitframework.annotation.Component; +import modelengine.fitframework.annotation.Fitable; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.math.BigDecimal; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +@Component +public class ExcelFileExtractor implements AbstractFileExtractor { + + private static String getCellValueAsString(ReadCellData cell) { + switch (cell.getType()) { + case STRING: + return cell.getStringValue(); + case NUMBER: + DataFormatData fmt = cell.getDataFormatData(); + short formatIndex = fmt.getIndex(); + String formatString = fmt.getFormat(); + if (DateUtils.isADateFormat(formatIndex, formatString)) { + double value = cell.getNumberValue().doubleValue(); + Date date = DateUtils.getJavaDate(value, true); + return new SimpleDateFormat("yyyy-MM-dd").format(date); + } else { + BigDecimal num = cell.getNumberValue(); + return num.stripTrailingZeros().toPlainString(); + } + case BOOLEAN: + return Boolean.toString(cell.getBooleanValue()); + default: + return ""; + } + } + + @Override + @Fitable(id = "get-fileType-excel") + public FileTypeConstant.FileType supportedFileType() { + return FileTypeConstant.FileType.EXCEL; + } + + /** + * 从指定路径的 Excel 文件中提取内容,并返回为字符串形式。 + * 实现方式: + * 基于 fast-excel 包,使用流式读取(ReadListener)逐行解析,避免一次性加载整表造成的内存开销。 + * 每行数据会被转换为以制表符(\t)分隔的文本,并在行末追加换行符。 + * 支持多 sheet 解析,会依次读取工作簿中的每一个 sheet。 + * + * @param fileUrl 表示文件路径的 {@link String}. + * @return 表示文件内容的 {@link String}。 + * @throws RuntimeException 当文件读取或解析失败时抛出 + */ + @Override + @Fitable(id = "extract-file-excel") + public String extractFile(String fileUrl) { + File file = Paths.get(fileUrl).toFile(); + StringBuilder excelContent = new StringBuilder(); + ReadListener> listener = new ReadListener<>() { + @Override + public void invoke(Map data, AnalysisContext context) { + String line = data.entrySet() + .stream() + .sorted(Map.Entry.comparingByKey()) + .map(e -> e.getValue() == null ? "" : e.getValue()) + .collect(Collectors.joining("\t")); + excelContent.append(line).append('\n'); + } + + @Override + public void doAfterAllAnalysed(AnalysisContext context) { + } + }; + try (InputStream is = new BufferedInputStream(Files.newInputStream(file.toPath()))) { + ExcelReader reader = FastExcel.read(is, listener) + .registerConverter(new CustomCellStringConverter()) + .headRowNumber(0) + .build(); + + List sheets = reader.excelExecutor().sheetList(); + for (ReadSheet meta : sheets) { + excelContent.append("Sheet ").append(meta.getSheetNo() + 1).append(':').append('\n'); + ReadSheet readSheet = FastExcel.readSheet(meta.getSheetNo()).headRowNumber(0).build(); + reader.read(readSheet); + } + excelContent.append('\n'); + reader.finish(); // 关闭资源 + } catch (IOException e) { + throw new RuntimeException(e); + } + return excelContent.toString(); + } + + /** + * 自定义单元格数据转换器。 + * 将 Excel 单元格数据统一转换为字符串,避免数值/日期等类型在读取时格式不一致的问题。 + * 缺点:由于采用fast excel包,没有 FORMULA类,会将公式单元格自动计算为值 + */ + public static class CustomCellStringConverter implements Converter { + @Override + public Class supportJavaTypeKey() { + return String.class; + } + + @Override + public CellDataTypeEnum supportExcelTypeKey() { + return null; + } + + @Override + public String convertToJavaData(ReadCellData cellData, ExcelContentProperty contentProperty, + GlobalConfiguration globalConfiguration) { + return getCellValueAsString(cellData); + } + } +} diff --git a/app-builder/plugins/aipp-file-extract-excel/src/main/resources/application.yml b/app-builder/plugins/aipp-file-extract-excel/src/main/resources/application.yml new file mode 100644 index 0000000000..bcb7e72b91 --- /dev/null +++ b/app-builder/plugins/aipp-file-extract-excel/src/main/resources/application.yml @@ -0,0 +1,4 @@ +fit: + beans: + packages: + - 'modelengine.fit.jade.aipp.file.extract' \ No newline at end of file diff --git a/app-builder/plugins/aipp-file-extract-excel/src/test/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractorTest.java b/app-builder/plugins/aipp-file-extract-excel/src/test/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractorTest.java new file mode 100644 index 0000000000..8b4d9a08e4 --- /dev/null +++ b/app-builder/plugins/aipp-file-extract-excel/src/test/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractorTest.java @@ -0,0 +1,39 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * This file is a part of the ModelEngine Project. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +package modelengine.fit.jade.aipp.file.extract; + +import static org.assertj.core.api.Assertions.assertThat; + +import modelengine.fitframework.annotation.Fit; +import modelengine.fitframework.test.annotation.FitTestWithJunit; + +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +import java.io.File; + +@FitTestWithJunit(includeClasses = ExcelFileExtractor.class) +@Disabled +class ExcelFileExtractorTest { + @Fit + ExcelFileExtractor excelFileExtractor; + + @Test + @DisplayName("测试获取支持文件类型") + void supportedFileType() { + assertThat(this.excelFileExtractor.supportedFileType()).isEqualTo(FileTypeConstant.FileType.EXCEL); + } + + @Test + @DisplayName("测试 excel 文件提取成功") + void extractFile() { + File file = new File(this.getClass().getClassLoader().getResource("file/content.xlsx").getFile()); + assertThat(this.excelFileExtractor.extractFile(file.getAbsolutePath())).isEqualTo( + "Sheet 1:\nThis is an excel test\n\n"); + } +} \ No newline at end of file diff --git a/app-builder/plugins/aipp-file-extract-excel/src/test/resources/file/content.xlsx b/app-builder/plugins/aipp-file-extract-excel/src/test/resources/file/content.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..ca17d408e0d80eec3ce5c87340bd79d20a923b9a GIT binary patch literal 9434 zcmeHNgE2P+GdXOS-#5M7lvrq@^3~=sD+l4##`{ zf$!V%%-+v_X0Lbl`t9|u+=?>LFxUV%00IC2cm@#W^c%5#1OULm0svS51SlO*J6mUv zt+T!=*dFAh%jjlf{WJ>(iaG-T1-bse+kfy1l&B8ab~2;4Xlx6LwW`0%3@R!@AZ{eq zA^!%S;1;DxUb{g+ed9@Gs-#wd&_SVOJ)%Q-%!gk(N2w(uv}VnCrKLb_0L%|7bv)$1 zl}s1+%PCP0tBMTa$iPQAIDi|kftRhHGh(zPy(gu><)}tt+w3@!-x4W)D=13QLi@D{ z>T+g>f1pzfN`}S---yf2UzveWdP$?XS&5ZRo%}MIhd?ZGkZ z4BZNI0v=HkA&Zhn1vacC$ZUa#=oKv3M^DXs;F!o38wcxIk-FyGJPdXu@EcRt%4%Ko zZ+QJiBSY0P6ecTO8!Xo&fX9hW$GhzPxux$I2DL%E^5=+mZulUNQ#<;cV?5uHZ%@-$ zA`ihW0SHck=oX8BrN%vs6Jn=QsBegeC+eJd-|?poWjD!ZkA)pd1GlW0sg?i)ns&*0Miz$Bx`1c*|6C}6O)f*8^uJB!tX`j%1{we$fd>H4AXh+4=I_Q*qBd`v&5Z7;Z*l+V zGSv?;5{5&UBGR1BMMG_Vul1=~xPzZgxz;>;VbR@nc;?B~Sc_P99ZR2?>8FG)rt3yW z+r+V#!33S~0*(n8m0yf_MFknv_hGo_(3K*kk)DgX%4Rc3qzgXY&uP?>0j~jkKC`J3 zj+3Y5TMSrcp_z2SXg*h->~2@J;a{YFfuzf7x;+T=5InQ&gx$u#JjFn6n6)2dm6}gN z9QUVI$ACU@86e`%m^>k~=TPFbSyR^H1XO&*;dFgW?P!@qZ1+ahMAvuTotWDlRsq?2 zR}>4G%6Dr~i6dXyvVjF2uy>lXd>P@1wx;oHrY)kQFH%IILdVwWqFcJhdw|xfGdR7Y z{C>n7KWEZ{!uQK9h4x7|gOEfJkzU1+UL&eS(m_sbVo{_DhD{tSXGuaPi|j^ti?*Uo zl{px8s+~m6X#Fw&z@;iT?0jS`E8f%fM^9+U!t;eqT>Y58#u`eacPY~#fBQcA!F%#$ zP-+||_I^r*K7$4_67U4JRYb;IWa=s=Va!5}LsTm&ztRA#VLTaYJ3Sqmd7B63N4rLmtl}vaHgLP{4ZP8P%5$Cm65DYa?0jz2yc6er)EV`Suf+#$@Kb0vzlc;nId@XG;bDN zhLgLnas8>yvN#?$ZG~CE8kFlSkzRIAmLhZQ&U5(+64^b}T287tXMXcoXnx|W;|es7 z8wz`5s`2(zfIcTml>tFm{H!C!Z1+rr)RAD>M43s{3LQu*PAm~9{td-r{?m-9Fgtaj zE5d~zI{ehq_o-34-?Az+j3BQY^2;IQi~R?yoXia!K_)8Bjuy6NPCxuGdqjsNGz+rW z!J9k(&n=_1LCV3?+%QNjr;%PhhqM)j47_o*klF6FK}oi9y)M7 zEAMVJ;U2jFd$_v^D*`J6YLG_UjC)$z+MJ5uy6AX}S*<>Z5n{z>G|g%DZmPN?J~t7J zuKi><+HDQW&9``+hgtcwyYXF^UVz*YXsxU5OuowXYXl8lC=kjctl6(3iR`#6_mK&= zmot}I#rXzjudR)uG{5Sl&YShew*jLVn|t)s8)Q)xPSSd{O=zR5Ik@@w$aCyl_bOsg`^JX- z$j0;JYraAvQ`GnL6OB-B){XJZ%fGwNF|-L+aBVMOBQo$s8jdrzCPB-~OTX)j)WiW+ zyip>*#qaX!o~;N+EQ}QRIxF=K%sg-zLiD+$$kOIWAHJGNzUF{_-BZJ#hhJIiZa3VM zgwHm+?UUruNPrY~q=7(|BzM2~-jR=y@Q&+Qnw2LiOrFihq2n{e-+9xl@_RHrh$kI> z`lFcq=}n!@K{g?|BmR`onFR9jIBzE|YIY9=$(T?K5TflRkp9NCcG+u( zbS?@byaN8{-HZr>V3cF68htPZGMR(#uszhW9Dc<600{wxaBd#B78V9|>0E7=7vzU6 zFg+P(b0#7$X0C5XnY=wtzt^D&1PXe)b4#RmioL_OG!VONO?sr`N60HcPZyy1mT_YV zBa49~kEm+xSu2@m1nIMtkU0G@K`)1EBMcjmh^lQQ$2!~N)$il zf*Or04xd4nmu*WQPOa!X34LCWX~=WOY$KzI$SQOBl0iWp(koa1pp2=YunZ8D6$0zPm^>iFor)3-+?un;hxp}6s3PnB0_%u z-p%noKN?Mtui;+zd7V0FU@ybOa`5)_G~e6%(D!^3P5y2tfsbh@`^ z(rZnJ{ouZI@B8(>0}{cT^$5EsT-9~lJj^Sin8qu*J$*|kxLr_>&(@&tj4-5f{Uor3`Z-uJPaTF{PH_huBAX)9(Au6+c{oZ-Tj51l1rS63I9 z)kWdcCgnbyh(uXK6LkgDLi&i0-a8o(XJ=gEG>tTQ(khIMf@dEp%d3)oPZ9dI{r4DV zesVeI0*5?rxpY~{;nhhuM0m*q>*-&@i4-s_?=Bwkgf^-#c1b+PtcA@&T?$D~hc$zS1Q5f|2YT&%_ELgL~L5m1IZ280F?__tIR?(HQ!PAM(9p?!xq7!W0k0^+`JVOlV7Dg!jl7-6h zq`-a!c3&4ThGY_EuN{P`5SldehK)mX=To?rbZ@VxVCa+ z^lPVU)3*g2!fDK+FtC9kw!+;=-i%qF)EJ(-4$AGyB?mo1A}d~cCw263byIZ~>y8%` z(d5JJ@7Gs)p(dRw7gOm})IJ+BPb+hIp zm|^Yp^=pP2S-n;vP_!%9d4y_d<|q+e`h3H0RX4Lq0BXs+7aD(Jk9slsWeva#!{Z~N zEUxX~`?y1{=mQLbJqdHT(#%R*19|bh*R;(9Sf5$Ox*D^C6}F~4^TEx1y%zaZFV%I2 zC*D8}cr;^|^=8UYs5qCVxgMx*y9hcLHI>Lf$LxmS8_JpzDY@V)>7$BZr`Sre(lq#RQd2KIiED_@A$k?qevA*GV^|`?{kWJ{|WvSGA5=f=lUc?*55#Ut@ ziN)}|jFumSV|kz89@EXB&)^-4!ahozWkaXogTAD>mLx=Hm*jx%%-iqA+CgR!xQMTm zCQstKqEcq>RotWhq=bByxU+>METRRPXOj~>M^lrD4nF*yPl&(c_ArZYayicXjZ4>U zgF*A@=HM~zm~b+l5XDzn2_|Ws9Pi3Ljw;ZR;491~b!6@^@+A~OpT#OHnvqNnI>A|d zHOuRv-JWoc-rC_^M=3rMov5VEwY`;Ra4fT8Y(T}F1WWU+O?ZYwcQ7-ge`J-@YL@Q` zuBuX+uV$l1KvtM|eqrPCf{XvKl1i{Y^^>-X5dDsH>c!R9h2}|9_&Q&{*Y@6A`O4oE zJ5xC%6i;$FBWY~faU0002r&e}PV(A!i^?;ojj`oA)Eu2&&kEpjMaZl*fP2g(ao2_o z6FaK=9UY|E^~8B~XVCSX86ms-dJ!KMm-~ivl7M8mc!{m)OqN}zx1J5Fjcp`F$>3!& zAs$z&SmCO<-3g>&OLs+t*QqZuXG>j>TD0qyh)x;}v$^Xx-6aT#tBV&irtGjSH*D+B zC!_~&UQ{Wf(Z;?~wBx)IzSdLkZ68#*>i2lGKl#cA(l7c=aZ~Sp#Jvfrg)tx@8Rd^~ z?Boo#2K~&xs?`Q;=OLvZ-Zw$?W;$)_hT3N|1#1Whkx;NvP%UGaAgmRm$ZEQ}Z>8~G zQVK2bEhkpNCu}@1COX1CjEJu*`k!sKa02@VEEu@vlx-5CXiycPLJR0s2i948?8=<% zLf==2#RbqwsWfN9rJIhVQ}ph0wP-6@VhBk_ug2Ew6doiSiNmr`S;|qQs7eEk42$Di zwXCG2*HU24v!CK$`Z2%L2`Hq#gA+sdtkQCSgK1?n>*rwVih}zhD?yJwcg&o>zm&0OJDFx?tWqrhqnG+>ICS4c8?d}rqgUU;^hbyy84JGM@6p?lAVu+% ztNR#>W+s)J!%?6}WT8Urs~GRM6{dP%vLBw&xmZfiaxfZaUmh?s5+&l0`~J$AnhqPGSf)7sg+HsaO3xaRNSM9r)SGn{ac2UkK{qo5Ld*sgD$Y zIu%2aqK?}+W!>PYUp)1_NpVVK4J0+tuPoK6J9zeF5vr4hdQ)S6rKNP=a2h9y=snO~ zDGxubT;|LR(bsth_rszI;)*p5Y^)+*5d+;zChYQDo0e@P?2}9FGb*g?#w!ij_F3Sq z-WPb^{u-rV?}eqin;FOL#Hp0dVYip&?+%`rU;e68cENr`Mf zpaszgl`z(Ms@3{R9y)k#6+I3ohA-XA_00$9SC7*7kbE*@qC7t4JL>JQ z-bI0DKchUf-tXzEn)OIi@T2Cso=MhMNYERuAI3}os&pClZZXct-%O6ituE!oE}wAs z?^~sdWq$z?!|4eih}dzF8r$&^cI!O}8uegLN$mqKj6V@{9EA14uRR`n7m*)_IGDIF zZKFRnk5|(R4NfG=&xh7Zy%a4v&t<;JShgHVVCYp;-gv9~NRAX82u+P5(z@$*|9*3y z`U6Q&+HQ^ulCyX;b)J~zqp-2$;JZ5)-iEAha2e5(e;f|Kq{N4vk@P|S8Te_bEkw-f zV)M-w8hod$XlPdq{K7*i;HxiB>RxrFR77zafBsy>9=>V)MS7epv;4qNXrA_&#?xB= zi93Xf%FQRIP4Wb;StzUg^}h2#rHKgm$fQ$68h6YhGz4M}b)yECjS|ZdQoXR?YPVcB z1qF-V;}BVYOkCn>0TEVS%hu}jsuTwYO5H1OJj#KDSY8~Yi(qL%N}9w}tt~q1E8&?N zWMKLbuFjyzv`L?jgIeSwZdiuudd+jqbtP ztf7F)M&jhnh$#1SQ#n5UinQe--*J^yI**UU2!gtkI$A8iy(E?#(TzRIp9Yq6Fn z1WNCY$`h~5`6I&NezQVZ>aw46;flq4(}OWp2>U&M7G zP-6`x=qa5tK8nX_f|=?|A3{YMG%f^%W6f9h-Z6+!%`};o(D2?%Z=3zrvlqjj8%$yv}u<&=jB zlrn4f^)89o&6k^t?Ty`BC*}+*HwN*)9yvTt37Or_Df&iKLT8mqmV}P&i(5l~Dj;Du z@6s&x{`jSg-Xx*pgr8{f>K!9i;JdlbkWv&)eZLSWDPBt1aMUHDzzQsQ#RZCvO#k}g z(p3TCy(dJBtrt)1rTd}izQ1%FaexmUmJ~C~!6q9cwwZ*^X;<`x+IyUb*wdzmrrsdP zbTZgoD7~EYd5hR}FwO{F;(Q=ByTT$VC-Ia;#4A|v`N#*)$@+=KGMEg~Xg@^Y!gOsx zy=_EC1{+`EE25J6F?m#HK4bQWSYTay7iY-1$_Ph{= zT1paL&SLWMO+`Kn;>Kp*E8Q^_wUf*cofQ!J?A=y5MnVI!Fo3)_<)EN5M`wo~8J;yir znS{dZSm^%W?uC48(?as~3o!iw#;9!2{!$BA7#?R1BE!N3zOy3YK@o%;1)@GN_;>(X z)H9SrD6k|T3P!?t7s8w0sP}osW}+OTI0Gc}LH|3g4DIdz%PPo{{kf$?b=XcYqeG@b zp72AP&p+X+X2z}|*@T=ac1tIp*`^Hi78&cDS$f_@om4yz7@x8z(SJAlInr;ucgbK~vXjQJa8$@c|AR@pH((^M9lGl+ z?`jDVfOt9O2t`ny>2{^XzBK+7Q=HBv20M)~#}q-`mYvy3rITZjTJLOT*s{en{(R-p-yse=q^G|?(PLBT){Bg{OQ0H%RhL%=Py7x)SrM4Tb>UA9+qXl02Xlm(CL4Rw1=V(^VeUZ`;c_@pUV8oW*?&bGbQwY_?Bky|@gT1JHqJZ*{ii?w1qlGuKl$4|{psBmWndvT2ml~M OK9mqMPJ8y_?0*1>(gX4U literal 0 HcmV?d00001 diff --git a/app-builder/plugins/aipp-file-extract-service/pom.xml b/app-builder/plugins/aipp-file-extract-service/pom.xml new file mode 100644 index 0000000000..5b3ec6a434 --- /dev/null +++ b/app-builder/plugins/aipp-file-extract-service/pom.xml @@ -0,0 +1,57 @@ + + + 4.0.0 + + modelengine.fit.jade + app-builder-plugin-parent + 1.0.0-SNAPSHOT + + + aipp-file-extract-service + + + + + org.fitframework + fit-api + + + org.fitframework + fit-util + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + ${maven.compiler.version} + + ${java.version} + ${java.version} + ${project.build.sourceEncoding} + + -parameters + + + + + org.fitframework + fit-build-maven-plugin + ${fit.version} + + + build-service + + build-service + + + + + + + + \ No newline at end of file diff --git a/app-builder/plugins/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/AbstractFileExtractor.java b/app-builder/plugins/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/AbstractFileExtractor.java new file mode 100644 index 0000000000..1cabc7a406 --- /dev/null +++ b/app-builder/plugins/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/AbstractFileExtractor.java @@ -0,0 +1,27 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * This file is a part of the ModelEngine Project. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +package modelengine.fit.jade.aipp.file.extract; + +import modelengine.fitframework.annotation.Genericable; + +public interface AbstractFileExtractor { + /** + * + * @param fileUrl 文件路径 + * @return 表示提取的文件信息的 {@link String}。 + */ + @Genericable(id = "extract-file") + String extractFile(String fileUrl); + + /** + * + * @return 表示返回的文件枚举类型 + */ + @Genericable(id = "get-fileType") + FileTypeConstant.FileType supportedFileType(); + +} diff --git a/app-builder/plugins/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileTypeConstant.java b/app-builder/plugins/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileTypeConstant.java new file mode 100644 index 0000000000..a9063270ab --- /dev/null +++ b/app-builder/plugins/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileTypeConstant.java @@ -0,0 +1,24 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * This file is a part of the ModelEngine Project. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +package modelengine.fit.jade.aipp.file.extract; + +public class FileTypeConstant { + /** + * 文件类型枚举 + */ + public enum FileType { + PDF, + WORD, + EXCEL, + IMAGE, + AUDIO, + TXT, + HTML, + MARKDOWN, + CSV + } +} diff --git a/app-builder/plugins/aipp-plugin/pom.xml b/app-builder/plugins/aipp-plugin/pom.xml index 53718801f6..82231a929b 100644 --- a/app-builder/plugins/aipp-plugin/pom.xml +++ b/app-builder/plugins/aipp-plugin/pom.xml @@ -143,6 +143,16 @@ org.redisson redisson + + + modelengine.fit.jade + aipp-file-extract-service + + + modelengine.fit.jade + aipp-file-extract-excel + test + diff --git a/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/service/impl/OperatorServiceImpl.java b/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/service/impl/OperatorServiceImpl.java index 93f14c6659..2095244114 100644 --- a/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/service/impl/OperatorServiceImpl.java +++ b/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/service/impl/OperatorServiceImpl.java @@ -10,6 +10,7 @@ import modelengine.fit.jober.aipp.common.exception.AippException; import modelengine.fit.jober.aipp.service.LLMService; import modelengine.fit.jober.aipp.service.OperatorService; +import modelengine.fit.jober.aipp.tool.FileExtractorContainer; import modelengine.fit.jober.aipp.util.AippFileUtils; import modelengine.fit.jober.aipp.util.AippStringUtils; import modelengine.fitframework.annotation.Component; @@ -20,12 +21,6 @@ import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.text.PDFTextStripper; import org.apache.poi.poifs.filesystem.FileMagic; -import org.apache.poi.ss.usermodel.Cell; -import org.apache.poi.ss.usermodel.DateUtil; -import org.apache.poi.ss.usermodel.Row; -import org.apache.poi.ss.usermodel.Sheet; -import org.apache.poi.ss.usermodel.Workbook; -import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.poi.xwpf.extractor.XWPFWordExtractor; import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.apache.poi.xwpf.usermodel.XWPFParagraph; @@ -41,13 +36,10 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Paths; -import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.Date; import java.util.EnumMap; -import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Objects; @@ -97,8 +89,8 @@ public class OperatorServiceImpl implements OperatorService { private final LLMService llmService; private final BrokerClient client; + private final FileExtractorContainer fileExtractorContainer; private final Function pdfExtractor = this::extractPdfFile; - private final Function excelExtractor = this::extractExcelFile; private final Function wordExtractor = this::extractWordFile; private final Function textExtractor = this::extractTextFile; private final EnumMap> outlineOperatorMap = @@ -113,7 +105,6 @@ public class OperatorServiceImpl implements OperatorService { { put(FileType.PDF, pdfExtractor); put(FileType.WORD, wordExtractor); - put(FileType.EXCEL, excelExtractor); put(FileType.TXT, textExtractor); put(FileType.HTML, textExtractor); put(FileType.MARKDOWN, textExtractor); @@ -121,30 +112,11 @@ public class OperatorServiceImpl implements OperatorService { } }; - public OperatorServiceImpl(LLMService llmService, BrokerClient client) { + public OperatorServiceImpl(LLMService llmService, BrokerClient client, + FileExtractorContainer fileExtractorContainer) { this.llmService = llmService; this.client = client; - } - - private static String getCellValueAsString(Cell cell) { - switch (cell.getCellType()) { - case STRING: - return cell.getStringCellValue(); - case NUMERIC: - if (DateUtil.isCellDateFormatted(cell)) { - Date dateCellValue = cell.getDateCellValue(); - SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd"); - return dateFormat.format(dateCellValue); - } else { - return Double.toString(cell.getNumericCellValue()); - } - case BOOLEAN: - return Boolean.toString(cell.getBooleanCellValue()); - case FORMULA: - return cell.getCellFormula(); - default: - return ""; - } + this.fileExtractorContainer = fileExtractorContainer; } private static String extractDocHandle(InputStream fis, String fileName) throws IOException { @@ -248,42 +220,12 @@ public File createDoc(String instanceId, String fileName, String txt) throws IOE public String fileExtractor(String fileUrl, Optional optionalFileType) { if (optionalFileType.isPresent()) { Function function = this.fileOperatorMap.get(optionalFileType.get()); - return Optional.ofNullable(function).map(f -> f.apply(fileUrl)).orElse(StringUtils.EMPTY); - } - return this.extractTextFile(fileUrl); - } + return fileExtractorContainer.extract(fileUrl, optionalFileType.get()) + .or(() -> Optional.ofNullable(function).map(f -> f.apply(fileUrl))) + .orElse(StringUtils.EMPTY); - private String iterExcel(Workbook workbook) { - StringBuilder excelContent = new StringBuilder(); - for (int sheetIndex = 0; sheetIndex < workbook.getNumberOfSheets(); sheetIndex++) { - Sheet sheet = workbook.getSheetAt(sheetIndex); - StringBuilder sheetContent = new StringBuilder(); - for (Row row : sheet) { - StringBuilder rowContent = new StringBuilder(); - Iterator cellIterator = row.cellIterator(); - while (cellIterator.hasNext()) { - Cell cell = cellIterator.next(); - String cellValue = getCellValueAsString(cell); - rowContent.append(cellValue).append("\t"); - } - sheetContent.append(rowContent.toString().trim()).append("\n"); - } - excelContent.append("Sheet ").append(sheetIndex + 1).append(":\n").append(sheetContent).append("\n"); - } - return excelContent.toString(); - } - - private String extractExcelFile(String fileUrl) { - File file = Paths.get(fileUrl).toFile(); - String excelContent = ""; - try (InputStream fis = new BufferedInputStream(Files.newInputStream(file.toPath()))) { - Workbook workbook = new XSSFWorkbook(fis); - excelContent = this.iterExcel(workbook); - } catch (IOException e) { - log.error("read excel fail.", e); - throw new AippException(AippErrCode.EXTRACT_FILE_FAILED); } - return excelContent; + return this.extractTextFile(fileUrl); } private String iterPdf(PDDocument doc) throws IOException { diff --git a/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java b/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java new file mode 100644 index 0000000000..ea420775cb --- /dev/null +++ b/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java @@ -0,0 +1,39 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * This file is a part of the ModelEngine Project. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +package modelengine.fit.jober.aipp.tool; + +import modelengine.fit.jade.aipp.file.extract.AbstractFileExtractor; +import modelengine.fit.jade.aipp.file.extract.FileTypeConstant; +import modelengine.fit.jober.aipp.service.OperatorService; +import modelengine.fitframework.annotation.Component; + +import java.util.EnumMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +/** + * 管理文件提取器的容器 + */ +@Component +public class FileExtractorContainer { + private final Map map; + + public FileExtractorContainer(List extractors) { + map = new EnumMap<>(FileTypeConstant.FileType.class); + for (AbstractFileExtractor fileExtractor : extractors) { + map.put(fileExtractor.supportedFileType(), fileExtractor); + } + } + + public Optional extract(String fileUrl, OperatorService.FileType fileType) { + FileTypeConstant.FileType fileType_transform = FileTypeConvertor.convert(fileType); + return Optional.ofNullable(map.get(fileType_transform)) + .map(extractor -> extractor.extractFile(fileUrl)); + } + +} diff --git a/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileTypeConvertor.java b/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileTypeConvertor.java new file mode 100644 index 0000000000..dd0926442c --- /dev/null +++ b/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileTypeConvertor.java @@ -0,0 +1,29 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. + * This file is a part of the ModelEngine Project. + * Licensed under the MIT License. See License.txt in the project root for license information. + *--------------------------------------------------------------------------------------------*/ + +package modelengine.fit.jober.aipp.tool; + +import modelengine.fit.jade.aipp.file.extract.FileTypeConstant; +import modelengine.fit.jober.aipp.service.OperatorService; + +/** + * 文件类型转换器 + */ +public class FileTypeConvertor { + public static FileTypeConstant.FileType convert(OperatorService.FileType fileType) { + return switch (fileType) { + case PDF -> FileTypeConstant.FileType.PDF; + case WORD -> FileTypeConstant.FileType.WORD; + case EXCEL -> FileTypeConstant.FileType.EXCEL; + case IMAGE -> FileTypeConstant.FileType.IMAGE; + case AUDIO -> FileTypeConstant.FileType.AUDIO; + case TXT -> FileTypeConstant.FileType.TXT; + case HTML -> FileTypeConstant.FileType.HTML; + case MARKDOWN -> FileTypeConstant.FileType.MARKDOWN; + case CSV -> FileTypeConstant.FileType.CSV; + }; + } +} diff --git a/app-builder/plugins/aipp-plugin/src/test/java/modelengine/fit/jober/aipp/service/OperatorServiceImplTest.java b/app-builder/plugins/aipp-plugin/src/test/java/modelengine/fit/jober/aipp/service/OperatorServiceImplTest.java index a26b08114f..73b28ed9f3 100644 --- a/app-builder/plugins/aipp-plugin/src/test/java/modelengine/fit/jober/aipp/service/OperatorServiceImplTest.java +++ b/app-builder/plugins/aipp-plugin/src/test/java/modelengine/fit/jober/aipp/service/OperatorServiceImplTest.java @@ -8,7 +8,9 @@ import static org.assertj.core.api.Assertions.assertThat; +import modelengine.fit.jade.aipp.file.extract.ExcelFileExtractor; import modelengine.fit.jober.aipp.service.impl.OperatorServiceImpl; +import modelengine.fit.jober.aipp.tool.FileExtractorContainer; import modelengine.fitframework.annotation.Fit; import modelengine.fitframework.test.annotation.FitTestWithJunit; import modelengine.fitframework.test.annotation.Mock; @@ -26,7 +28,7 @@ * @author 兰宇晨 * @since 2025-01-15 */ -@FitTestWithJunit(includeClasses = OperatorServiceImpl.class) +@FitTestWithJunit(includeClasses = {OperatorServiceImpl.class, FileExtractorContainer.class, ExcelFileExtractor.class}) @Disabled public class OperatorServiceImplTest { @Fit @@ -64,8 +66,7 @@ void shouldOkWhenExtractExcelFile() { } private String getContent(String filePath, OperatorService.FileType fileType) { - String fileUrl = "/path/mockurl.mock"; File file = new File(this.getClass().getClassLoader().getResource(filePath).getFile()); - return this.operatorService.fileExtractor(fileUrl, Optional.of(fileType)); + return this.operatorService.fileExtractor(file.getAbsolutePath(), Optional.of(fileType)); } } diff --git a/app-builder/plugins/pom.xml b/app-builder/plugins/pom.xml index dd7180c795..55459c4cc2 100644 --- a/app-builder/plugins/pom.xml +++ b/app-builder/plugins/pom.xml @@ -18,6 +18,8 @@ aipp-custom-model-center aipp-document-extract-node aipp-extractor + aipp-file-extract-excel + aipp-file-extract-service aipp-http-call aipp-loop-tool aipp-memory diff --git a/common/dependency/pom.xml b/common/dependency/pom.xml index b700cd44dd..8766671fa5 100644 --- a/common/dependency/pom.xml +++ b/common/dependency/pom.xml @@ -36,6 +36,7 @@ 1.0.0-SNAPSHOT 1.0.0-SNAPSHOT 1.0.0-SNAPSHOT + 1.0.0-SNAPSHOT 1.14.12 @@ -58,6 +59,7 @@ 2.24 1.12.468 5.7.1 + 1.1.0 3.22.0 @@ -392,6 +394,16 @@ app-engine-metrics-base-service ${app-engine.version} + + modelengine.fit.jade + aipp-file-extract-service + ${aipp-file-extract.version} + + + modelengine.fit.jade + aipp-file-extract-excel + ${aipp-file-extract.version} + @@ -559,6 +571,11 @@ opencsv ${opencsv.version} + + cn.idev.excel + fastexcel + ${fastexcel.version} + From 2514394970772889bc4116f8eea0eec58bf1aaeb Mon Sep 17 00:00:00 2001 From: jsbjfkbsjk <2504892220@qq.com> Date: Sat, 6 Sep 2025 23:52:08 +0800 Subject: [PATCH 2/8] =?UTF-8?q?=E6=9B=B4=E6=AD=A3=E6=B3=A8=E9=87=8A?= =?UTF-8?q?=EF=BC=8C=E6=A0=BC=E5=BC=8F=E5=92=8C=E7=9B=AE=E5=BD=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../plugins/aipp-file-extract-excel/pom.xml | 29 ++++++++++------- .../aipp/file/extract/ExcelFileExtractor.java | 31 +++++++++++++------ .../file/extract/ExcelFileExtractorTest.java | 9 +++++- .../aipp/file/extract/FileTypeConstant.java | 24 -------------- app-builder/plugins/aipp-plugin/pom.xml | 3 +- .../aipp/tool/FileExtractorContainer.java | 24 ++++++++++---- .../jober/aipp/tool/FileTypeConvertor.java | 29 ----------------- app-builder/plugins/pom.xml | 1 - .../aipp-file-extract-service/pom.xml | 12 ++++--- .../file/extract/AbstractFileExtractor.java | 14 +++++++-- app-builder/services/pom.xml | 1 + common/dependency/pom.xml | 16 +++------- 12 files changed, 92 insertions(+), 101 deletions(-) delete mode 100644 app-builder/plugins/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileTypeConstant.java delete mode 100644 app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileTypeConvertor.java rename app-builder/{plugins => services}/aipp-file-extract-service/pom.xml (84%) rename app-builder/{plugins => services}/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/AbstractFileExtractor.java (70%) diff --git a/app-builder/plugins/aipp-file-extract-excel/pom.xml b/app-builder/plugins/aipp-file-extract-excel/pom.xml index 5712a12111..90ae701f05 100644 --- a/app-builder/plugins/aipp-file-extract-excel/pom.xml +++ b/app-builder/plugins/aipp-file-extract-excel/pom.xml @@ -1,6 +1,5 @@ - 4.0.0 @@ -9,6 +8,7 @@ 1.0.0-SNAPSHOT + modelengine.fit.jade.plugin aipp-file-extract-excel @@ -21,14 +21,6 @@ org.fitframework fit-util - - cn.idev.excel - fastexcel - - - modelengine.fit.jade - aipp-file-extract-service - org.junit.jupiter junit-jupiter @@ -41,6 +33,22 @@ org.assertj assertj-core + + + + cn.idev.excel + fastexcel + + + + + modelengine.fit.jade + aipp-file-extract-service + + + modelengine.fit.jade + aipp-service + @@ -85,5 +93,4 @@ - \ No newline at end of file diff --git a/app-builder/plugins/aipp-file-extract-excel/src/main/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractor.java b/app-builder/plugins/aipp-file-extract-excel/src/main/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractor.java index ac0e783ce5..6330e58fe1 100644 --- a/app-builder/plugins/aipp-file-extract-excel/src/main/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractor.java +++ b/app-builder/plugins/aipp-file-extract-excel/src/main/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractor.java @@ -18,6 +18,7 @@ import cn.idev.excel.read.listener.ReadListener; import cn.idev.excel.read.metadata.ReadSheet; import cn.idev.excel.util.DateUtils; +import modelengine.fit.jober.aipp.service.OperatorService; import modelengine.fitframework.annotation.Component; import modelengine.fitframework.annotation.Fitable; @@ -34,9 +35,20 @@ import java.util.Map; import java.util.stream.Collectors; +/** + * Excel文件的提取器。 + * + * @author jsbjfkbsjk + * @since 2025-9-6 + */ @Component public class ExcelFileExtractor implements AbstractFileExtractor { - + /** + * 把单元格转换成格式化字符串 + * + * @param cell 表示单元格数据 {@link ReadCellData} + * @return 转换后的内容 {@link String} + */ private static String getCellValueAsString(ReadCellData cell) { switch (cell.getType()) { case STRING: @@ -60,22 +72,22 @@ private static String getCellValueAsString(ReadCellData cell) { } } + /** + * 该文件提取器支持excel类型 + * + * @return 枚举常量类型 {@link OperatorService.FileType} + */ @Override @Fitable(id = "get-fileType-excel") - public FileTypeConstant.FileType supportedFileType() { - return FileTypeConstant.FileType.EXCEL; + public OperatorService.FileType supportedFileType() { + return OperatorService.FileType.EXCEL; } /** * 从指定路径的 Excel 文件中提取内容,并返回为字符串形式。 - * 实现方式: - * 基于 fast-excel 包,使用流式读取(ReadListener)逐行解析,避免一次性加载整表造成的内存开销。 - * 每行数据会被转换为以制表符(\t)分隔的文本,并在行末追加换行符。 - * 支持多 sheet 解析,会依次读取工作簿中的每一个 sheet。 * * @param fileUrl 表示文件路径的 {@link String}. * @return 表示文件内容的 {@link String}。 - * @throws RuntimeException 当文件读取或解析失败时抛出 */ @Override @Fitable(id = "extract-file-excel") @@ -119,8 +131,7 @@ public void doAfterAllAnalysed(AnalysisContext context) { /** * 自定义单元格数据转换器。 - * 将 Excel 单元格数据统一转换为字符串,避免数值/日期等类型在读取时格式不一致的问题。 - * 缺点:由于采用fast excel包,没有 FORMULA类,会将公式单元格自动计算为值 + * 该转换器实现了能够处理单元格数据并将其转换为字符串形式。 */ public static class CustomCellStringConverter implements Converter { @Override diff --git a/app-builder/plugins/aipp-file-extract-excel/src/test/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractorTest.java b/app-builder/plugins/aipp-file-extract-excel/src/test/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractorTest.java index 8b4d9a08e4..2a6dd6f16c 100644 --- a/app-builder/plugins/aipp-file-extract-excel/src/test/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractorTest.java +++ b/app-builder/plugins/aipp-file-extract-excel/src/test/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractorTest.java @@ -8,6 +8,7 @@ import static org.assertj.core.api.Assertions.assertThat; +import modelengine.fit.jober.aipp.service.OperatorService; import modelengine.fitframework.annotation.Fit; import modelengine.fitframework.test.annotation.FitTestWithJunit; @@ -17,6 +18,12 @@ import java.io.File; +/** + * 表示{@link ExcelFileExtractor}的测试集 + * + * @author jsbjfkbsjk + * @since 2025-9-6 + */ @FitTestWithJunit(includeClasses = ExcelFileExtractor.class) @Disabled class ExcelFileExtractorTest { @@ -26,7 +33,7 @@ class ExcelFileExtractorTest { @Test @DisplayName("测试获取支持文件类型") void supportedFileType() { - assertThat(this.excelFileExtractor.supportedFileType()).isEqualTo(FileTypeConstant.FileType.EXCEL); + assertThat(this.excelFileExtractor.supportedFileType()).isEqualTo(OperatorService.FileType.EXCEL); } @Test diff --git a/app-builder/plugins/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileTypeConstant.java b/app-builder/plugins/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileTypeConstant.java deleted file mode 100644 index a9063270ab..0000000000 --- a/app-builder/plugins/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileTypeConstant.java +++ /dev/null @@ -1,24 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. - * This file is a part of the ModelEngine Project. - * Licensed under the MIT License. See License.txt in the project root for license information. - *--------------------------------------------------------------------------------------------*/ - -package modelengine.fit.jade.aipp.file.extract; - -public class FileTypeConstant { - /** - * 文件类型枚举 - */ - public enum FileType { - PDF, - WORD, - EXCEL, - IMAGE, - AUDIO, - TXT, - HTML, - MARKDOWN, - CSV - } -} diff --git a/app-builder/plugins/aipp-plugin/pom.xml b/app-builder/plugins/aipp-plugin/pom.xml index 82231a929b..c913a9e5cb 100644 --- a/app-builder/plugins/aipp-plugin/pom.xml +++ b/app-builder/plugins/aipp-plugin/pom.xml @@ -149,8 +149,9 @@ aipp-file-extract-service - modelengine.fit.jade + modelengine.fit.jade.plugin aipp-file-extract-excel + 1.0.0-SNAPSHOT test diff --git a/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java b/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java index ea420775cb..3d61c1a2fc 100644 --- a/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java +++ b/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java @@ -7,7 +7,6 @@ package modelengine.fit.jober.aipp.tool; import modelengine.fit.jade.aipp.file.extract.AbstractFileExtractor; -import modelengine.fit.jade.aipp.file.extract.FileTypeConstant; import modelengine.fit.jober.aipp.service.OperatorService; import modelengine.fitframework.annotation.Component; @@ -18,22 +17,35 @@ /** * 管理文件提取器的容器 + * + * @author jsbjfkbsjk + * @since 2025-9-6 */ @Component public class FileExtractorContainer { - private final Map map; + private final Map map; + /** + * 初始化用框架注入提取器 + * + * @param extractors 文件提取器 {@link AbstractFileExtractor} + */ public FileExtractorContainer(List extractors) { - map = new EnumMap<>(FileTypeConstant.FileType.class); + map = new EnumMap<>(OperatorService.FileType.class); for (AbstractFileExtractor fileExtractor : extractors) { map.put(fileExtractor.supportedFileType(), fileExtractor); } } + /** + * 根据文件类型找到支持文件类型的提取器 + * + * @param fileUrl 文件路径 {@link String} + * @param fileType 文件枚举类型 {@link OperatorService.FileType} + * @return 提取的字符串 {@link Optional} + */ public Optional extract(String fileUrl, OperatorService.FileType fileType) { - FileTypeConstant.FileType fileType_transform = FileTypeConvertor.convert(fileType); - return Optional.ofNullable(map.get(fileType_transform)) + return Optional.ofNullable(map.get(fileType)) .map(extractor -> extractor.extractFile(fileUrl)); } - } diff --git a/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileTypeConvertor.java b/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileTypeConvertor.java deleted file mode 100644 index dd0926442c..0000000000 --- a/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileTypeConvertor.java +++ /dev/null @@ -1,29 +0,0 @@ -/*--------------------------------------------------------------------------------------------- - * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved. - * This file is a part of the ModelEngine Project. - * Licensed under the MIT License. See License.txt in the project root for license information. - *--------------------------------------------------------------------------------------------*/ - -package modelengine.fit.jober.aipp.tool; - -import modelengine.fit.jade.aipp.file.extract.FileTypeConstant; -import modelengine.fit.jober.aipp.service.OperatorService; - -/** - * 文件类型转换器 - */ -public class FileTypeConvertor { - public static FileTypeConstant.FileType convert(OperatorService.FileType fileType) { - return switch (fileType) { - case PDF -> FileTypeConstant.FileType.PDF; - case WORD -> FileTypeConstant.FileType.WORD; - case EXCEL -> FileTypeConstant.FileType.EXCEL; - case IMAGE -> FileTypeConstant.FileType.IMAGE; - case AUDIO -> FileTypeConstant.FileType.AUDIO; - case TXT -> FileTypeConstant.FileType.TXT; - case HTML -> FileTypeConstant.FileType.HTML; - case MARKDOWN -> FileTypeConstant.FileType.MARKDOWN; - case CSV -> FileTypeConstant.FileType.CSV; - }; - } -} diff --git a/app-builder/plugins/pom.xml b/app-builder/plugins/pom.xml index 55459c4cc2..4a0ab34250 100644 --- a/app-builder/plugins/pom.xml +++ b/app-builder/plugins/pom.xml @@ -19,7 +19,6 @@ aipp-document-extract-node aipp-extractor aipp-file-extract-excel - aipp-file-extract-service aipp-http-call aipp-loop-tool aipp-memory diff --git a/app-builder/plugins/aipp-file-extract-service/pom.xml b/app-builder/services/aipp-file-extract-service/pom.xml similarity index 84% rename from app-builder/plugins/aipp-file-extract-service/pom.xml rename to app-builder/services/aipp-file-extract-service/pom.xml index 5b3ec6a434..6ae89b12e5 100644 --- a/app-builder/plugins/aipp-file-extract-service/pom.xml +++ b/app-builder/services/aipp-file-extract-service/pom.xml @@ -1,11 +1,10 @@ - 4.0.0 modelengine.fit.jade - app-builder-plugin-parent + app-builder-service-parent 1.0.0-SNAPSHOT @@ -21,6 +20,12 @@ org.fitframework fit-util + + + + modelengine.fit.jade + aipp-service + @@ -53,5 +58,4 @@ - \ No newline at end of file diff --git a/app-builder/plugins/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/AbstractFileExtractor.java b/app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/AbstractFileExtractor.java similarity index 70% rename from app-builder/plugins/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/AbstractFileExtractor.java rename to app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/AbstractFileExtractor.java index 1cabc7a406..d09d9693d6 100644 --- a/app-builder/plugins/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/AbstractFileExtractor.java +++ b/app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/AbstractFileExtractor.java @@ -6,10 +6,18 @@ package modelengine.fit.jade.aipp.file.extract; +import modelengine.fit.jober.aipp.service.OperatorService; import modelengine.fitframework.annotation.Genericable; +/** + * Excel文件提取器的抽象接口。 + * + * @author jsbjfkbsjk + * @since 2025-9-6 + */ public interface AbstractFileExtractor { /** + * 提取文件函数 * * @param fileUrl 文件路径 * @return 表示提取的文件信息的 {@link String}。 @@ -18,10 +26,10 @@ public interface AbstractFileExtractor { String extractFile(String fileUrl); /** + * 返回提取器支持文件类型 * - * @return 表示返回的文件枚举类型 + * @return 表示返回的文件枚举类型 {@link OperatorService.FileType} */ @Genericable(id = "get-fileType") - FileTypeConstant.FileType supportedFileType(); - + OperatorService.FileType supportedFileType(); } diff --git a/app-builder/services/pom.xml b/app-builder/services/pom.xml index 952383ab08..46a9cd5419 100644 --- a/app-builder/services/pom.xml +++ b/app-builder/services/pom.xml @@ -16,6 +16,7 @@ aipp-classify-question aipp-code aipp-extractor + aipp-file-extract-service aipp-genericable aipp-http-call aipp-memory diff --git a/common/dependency/pom.xml b/common/dependency/pom.xml index 8766671fa5..6cf3d291ea 100644 --- a/common/dependency/pom.xml +++ b/common/dependency/pom.xml @@ -36,7 +36,6 @@ 1.0.0-SNAPSHOT 1.0.0-SNAPSHOT 1.0.0-SNAPSHOT - 1.0.0-SNAPSHOT 1.14.12 @@ -372,6 +371,11 @@ aipp-extractor-service 1.0.0-SNAPSHOT + + modelengine.fit.jade + aipp-file-extract-service + 1.0.0-SNAPSHOT + @@ -394,16 +398,6 @@ app-engine-metrics-base-service ${app-engine.version} - - modelengine.fit.jade - aipp-file-extract-service - ${aipp-file-extract.version} - - - modelengine.fit.jade - aipp-file-extract-excel - ${aipp-file-extract.version} - From 3384e17da2998f82f2390794676ef5cad5e384b1 Mon Sep 17 00:00:00 2001 From: jsbjfkbsjk <2504892220@qq.com> Date: Sun, 7 Sep 2025 00:02:27 +0800 Subject: [PATCH 3/8] =?UTF-8?q?=E6=9B=B4=E6=AD=A3=E6=A0=BC=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app-builder/plugins/aipp-file-extract-excel/pom.xml | 1 + app-builder/services/aipp-file-extract-service/pom.xml | 1 + 2 files changed, 2 insertions(+) diff --git a/app-builder/plugins/aipp-file-extract-excel/pom.xml b/app-builder/plugins/aipp-file-extract-excel/pom.xml index 90ae701f05..f68db379d5 100644 --- a/app-builder/plugins/aipp-file-extract-excel/pom.xml +++ b/app-builder/plugins/aipp-file-extract-excel/pom.xml @@ -2,6 +2,7 @@ 4.0.0 + modelengine.fit.jade app-builder-plugin-parent diff --git a/app-builder/services/aipp-file-extract-service/pom.xml b/app-builder/services/aipp-file-extract-service/pom.xml index 6ae89b12e5..87757a5187 100644 --- a/app-builder/services/aipp-file-extract-service/pom.xml +++ b/app-builder/services/aipp-file-extract-service/pom.xml @@ -2,6 +2,7 @@ 4.0.0 + modelengine.fit.jade app-builder-service-parent From 4a2dc21f4297cd04926e2d79501ae0b5a6343c4b Mon Sep 17 00:00:00 2001 From: jsbjfkbsjk <2504892220@qq.com> Date: Mon, 8 Sep 2025 10:08:58 +0800 Subject: [PATCH 4/8] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=BA=8F=E5=88=97?= =?UTF-8?q?=E5=8C=96=E8=83=BD=E5=8A=9B=E5=92=8C=E4=BF=AE=E6=94=B9=E6=A0=BC?= =?UTF-8?q?=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../plugins/aipp-file-extract-excel/pom.xml | 29 +++--- .../aipp/file/extract/ExcelFileExtractor.java | 92 ++++++++++++------ .../file/extract/ExcelFileExtractorTest.java | 23 ++++- .../src/test/resources/file/content.csv | 1 + .../src/test/resources/file/content.xlsx | Bin 9434 -> 0 bytes app-builder/plugins/aipp-plugin/pom.xml | 1 - .../aipp/tool/FileExtractorContainer.java | 45 +++++---- ...FileExtractor.java => FileExtraction.java} | 23 ++--- common/dependency/pom.xml | 6 ++ 9 files changed, 144 insertions(+), 76 deletions(-) create mode 100644 app-builder/plugins/aipp-file-extract-excel/src/test/resources/file/content.csv delete mode 100644 app-builder/plugins/aipp-file-extract-excel/src/test/resources/file/content.xlsx rename app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/{AbstractFileExtractor.java => FileExtraction.java} (61%) diff --git a/app-builder/plugins/aipp-file-extract-excel/pom.xml b/app-builder/plugins/aipp-file-extract-excel/pom.xml index f68db379d5..174e17b214 100644 --- a/app-builder/plugins/aipp-file-extract-excel/pom.xml +++ b/app-builder/plugins/aipp-file-extract-excel/pom.xml @@ -22,18 +22,6 @@ org.fitframework fit-util - - org.junit.jupiter - junit-jupiter - - - org.fitframework - fit-test-framework - - - org.assertj - assertj-core - @@ -50,6 +38,23 @@ modelengine.fit.jade aipp-service + + + + org.junit.jupiter + junit-jupiter + test + + + org.fitframework + fit-test-framework + test + + + org.assertj + assertj-core + test + diff --git a/app-builder/plugins/aipp-file-extract-excel/src/main/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractor.java b/app-builder/plugins/aipp-file-extract-excel/src/main/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractor.java index 6330e58fe1..ec399d3c70 100644 --- a/app-builder/plugins/aipp-file-extract-excel/src/main/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractor.java +++ b/app-builder/plugins/aipp-file-extract-excel/src/main/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractor.java @@ -28,8 +28,11 @@ import java.io.InputStream; import java.math.BigDecimal; import java.nio.file.Files; +import java.nio.file.InvalidPathException; +import java.nio.file.Path; import java.nio.file.Paths; import java.text.SimpleDateFormat; +import java.util.Arrays; import java.util.Date; import java.util.List; import java.util.Map; @@ -38,16 +41,16 @@ /** * Excel文件的提取器。 * - * @author jsbjfkbsjk - * @since 2025-9-6 + * @author 黄政炫 + * @since 2025-09-06 */ @Component -public class ExcelFileExtractor implements AbstractFileExtractor { +public class ExcelFileExtractor implements FileExtraction { /** - * 把单元格转换成格式化字符串 + * 把单元格转换成格式化字符串。 * - * @param cell 表示单元格数据 {@link ReadCellData} - * @return 转换后的内容 {@link String} + * @param cell 表示单元格数据 {@link ReadCellData}。 + * @return 转换后的内容 {@link String}。 */ private static String getCellValueAsString(ReadCellData cell) { switch (cell.getType()) { @@ -73,44 +76,47 @@ private static String getCellValueAsString(ReadCellData cell) { } /** - * 该文件提取器支持excel类型 + * 该文件提取器支持EXCEL和CSV类型。 * - * @return 枚举常量类型 {@link OperatorService.FileType} + * @return 枚举常量类型集合 {@link List}。 */ @Override @Fitable(id = "get-fileType-excel") - public OperatorService.FileType supportedFileType() { - return OperatorService.FileType.EXCEL; + public List supportedFileType() { + return Arrays.asList(OperatorService.FileType.EXCEL.toString(), OperatorService.FileType.CSV.toString()); + } + + /** + * @param fileUrl 表示文件路径 {@link String}。 + * @return 表示路径是否有效 {@link Boolean}。 + */ + private boolean isValidPath(String fileUrl) { + try { + Path path = Paths.get(fileUrl); + return Files.exists(path) && Files.isRegularFile(path); + } catch (InvalidPathException e) { + return false; + } } /** * 从指定路径的 Excel 文件中提取内容,并返回为字符串形式。 * - * @param fileUrl 表示文件路径的 {@link String}. + * @param fileUrl 表示文件路径的 {@link String}。 * @return 表示文件内容的 {@link String}。 */ @Override @Fitable(id = "extract-file-excel") public String extractFile(String fileUrl) { + if (!isValidPath(fileUrl)) { + throw new IllegalArgumentException("无效的文件路径: " + fileUrl); + } File file = Paths.get(fileUrl).toFile(); StringBuilder excelContent = new StringBuilder(); - ReadListener> listener = new ReadListener<>() { - @Override - public void invoke(Map data, AnalysisContext context) { - String line = data.entrySet() - .stream() - .sorted(Map.Entry.comparingByKey()) - .map(e -> e.getValue() == null ? "" : e.getValue()) - .collect(Collectors.joining("\t")); - excelContent.append(line).append('\n'); - } - - @Override - public void doAfterAllAnalysed(AnalysisContext context) { - } - }; + ExcelReadListener listener = new ExcelReadListener(excelContent); + ExcelReader reader = null; try (InputStream is = new BufferedInputStream(Files.newInputStream(file.toPath()))) { - ExcelReader reader = FastExcel.read(is, listener) + reader = FastExcel.read(is, listener) .registerConverter(new CustomCellStringConverter()) .headRowNumber(0) .build(); @@ -122,13 +128,41 @@ public void doAfterAllAnalysed(AnalysisContext context) { reader.read(readSheet); } excelContent.append('\n'); - reader.finish(); // 关闭资源 } catch (IOException e) { - throw new RuntimeException(e); + throw new IllegalStateException("Excel文件读取失败", e); + } finally { + if (reader != null) { + reader.finish(); // 关闭资源 + } } return excelContent.toString(); } + /** + * 读取监听器的内部类实现。 + */ + private class ExcelReadListener implements ReadListener> { + StringBuilder excelContent; + + ExcelReadListener(StringBuilder excelContent) { + this.excelContent = excelContent; + } + + @Override + public void invoke(Map data, AnalysisContext context) { + String line = data.entrySet() + .stream() + .sorted(Map.Entry.comparingByKey()) + .map(e -> e.getValue() == null ? "" : e.getValue()) + .collect(Collectors.joining("\t")); + this.excelContent.append(line).append('\n'); + } + + @Override + public void doAfterAllAnalysed(AnalysisContext context) { + } + } + /** * 自定义单元格数据转换器。 * 该转换器实现了能够处理单元格数据并将其转换为字符串形式。 diff --git a/app-builder/plugins/aipp-file-extract-excel/src/test/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractorTest.java b/app-builder/plugins/aipp-file-extract-excel/src/test/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractorTest.java index 2a6dd6f16c..8d40a2d4f0 100644 --- a/app-builder/plugins/aipp-file-extract-excel/src/test/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractorTest.java +++ b/app-builder/plugins/aipp-file-extract-excel/src/test/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractorTest.java @@ -7,6 +7,7 @@ package modelengine.fit.jade.aipp.file.extract; import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertThrows; import modelengine.fit.jober.aipp.service.OperatorService; import modelengine.fitframework.annotation.Fit; @@ -17,12 +18,14 @@ import org.junit.jupiter.api.Test; import java.io.File; +import java.util.Arrays; +import java.util.List; /** - * 表示{@link ExcelFileExtractor}的测试集 + * 表示{@link ExcelFileExtractor}的测试集。 * - * @author jsbjfkbsjk - * @since 2025-9-6 + * @author 黄政炫 + * @since 2025-09-06 */ @FitTestWithJunit(includeClasses = ExcelFileExtractor.class) @Disabled @@ -33,13 +36,23 @@ class ExcelFileExtractorTest { @Test @DisplayName("测试获取支持文件类型") void supportedFileType() { - assertThat(this.excelFileExtractor.supportedFileType()).isEqualTo(OperatorService.FileType.EXCEL); + List supportedTypes = + Arrays.asList(OperatorService.FileType.EXCEL.toString(), OperatorService.FileType.CSV.toString()); + assertThat(this.excelFileExtractor.supportedFileType()).isEqualTo(supportedTypes); + } + + @Test + @DisplayName("测试能否捕获错误路径") + void validPath() { + assertThrows(IllegalArgumentException.class, () -> { + this.excelFileExtractor.extractFile("invalidPath.csv"); + }); } @Test @DisplayName("测试 excel 文件提取成功") void extractFile() { - File file = new File(this.getClass().getClassLoader().getResource("file/content.xlsx").getFile()); + File file = new File(this.getClass().getClassLoader().getResource("file/content.csv").getFile()); assertThat(this.excelFileExtractor.extractFile(file.getAbsolutePath())).isEqualTo( "Sheet 1:\nThis is an excel test\n\n"); } diff --git a/app-builder/plugins/aipp-file-extract-excel/src/test/resources/file/content.csv b/app-builder/plugins/aipp-file-extract-excel/src/test/resources/file/content.csv new file mode 100644 index 0000000000..f40b535e35 --- /dev/null +++ b/app-builder/plugins/aipp-file-extract-excel/src/test/resources/file/content.csv @@ -0,0 +1 @@ +This is an excel test diff --git a/app-builder/plugins/aipp-file-extract-excel/src/test/resources/file/content.xlsx b/app-builder/plugins/aipp-file-extract-excel/src/test/resources/file/content.xlsx deleted file mode 100644 index ca17d408e0d80eec3ce5c87340bd79d20a923b9a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9434 zcmeHNgE2P+GdXOS-#5M7lvrq@^3~=sD+l4##`{ zf$!V%%-+v_X0Lbl`t9|u+=?>LFxUV%00IC2cm@#W^c%5#1OULm0svS51SlO*J6mUv zt+T!=*dFAh%jjlf{WJ>(iaG-T1-bse+kfy1l&B8ab~2;4Xlx6LwW`0%3@R!@AZ{eq zA^!%S;1;DxUb{g+ed9@Gs-#wd&_SVOJ)%Q-%!gk(N2w(uv}VnCrKLb_0L%|7bv)$1 zl}s1+%PCP0tBMTa$iPQAIDi|kftRhHGh(zPy(gu><)}tt+w3@!-x4W)D=13QLi@D{ z>T+g>f1pzfN`}S---yf2UzveWdP$?XS&5ZRo%}MIhd?ZGkZ z4BZNI0v=HkA&Zhn1vacC$ZUa#=oKv3M^DXs;F!o38wcxIk-FyGJPdXu@EcRt%4%Ko zZ+QJiBSY0P6ecTO8!Xo&fX9hW$GhzPxux$I2DL%E^5=+mZulUNQ#<;cV?5uHZ%@-$ zA`ihW0SHck=oX8BrN%vs6Jn=QsBegeC+eJd-|?poWjD!ZkA)pd1GlW0sg?i)ns&*0Miz$Bx`1c*|6C}6O)f*8^uJB!tX`j%1{we$fd>H4AXh+4=I_Q*qBd`v&5Z7;Z*l+V zGSv?;5{5&UBGR1BMMG_Vul1=~xPzZgxz;>;VbR@nc;?B~Sc_P99ZR2?>8FG)rt3yW z+r+V#!33S~0*(n8m0yf_MFknv_hGo_(3K*kk)DgX%4Rc3qzgXY&uP?>0j~jkKC`J3 zj+3Y5TMSrcp_z2SXg*h->~2@J;a{YFfuzf7x;+T=5InQ&gx$u#JjFn6n6)2dm6}gN z9QUVI$ACU@86e`%m^>k~=TPFbSyR^H1XO&*;dFgW?P!@qZ1+ahMAvuTotWDlRsq?2 zR}>4G%6Dr~i6dXyvVjF2uy>lXd>P@1wx;oHrY)kQFH%IILdVwWqFcJhdw|xfGdR7Y z{C>n7KWEZ{!uQK9h4x7|gOEfJkzU1+UL&eS(m_sbVo{_DhD{tSXGuaPi|j^ti?*Uo zl{px8s+~m6X#Fw&z@;iT?0jS`E8f%fM^9+U!t;eqT>Y58#u`eacPY~#fBQcA!F%#$ zP-+||_I^r*K7$4_67U4JRYb;IWa=s=Va!5}LsTm&ztRA#VLTaYJ3Sqmd7B63N4rLmtl}vaHgLP{4ZP8P%5$Cm65DYa?0jz2yc6er)EV`Suf+#$@Kb0vzlc;nId@XG;bDN zhLgLnas8>yvN#?$ZG~CE8kFlSkzRIAmLhZQ&U5(+64^b}T287tXMXcoXnx|W;|es7 z8wz`5s`2(zfIcTml>tFm{H!C!Z1+rr)RAD>M43s{3LQu*PAm~9{td-r{?m-9Fgtaj zE5d~zI{ehq_o-34-?Az+j3BQY^2;IQi~R?yoXia!K_)8Bjuy6NPCxuGdqjsNGz+rW z!J9k(&n=_1LCV3?+%QNjr;%PhhqM)j47_o*klF6FK}oi9y)M7 zEAMVJ;U2jFd$_v^D*`J6YLG_UjC)$z+MJ5uy6AX}S*<>Z5n{z>G|g%DZmPN?J~t7J zuKi><+HDQW&9``+hgtcwyYXF^UVz*YXsxU5OuowXYXl8lC=kjctl6(3iR`#6_mK&= zmot}I#rXzjudR)uG{5Sl&YShew*jLVn|t)s8)Q)xPSSd{O=zR5Ik@@w$aCyl_bOsg`^JX- z$j0;JYraAvQ`GnL6OB-B){XJZ%fGwNF|-L+aBVMOBQo$s8jdrzCPB-~OTX)j)WiW+ zyip>*#qaX!o~;N+EQ}QRIxF=K%sg-zLiD+$$kOIWAHJGNzUF{_-BZJ#hhJIiZa3VM zgwHm+?UUruNPrY~q=7(|BzM2~-jR=y@Q&+Qnw2LiOrFihq2n{e-+9xl@_RHrh$kI> z`lFcq=}n!@K{g?|BmR`onFR9jIBzE|YIY9=$(T?K5TflRkp9NCcG+u( zbS?@byaN8{-HZr>V3cF68htPZGMR(#uszhW9Dc<600{wxaBd#B78V9|>0E7=7vzU6 zFg+P(b0#7$X0C5XnY=wtzt^D&1PXe)b4#RmioL_OG!VONO?sr`N60HcPZyy1mT_YV zBa49~kEm+xSu2@m1nIMtkU0G@K`)1EBMcjmh^lQQ$2!~N)$il zf*Or04xd4nmu*WQPOa!X34LCWX~=WOY$KzI$SQOBl0iWp(koa1pp2=YunZ8D6$0zPm^>iFor)3-+?un;hxp}6s3PnB0_%u z-p%noKN?Mtui;+zd7V0FU@ybOa`5)_G~e6%(D!^3P5y2tfsbh@`^ z(rZnJ{ouZI@B8(>0}{cT^$5EsT-9~lJj^Sin8qu*J$*|kxLr_>&(@&tj4-5f{Uor3`Z-uJPaTF{PH_huBAX)9(Au6+c{oZ-Tj51l1rS63I9 z)kWdcCgnbyh(uXK6LkgDLi&i0-a8o(XJ=gEG>tTQ(khIMf@dEp%d3)oPZ9dI{r4DV zesVeI0*5?rxpY~{;nhhuM0m*q>*-&@i4-s_?=Bwkgf^-#c1b+PtcA@&T?$D~hc$zS1Q5f|2YT&%_ELgL~L5m1IZ280F?__tIR?(HQ!PAM(9p?!xq7!W0k0^+`JVOlV7Dg!jl7-6h zq`-a!c3&4ThGY_EuN{P`5SldehK)mX=To?rbZ@VxVCa+ z^lPVU)3*g2!fDK+FtC9kw!+;=-i%qF)EJ(-4$AGyB?mo1A}d~cCw263byIZ~>y8%` z(d5JJ@7Gs)p(dRw7gOm})IJ+BPb+hIp zm|^Yp^=pP2S-n;vP_!%9d4y_d<|q+e`h3H0RX4Lq0BXs+7aD(Jk9slsWeva#!{Z~N zEUxX~`?y1{=mQLbJqdHT(#%R*19|bh*R;(9Sf5$Ox*D^C6}F~4^TEx1y%zaZFV%I2 zC*D8}cr;^|^=8UYs5qCVxgMx*y9hcLHI>Lf$LxmS8_JpzDY@V)>7$BZr`Sre(lq#RQd2KIiED_@A$k?qevA*GV^|`?{kWJ{|WvSGA5=f=lUc?*55#Ut@ ziN)}|jFumSV|kz89@EXB&)^-4!ahozWkaXogTAD>mLx=Hm*jx%%-iqA+CgR!xQMTm zCQstKqEcq>RotWhq=bByxU+>METRRPXOj~>M^lrD4nF*yPl&(c_ArZYayicXjZ4>U zgF*A@=HM~zm~b+l5XDzn2_|Ws9Pi3Ljw;ZR;491~b!6@^@+A~OpT#OHnvqNnI>A|d zHOuRv-JWoc-rC_^M=3rMov5VEwY`;Ra4fT8Y(T}F1WWU+O?ZYwcQ7-ge`J-@YL@Q` zuBuX+uV$l1KvtM|eqrPCf{XvKl1i{Y^^>-X5dDsH>c!R9h2}|9_&Q&{*Y@6A`O4oE zJ5xC%6i;$FBWY~faU0002r&e}PV(A!i^?;ojj`oA)Eu2&&kEpjMaZl*fP2g(ao2_o z6FaK=9UY|E^~8B~XVCSX86ms-dJ!KMm-~ivl7M8mc!{m)OqN}zx1J5Fjcp`F$>3!& zAs$z&SmCO<-3g>&OLs+t*QqZuXG>j>TD0qyh)x;}v$^Xx-6aT#tBV&irtGjSH*D+B zC!_~&UQ{Wf(Z;?~wBx)IzSdLkZ68#*>i2lGKl#cA(l7c=aZ~Sp#Jvfrg)tx@8Rd^~ z?Boo#2K~&xs?`Q;=OLvZ-Zw$?W;$)_hT3N|1#1Whkx;NvP%UGaAgmRm$ZEQ}Z>8~G zQVK2bEhkpNCu}@1COX1CjEJu*`k!sKa02@VEEu@vlx-5CXiycPLJR0s2i948?8=<% zLf==2#RbqwsWfN9rJIhVQ}ph0wP-6@VhBk_ug2Ew6doiSiNmr`S;|qQs7eEk42$Di zwXCG2*HU24v!CK$`Z2%L2`Hq#gA+sdtkQCSgK1?n>*rwVih}zhD?yJwcg&o>zm&0OJDFx?tWqrhqnG+>ICS4c8?d}rqgUU;^hbyy84JGM@6p?lAVu+% ztNR#>W+s)J!%?6}WT8Urs~GRM6{dP%vLBw&xmZfiaxfZaUmh?s5+&l0`~J$AnhqPGSf)7sg+HsaO3xaRNSM9r)SGn{ac2UkK{qo5Ld*sgD$Y zIu%2aqK?}+W!>PYUp)1_NpVVK4J0+tuPoK6J9zeF5vr4hdQ)S6rKNP=a2h9y=snO~ zDGxubT;|LR(bsth_rszI;)*p5Y^)+*5d+;zChYQDo0e@P?2}9FGb*g?#w!ij_F3Sq z-WPb^{u-rV?}eqin;FOL#Hp0dVYip&?+%`rU;e68cENr`Mf zpaszgl`z(Ms@3{R9y)k#6+I3ohA-XA_00$9SC7*7kbE*@qC7t4JL>JQ z-bI0DKchUf-tXzEn)OIi@T2Cso=MhMNYERuAI3}os&pClZZXct-%O6ituE!oE}wAs z?^~sdWq$z?!|4eih}dzF8r$&^cI!O}8uegLN$mqKj6V@{9EA14uRR`n7m*)_IGDIF zZKFRnk5|(R4NfG=&xh7Zy%a4v&t<;JShgHVVCYp;-gv9~NRAX82u+P5(z@$*|9*3y z`U6Q&+HQ^ulCyX;b)J~zqp-2$;JZ5)-iEAha2e5(e;f|Kq{N4vk@P|S8Te_bEkw-f zV)M-w8hod$XlPdq{K7*i;HxiB>RxrFR77zafBsy>9=>V)MS7epv;4qNXrA_&#?xB= zi93Xf%FQRIP4Wb;StzUg^}h2#rHKgm$fQ$68h6YhGz4M}b)yECjS|ZdQoXR?YPVcB z1qF-V;}BVYOkCn>0TEVS%hu}jsuTwYO5H1OJj#KDSY8~Yi(qL%N}9w}tt~q1E8&?N zWMKLbuFjyzv`L?jgIeSwZdiuudd+jqbtP ztf7F)M&jhnh$#1SQ#n5UinQe--*J^yI**UU2!gtkI$A8iy(E?#(TzRIp9Yq6Fn z1WNCY$`h~5`6I&NezQVZ>aw46;flq4(}OWp2>U&M7G zP-6`x=qa5tK8nX_f|=?|A3{YMG%f^%W6f9h-Z6+!%`};o(D2?%Z=3zrvlqjj8%$yv}u<&=jB zlrn4f^)89o&6k^t?Ty`BC*}+*HwN*)9yvTt37Or_Df&iKLT8mqmV}P&i(5l~Dj;Du z@6s&x{`jSg-Xx*pgr8{f>K!9i;JdlbkWv&)eZLSWDPBt1aMUHDzzQsQ#RZCvO#k}g z(p3TCy(dJBtrt)1rTd}izQ1%FaexmUmJ~C~!6q9cwwZ*^X;<`x+IyUb*wdzmrrsdP zbTZgoD7~EYd5hR}FwO{F;(Q=ByTT$VC-Ia;#4A|v`N#*)$@+=KGMEg~Xg@^Y!gOsx zy=_EC1{+`EE25J6F?m#HK4bQWSYTay7iY-1$_Ph{= zT1paL&SLWMO+`Kn;>Kp*E8Q^_wUf*cofQ!J?A=y5MnVI!Fo3)_<)EN5M`wo~8J;yir znS{dZSm^%W?uC48(?as~3o!iw#;9!2{!$BA7#?R1BE!N3zOy3YK@o%;1)@GN_;>(X z)H9SrD6k|T3P!?t7s8w0sP}osW}+OTI0Gc}LH|3g4DIdz%PPo{{kf$?b=XcYqeG@b zp72AP&p+X+X2z}|*@T=ac1tIp*`^Hi78&cDS$f_@om4yz7@x8z(SJAlInr;ucgbK~vXjQJa8$@c|AR@pH((^M9lGl+ z?`jDVfOt9O2t`ny>2{^XzBK+7Q=HBv20M)~#}q-`mYvy3rITZjTJLOT*s{en{(R-p-yse=q^G|?(PLBT){Bg{OQ0H%RhL%=Py7x)SrM4Tb>UA9+qXl02Xlm(CL4Rw1=V(^VeUZ`;c_@pUV8oW*?&bGbQwY_?Bky|@gT1JHqJZ*{ii?w1qlGuKl$4|{psBmWndvT2ml~M OK9mqMPJ8y_?0*1>(gX4U diff --git a/app-builder/plugins/aipp-plugin/pom.xml b/app-builder/plugins/aipp-plugin/pom.xml index c913a9e5cb..8241a16855 100644 --- a/app-builder/plugins/aipp-plugin/pom.xml +++ b/app-builder/plugins/aipp-plugin/pom.xml @@ -151,7 +151,6 @@ modelengine.fit.jade.plugin aipp-file-extract-excel - 1.0.0-SNAPSHOT test diff --git a/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java b/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java index 3d61c1a2fc..cd975438b3 100644 --- a/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java +++ b/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java @@ -6,46 +6,55 @@ package modelengine.fit.jober.aipp.tool; -import modelengine.fit.jade.aipp.file.extract.AbstractFileExtractor; +import modelengine.fit.jade.aipp.file.extract.FileExtraction; import modelengine.fit.jober.aipp.service.OperatorService; import modelengine.fitframework.annotation.Component; -import java.util.EnumMap; +import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; /** - * 管理文件提取器的容器 + * 管理文件提取器的容器。 * - * @author jsbjfkbsjk - * @since 2025-9-6 + * @author 黄政炫 + * @since 2025-09-06 */ @Component public class FileExtractorContainer { - private final Map map; + /** + * 一种文件类型对应一个提取器集合。 + */ + private final Map> fileExtractorMap; /** - * 初始化用框架注入提取器 + * 初始化用框架注入提取器。 * - * @param extractors 文件提取器 {@link AbstractFileExtractor} + * @param extractors 文件提取器 {@link FileExtraction}。 */ - public FileExtractorContainer(List extractors) { - map = new EnumMap<>(OperatorService.FileType.class); - for (AbstractFileExtractor fileExtractor : extractors) { - map.put(fileExtractor.supportedFileType(), fileExtractor); + public FileExtractorContainer(List extractors) { + this.fileExtractorMap = new HashMap<>(); + for (FileExtraction fileExtractor : extractors) { + for (String supportedFileType : fileExtractor.supportedFileType()) { + this.fileExtractorMap.computeIfAbsent(supportedFileType, k -> new ArrayList<>()).add(fileExtractor); + } } } /** - * 根据文件类型找到支持文件类型的提取器 + * 根据文件类型找到支持文件类型的提取器。 * - * @param fileUrl 文件路径 {@link String} - * @param fileType 文件枚举类型 {@link OperatorService.FileType} - * @return 提取的字符串 {@link Optional} + * @param fileUrl 文件路径 {@link String}。 + * @param fileType 文件枚举类型 {@link OperatorService.FileType}。 + * @return 提取的字符串 {@link Optional}。 */ public Optional extract(String fileUrl, OperatorService.FileType fileType) { - return Optional.ofNullable(map.get(fileType)) - .map(extractor -> extractor.extractFile(fileUrl)); + List extractors = this.fileExtractorMap.get(fileType.toString()); + if (extractors == null || extractors.isEmpty()) { + return Optional.empty(); + } + return Optional.ofNullable(extractors.get(0)).map(extractor -> extractor.extractFile(fileUrl)); } } diff --git a/app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/AbstractFileExtractor.java b/app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileExtraction.java similarity index 61% rename from app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/AbstractFileExtractor.java rename to app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileExtraction.java index d09d9693d6..42f09198b3 100644 --- a/app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/AbstractFileExtractor.java +++ b/app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileExtraction.java @@ -6,30 +6,31 @@ package modelengine.fit.jade.aipp.file.extract; -import modelengine.fit.jober.aipp.service.OperatorService; import modelengine.fitframework.annotation.Genericable; +import java.util.List; + /** * Excel文件提取器的抽象接口。 * - * @author jsbjfkbsjk - * @since 2025-9-6 + * @author 黄政炫 + * @since 2025-09-06 */ -public interface AbstractFileExtractor { +public interface FileExtraction { /** - * 提取文件函数 + * 提取文件函数。 * - * @param fileUrl 文件路径 + * @param fileUrl 表示文件路径 {@link String}。 * @return 表示提取的文件信息的 {@link String}。 */ - @Genericable(id = "extract-file") + @Genericable(id = "modelengine.fit.jade.file.extractFile") String extractFile(String fileUrl); /** - * 返回提取器支持文件类型 + * 返回提取器支持文件类型。 * - * @return 表示返回的文件枚举类型 {@link OperatorService.FileType} + * @return 表示返回的文件类型 {@link List}。 */ - @Genericable(id = "get-fileType") - OperatorService.FileType supportedFileType(); + @Genericable(id = "modelengine.fit.jade.file.getFileType") + List supportedFileType(); } diff --git a/common/dependency/pom.xml b/common/dependency/pom.xml index 6cf3d291ea..72ea911cf5 100644 --- a/common/dependency/pom.xml +++ b/common/dependency/pom.xml @@ -69,6 +69,7 @@ 2.16.1 + 1.0.0-SNAPSHOT @@ -597,6 +598,11 @@ poi-scratchpad ${poi.version} + + modelengine.fit.jade.plugin + aipp-file-extract-excel + ${file-extract.version} + From 8ec5b8a580af0d50ad932f0dc85553b2262d726a Mon Sep 17 00:00:00 2001 From: jsbjfkbsjk <2504892220@qq.com> Date: Wed, 10 Sep 2025 12:52:16 +0800 Subject: [PATCH 5/8] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=A0=BC=E5=BC=8F?= =?UTF-8?q?=E5=92=8C=E5=81=A5=E5=A3=AE=E6=80=A7=E5=A4=84=E7=90=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../aipp/file/extract/ExcelFileExtractor.java | 27 ++++++++++--------- .../file/extract/ExcelFileExtractorTest.java | 22 ++++++++++++--- .../src/test/resources/file/content.csv | 11 ++++++++ .../aipp/tool/FileExtractorContainer.java | 20 +++++++++----- ...FileExtraction.java => FileExtractor.java} | 4 +-- 5 files changed, 59 insertions(+), 25 deletions(-) rename app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/{FileExtraction.java => FileExtractor.java} (93%) diff --git a/app-builder/plugins/aipp-file-extract-excel/src/main/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractor.java b/app-builder/plugins/aipp-file-extract-excel/src/main/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractor.java index ec399d3c70..c0191672ec 100644 --- a/app-builder/plugins/aipp-file-extract-excel/src/main/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractor.java +++ b/app-builder/plugins/aipp-file-extract-excel/src/main/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractor.java @@ -18,6 +18,8 @@ import cn.idev.excel.read.listener.ReadListener; import cn.idev.excel.read.metadata.ReadSheet; import cn.idev.excel.util.DateUtils; +import cn.idev.excel.util.StringUtils; +import lombok.NonNull; import modelengine.fit.jober.aipp.service.OperatorService; import modelengine.fitframework.annotation.Component; import modelengine.fitframework.annotation.Fitable; @@ -45,22 +47,20 @@ * @since 2025-09-06 */ @Component -public class ExcelFileExtractor implements FileExtraction { +public class ExcelFileExtractor implements FileExtractor { /** * 把单元格转换成格式化字符串。 * * @param cell 表示单元格数据 {@link ReadCellData}。 * @return 转换后的内容 {@link String}。 */ - private static String getCellValueAsString(ReadCellData cell) { + private static String getCellValueAsString(@NonNull ReadCellData cell) { switch (cell.getType()) { case STRING: return cell.getStringValue(); case NUMBER: DataFormatData fmt = cell.getDataFormatData(); - short formatIndex = fmt.getIndex(); - String formatString = fmt.getFormat(); - if (DateUtils.isADateFormat(formatIndex, formatString)) { + if (DateUtils.isADateFormat(fmt.getIndex(), fmt.getFormat())) { double value = cell.getNumberValue().doubleValue(); Date date = DateUtils.getJavaDate(value, true); return new SimpleDateFormat("yyyy-MM-dd").format(date); @@ -82,13 +82,15 @@ private static String getCellValueAsString(ReadCellData cell) { */ @Override @Fitable(id = "get-fileType-excel") - public List supportedFileType() { + public List supportedFileTypes() { return Arrays.asList(OperatorService.FileType.EXCEL.toString(), OperatorService.FileType.CSV.toString()); } /** + * 判断文件路径是否有效 + * * @param fileUrl 表示文件路径 {@link String}。 - * @return 表示路径是否有效 {@link Boolean}。 + * @return 表示路径是否有效 {@code boolean}。 */ private boolean isValidPath(String fileUrl) { try { @@ -109,7 +111,7 @@ private boolean isValidPath(String fileUrl) { @Fitable(id = "extract-file-excel") public String extractFile(String fileUrl) { if (!isValidPath(fileUrl)) { - throw new IllegalArgumentException("无效的文件路径: " + fileUrl); + throw new IllegalArgumentException("Invalid FilePath" + fileUrl); } File file = Paths.get(fileUrl).toFile(); StringBuilder excelContent = new StringBuilder(); @@ -129,7 +131,7 @@ public String extractFile(String fileUrl) { } excelContent.append('\n'); } catch (IOException e) { - throw new IllegalStateException("Excel文件读取失败", e); + throw new IllegalStateException("Fail To Extract Excel File", e); } finally { if (reader != null) { reader.finish(); // 关闭资源 @@ -142,7 +144,7 @@ public String extractFile(String fileUrl) { * 读取监听器的内部类实现。 */ private class ExcelReadListener implements ReadListener> { - StringBuilder excelContent; + private final StringBuilder excelContent; ExcelReadListener(StringBuilder excelContent) { this.excelContent = excelContent; @@ -159,8 +161,7 @@ public void invoke(Map data, AnalysisContext context) { } @Override - public void doAfterAllAnalysed(AnalysisContext context) { - } + public void doAfterAllAnalysed(AnalysisContext context) {} } /** @@ -181,7 +182,7 @@ public CellDataTypeEnum supportExcelTypeKey() { @Override public String convertToJavaData(ReadCellData cellData, ExcelContentProperty contentProperty, GlobalConfiguration globalConfiguration) { - return getCellValueAsString(cellData); + return (cellData != null) ? getCellValueAsString(cellData) : StringUtils.EMPTY; } } } diff --git a/app-builder/plugins/aipp-file-extract-excel/src/test/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractorTest.java b/app-builder/plugins/aipp-file-extract-excel/src/test/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractorTest.java index 8d40a2d4f0..6b5befc33a 100644 --- a/app-builder/plugins/aipp-file-extract-excel/src/test/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractorTest.java +++ b/app-builder/plugins/aipp-file-extract-excel/src/test/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractorTest.java @@ -38,7 +38,7 @@ class ExcelFileExtractorTest { void supportedFileType() { List supportedTypes = Arrays.asList(OperatorService.FileType.EXCEL.toString(), OperatorService.FileType.CSV.toString()); - assertThat(this.excelFileExtractor.supportedFileType()).isEqualTo(supportedTypes); + assertThat(this.excelFileExtractor.supportedFileTypes()).isEqualTo(supportedTypes); } @Test @@ -53,7 +53,23 @@ void validPath() { @DisplayName("测试 excel 文件提取成功") void extractFile() { File file = new File(this.getClass().getClassLoader().getResource("file/content.csv").getFile()); - assertThat(this.excelFileExtractor.extractFile(file.getAbsolutePath())).isEqualTo( - "Sheet 1:\nThis is an excel test\n\n"); + assertThat(this.excelFileExtractor.extractFile(file.getAbsolutePath())).isEqualTo(expected); } + + String expected = """ + Sheet 1: + This is an excel test + ID\tName\tAge\tJoinDate\tActive\tSalary\tDepartment\tNotes + 1\tJohn Doe\t25\t2023-01-15\tTRUE\t8000.50\tIT\tRegular employee + 2\tJane Smith\t30\t2022-05-20\tTRUE\t12000.00\tMarketing\tTeam leader + 3\tBob Johnson\t28\t2023-03-10\tFALSE\t7500.00\tSales\tLeft company + 4\tAlice Brown\t35\t2020-12-01\tTRUE\t15000.75\tIT\tSenior engineer + 5\tTom Wilson\t22\t2023-08-25\tTRUE\t6000.00\tHR\tIntern + 6\t\t40\t2019-06-15\tTRUE\t18000.00\tFinance\tDepartment manager + 7\tLucy Davis\t27\t2023-02-28\tFALSE\t7000.00\tOperations\tContract ended + 8\tMike Miller\t32\t2021-09-10\tTRUE\t13500.50\tIT\tProject lead + 9\tSarah Lee\t29\t2022-11-05\tTRUE\t9500.00\tMarketing\tMarketing specialist + 10\tDavid Zhang\t26\t2023-07-12\tTRUE\t8500.25\tSales\tSales representative + + """; } \ No newline at end of file diff --git a/app-builder/plugins/aipp-file-extract-excel/src/test/resources/file/content.csv b/app-builder/plugins/aipp-file-extract-excel/src/test/resources/file/content.csv index f40b535e35..4e03d459f0 100644 --- a/app-builder/plugins/aipp-file-extract-excel/src/test/resources/file/content.csv +++ b/app-builder/plugins/aipp-file-extract-excel/src/test/resources/file/content.csv @@ -1 +1,12 @@ This is an excel test +ID,Name,Age,JoinDate,Active,Salary,Department,Notes +1,John Doe,25,2023-01-15,TRUE,8000.50,IT,"Regular employee" +2,Jane Smith,30,2022-05-20,TRUE,12000.00,Marketing,"Team leader" +3,Bob Johnson,28,2023-03-10,FALSE,7500.00,Sales,"Left company" +4,Alice Brown,35,2020-12-01,TRUE,15000.75,IT,"Senior engineer" +5,Tom Wilson,22,2023-08-25,TRUE,6000.00,HR,"Intern" +6,,40,2019-06-15,TRUE,18000.00,Finance,"Department manager" +7,Lucy Davis,27,2023-02-28,FALSE,7000.00,Operations,"Contract ended" +8,Mike Miller,32,2021-09-10,TRUE,13500.50,IT,"Project lead" +9,Sarah Lee,29,2022-11-05,TRUE,9500.00,Marketing,"Marketing specialist" +10,David Zhang,26,2023-07-12,TRUE,8500.25,Sales,"Sales representative" \ No newline at end of file diff --git a/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java b/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java index cd975438b3..715dea1b06 100644 --- a/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java +++ b/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java @@ -6,9 +6,10 @@ package modelengine.fit.jober.aipp.tool; -import modelengine.fit.jade.aipp.file.extract.FileExtraction; +import modelengine.fit.jade.aipp.file.extract.FileExtractor; import modelengine.fit.jober.aipp.service.OperatorService; import modelengine.fitframework.annotation.Component; +import modelengine.fitframework.log.Logger; import java.util.ArrayList; import java.util.HashMap; @@ -24,20 +25,22 @@ */ @Component public class FileExtractorContainer { + private static final Logger log = Logger.get(FileExtractorContainer.class); + /** * 一种文件类型对应一个提取器集合。 */ - private final Map> fileExtractorMap; + private final Map> fileExtractorMap; /** * 初始化用框架注入提取器。 * - * @param extractors 文件提取器 {@link FileExtraction}。 + * @param extractors 文件提取器 {@link FileExtractor}。 */ - public FileExtractorContainer(List extractors) { + public FileExtractorContainer(List extractors) { this.fileExtractorMap = new HashMap<>(); - for (FileExtraction fileExtractor : extractors) { - for (String supportedFileType : fileExtractor.supportedFileType()) { + for (FileExtractor fileExtractor : extractors) { + for (String supportedFileType : fileExtractor.supportedFileTypes()) { this.fileExtractorMap.computeIfAbsent(supportedFileType, k -> new ArrayList<>()).add(fileExtractor); } } @@ -51,10 +54,13 @@ public FileExtractorContainer(List extractors) { * @return 提取的字符串 {@link Optional}。 */ public Optional extract(String fileUrl, OperatorService.FileType fileType) { - List extractors = this.fileExtractorMap.get(fileType.toString()); + List extractors = this.fileExtractorMap.get(fileType.toString()); if (extractors == null || extractors.isEmpty()) { return Optional.empty(); } + if (extractors.size() > 1) { + log.warn("Multiple extractors found , using first: {}", extractors.get(0).getClass().getSimpleName()); + } return Optional.ofNullable(extractors.get(0)).map(extractor -> extractor.extractFile(fileUrl)); } } diff --git a/app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileExtraction.java b/app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileExtractor.java similarity index 93% rename from app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileExtraction.java rename to app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileExtractor.java index 42f09198b3..5c189b2f0c 100644 --- a/app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileExtraction.java +++ b/app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileExtractor.java @@ -16,7 +16,7 @@ * @author 黄政炫 * @since 2025-09-06 */ -public interface FileExtraction { +public interface FileExtractor{ /** * 提取文件函数。 * @@ -32,5 +32,5 @@ public interface FileExtraction { * @return 表示返回的文件类型 {@link List}。 */ @Genericable(id = "modelengine.fit.jade.file.getFileType") - List supportedFileType(); + List supportedFileTypes(); } From 66a941203a093301452b9fb87169f564b7799f15 Mon Sep 17 00:00:00 2001 From: Muggle <125979968+jsbjfkbsjk@users.noreply.github.com> Date: Wed, 10 Sep 2025 14:51:06 +0800 Subject: [PATCH 6/8] Update FileExtractor.java MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 格式化 --- .../modelengine/fit/jade/aipp/file/extract/FileExtractor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileExtractor.java b/app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileExtractor.java index 5c189b2f0c..ed0728b58b 100644 --- a/app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileExtractor.java +++ b/app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileExtractor.java @@ -16,7 +16,7 @@ * @author 黄政炫 * @since 2025-09-06 */ -public interface FileExtractor{ +public interface FileExtractor { /** * 提取文件函数。 * From 5aea926113e3e40a19fb67a70d0b6a3a72e0fb96 Mon Sep 17 00:00:00 2001 From: jsbjfkbsjk <2504892220@qq.com> Date: Thu, 11 Sep 2025 22:55:48 +0800 Subject: [PATCH 7/8] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=A0=BC=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../aipp/file/extract/ExcelFileExtractor.java | 7 ++-- .../file/extract/ExcelFileExtractorTest.java | 35 +++++++++---------- .../aipp/tool/FileExtractorContainer.java | 12 +++++-- .../jade/aipp/file/extract/FileExtractor.java | 6 ++-- 4 files changed, 32 insertions(+), 28 deletions(-) diff --git a/app-builder/plugins/aipp-file-extract-excel/src/main/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractor.java b/app-builder/plugins/aipp-file-extract-excel/src/main/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractor.java index c0191672ec..07cf4d9c81 100644 --- a/app-builder/plugins/aipp-file-extract-excel/src/main/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractor.java +++ b/app-builder/plugins/aipp-file-extract-excel/src/main/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractor.java @@ -78,7 +78,7 @@ private static String getCellValueAsString(@NonNull ReadCellData cell) { /** * 该文件提取器支持EXCEL和CSV类型。 * - * @return 枚举常量类型集合 {@link List}。 + * @return 支持的枚举常量类型列表 {@link List}{@code <}{@link String}{@code >}。 */ @Override @Fitable(id = "get-fileType-excel") @@ -111,7 +111,7 @@ private boolean isValidPath(String fileUrl) { @Fitable(id = "extract-file-excel") public String extractFile(String fileUrl) { if (!isValidPath(fileUrl)) { - throw new IllegalArgumentException("Invalid FilePath" + fileUrl); + throw new IllegalArgumentException(String.format("Invalid FilePath. [fileUrl=%s]", fileUrl)); } File file = Paths.get(fileUrl).toFile(); StringBuilder excelContent = new StringBuilder(); @@ -131,7 +131,8 @@ public String extractFile(String fileUrl) { } excelContent.append('\n'); } catch (IOException e) { - throw new IllegalStateException("Fail To Extract Excel File", e); + throw new IllegalStateException(String.format("Fail to extract excel file. [exception=%s]", e.getMessage()), + e); } finally { if (reader != null) { reader.finish(); // 关闭资源 diff --git a/app-builder/plugins/aipp-file-extract-excel/src/test/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractorTest.java b/app-builder/plugins/aipp-file-extract-excel/src/test/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractorTest.java index 6b5befc33a..ca977c1e73 100644 --- a/app-builder/plugins/aipp-file-extract-excel/src/test/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractorTest.java +++ b/app-builder/plugins/aipp-file-extract-excel/src/test/java/modelengine/fit/jade/aipp/file/extract/ExcelFileExtractorTest.java @@ -13,7 +13,6 @@ import modelengine.fitframework.annotation.Fit; import modelengine.fitframework.test.annotation.FitTestWithJunit; -import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Test; @@ -28,7 +27,6 @@ * @since 2025-09-06 */ @FitTestWithJunit(includeClasses = ExcelFileExtractor.class) -@Disabled class ExcelFileExtractorTest { @Fit ExcelFileExtractor excelFileExtractor; @@ -53,23 +51,22 @@ void validPath() { @DisplayName("测试 excel 文件提取成功") void extractFile() { File file = new File(this.getClass().getClassLoader().getResource("file/content.csv").getFile()); + String expected = """ + Sheet 1: + This is an excel test + ID\tName\tAge\tJoinDate\tActive\tSalary\tDepartment\tNotes + 1\tJohn Doe\t25\t2023-01-15\tTRUE\t8000.50\tIT\tRegular employee + 2\tJane Smith\t30\t2022-05-20\tTRUE\t12000.00\tMarketing\tTeam leader + 3\tBob Johnson\t28\t2023-03-10\tFALSE\t7500.00\tSales\tLeft company + 4\tAlice Brown\t35\t2020-12-01\tTRUE\t15000.75\tIT\tSenior engineer + 5\tTom Wilson\t22\t2023-08-25\tTRUE\t6000.00\tHR\tIntern + 6\t\t40\t2019-06-15\tTRUE\t18000.00\tFinance\tDepartment manager + 7\tLucy Davis\t27\t2023-02-28\tFALSE\t7000.00\tOperations\tContract ended + 8\tMike Miller\t32\t2021-09-10\tTRUE\t13500.50\tIT\tProject lead + 9\tSarah Lee\t29\t2022-11-05\tTRUE\t9500.00\tMarketing\tMarketing specialist + 10\tDavid Zhang\t26\t2023-07-12\tTRUE\t8500.25\tSales\tSales representative + + """; assertThat(this.excelFileExtractor.extractFile(file.getAbsolutePath())).isEqualTo(expected); } - - String expected = """ - Sheet 1: - This is an excel test - ID\tName\tAge\tJoinDate\tActive\tSalary\tDepartment\tNotes - 1\tJohn Doe\t25\t2023-01-15\tTRUE\t8000.50\tIT\tRegular employee - 2\tJane Smith\t30\t2022-05-20\tTRUE\t12000.00\tMarketing\tTeam leader - 3\tBob Johnson\t28\t2023-03-10\tFALSE\t7500.00\tSales\tLeft company - 4\tAlice Brown\t35\t2020-12-01\tTRUE\t15000.75\tIT\tSenior engineer - 5\tTom Wilson\t22\t2023-08-25\tTRUE\t6000.00\tHR\tIntern - 6\t\t40\t2019-06-15\tTRUE\t18000.00\tFinance\tDepartment manager - 7\tLucy Davis\t27\t2023-02-28\tFALSE\t7000.00\tOperations\tContract ended - 8\tMike Miller\t32\t2021-09-10\tTRUE\t13500.50\tIT\tProject lead - 9\tSarah Lee\t29\t2022-11-05\tTRUE\t9500.00\tMarketing\tMarketing specialist - 10\tDavid Zhang\t26\t2023-07-12\tTRUE\t8500.25\tSales\tSales representative - - """; } \ No newline at end of file diff --git a/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java b/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java index 715dea1b06..25435f6f0f 100644 --- a/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java +++ b/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java @@ -10,6 +10,7 @@ import modelengine.fit.jober.aipp.service.OperatorService; import modelengine.fitframework.annotation.Component; import modelengine.fitframework.log.Logger; +import modelengine.fitframework.util.CollectionUtils; import java.util.ArrayList; import java.util.HashMap; @@ -51,15 +52,20 @@ public FileExtractorContainer(List extractors) { * * @param fileUrl 文件路径 {@link String}。 * @param fileType 文件枚举类型 {@link OperatorService.FileType}。 - * @return 提取的字符串 {@link Optional}。 + * @return 提取的字符串 {@link Optional}{@code <}{@link String}{@code >}。 */ public Optional extract(String fileUrl, OperatorService.FileType fileType) { + if (fileType == null) { + log.warn("FileType is null"); + return Optional.empty(); + } List extractors = this.fileExtractorMap.get(fileType.toString()); - if (extractors == null || extractors.isEmpty()) { + if (CollectionUtils.isEmpty(extractors)) { return Optional.empty(); } if (extractors.size() > 1) { - log.warn("Multiple extractors found , using first: {}", extractors.get(0).getClass().getSimpleName()); + log.warn("Multiple extractors found, using the first one instead. [name={}]", + extractors.get(0).getClass().getSimpleName()); } return Optional.ofNullable(extractors.get(0)).map(extractor -> extractor.extractFile(fileUrl)); } diff --git a/app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileExtractor.java b/app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileExtractor.java index 5c189b2f0c..c3b8bb1049 100644 --- a/app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileExtractor.java +++ b/app-builder/services/aipp-file-extract-service/src/main/java/modelengine/fit/jade/aipp/file/extract/FileExtractor.java @@ -16,7 +16,7 @@ * @author 黄政炫 * @since 2025-09-06 */ -public interface FileExtractor{ +public interface FileExtractor { /** * 提取文件函数。 * @@ -29,8 +29,8 @@ public interface FileExtractor{ /** * 返回提取器支持文件类型。 * - * @return 表示返回的文件类型 {@link List}。 + * @return 支持的枚举常量类型列表 {@link List}{@code <}{@link String}{@code >}。 */ - @Genericable(id = "modelengine.fit.jade.file.getFileType") + @Genericable(id = "modelengine.fit.jade.file.getFileTypes") List supportedFileTypes(); } From 2bc9807cdc73f8b6eb06e8a14f74c53192e67fa2 Mon Sep 17 00:00:00 2001 From: Muggle <125979968+jsbjfkbsjk@users.noreply.github.com> Date: Fri, 12 Sep 2025 14:46:31 +0800 Subject: [PATCH 8/8] Update FileExtractorContainer.java --- .../modelengine/fit/jober/aipp/tool/FileExtractorContainer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java b/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java index 25435f6f0f..d205b5c07a 100644 --- a/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java +++ b/app-builder/plugins/aipp-plugin/src/main/java/modelengine/fit/jober/aipp/tool/FileExtractorContainer.java @@ -56,7 +56,7 @@ public FileExtractorContainer(List extractors) { */ public Optional extract(String fileUrl, OperatorService.FileType fileType) { if (fileType == null) { - log.warn("FileType is null"); + log.warn("File type cannot be null."); return Optional.empty(); } List extractors = this.fileExtractorMap.get(fileType.toString());