Skip to content

Commit 8d2b41e

Browse files
authored
feature: Implement the basic knowledge generation function (#40)
1 parent 5612c7c commit 8d2b41e

32 files changed

Lines changed: 1060 additions & 67 deletions

.editorconfig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ root = true
44
charset = utf-8
55
end_of_line = lf
66
indent_style = space
7-
indent_size = 2
7+
indent_size = 4
88
insert_final_newline = true
99
trim_trailing_whitespace = true
1010

backend/services/main-application/pom.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,10 @@
130130
<artifactId>spring-boot-starter-test</artifactId>
131131
<scope>test</scope>
132132
</dependency>
133+
<dependency>
134+
<groupId>org.springframework.boot</groupId>
135+
<artifactId>spring-boot-autoconfigure</artifactId>
136+
</dependency>
133137
</dependencies>
134138

135139
<build>
@@ -141,6 +145,7 @@
141145
<configuration>
142146
<source>${maven.compiler.source}</source>
143147
<target>${maven.compiler.target}</target>
148+
<parameters>true</parameters>
144149
<compilerArgs>
145150
<arg>-parameters</arg>
146151
</compilerArgs>

backend/services/main-application/src/main/java/com/datamate/main/DataMatePlatformApplication.java

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -16,31 +16,12 @@
1616
* @version 1.0.0
1717
*/
1818
@SpringBootApplication
19-
@ComponentScan(basePackages = {
20-
"com.datamate.main",
21-
"com.datamate.datamanagement",
22-
"com.datamate.collection",
23-
"com.datamate.operator",
24-
"com.datamate.cleaning",
25-
"com.datamate.synthesis",
26-
"com.datamate.annotation",
27-
"com.datamate.evaluation",
28-
"com.datamate.pipeline",
29-
"com.datamate.execution",
30-
"com.datamate.common"
31-
})
32-
@MapperScan(basePackages = {
33-
"com.datamate.collection.infrastructure.persistence.mapper",
34-
"com.datamate.datamanagement.infrastructure.persistence.mapper",
35-
"com.datamate.operator.infrastructure.persistence.mapper",
36-
"com.datamate.cleaning.infrastructure.persistence.mapper",
37-
"com.datamate.**.mapper"
38-
})
19+
@ComponentScan(basePackages = {"com.datamate"})
20+
@MapperScan(basePackages = {"com.datamate.**.mapper"})
3921
@EnableTransactionManagement
4022
@EnableAsync
4123
@EnableScheduling
4224
public class DataMatePlatformApplication {
43-
4425
public static void main(String[] args) {
4526
SpringApplication.run(DataMatePlatformApplication.class, args);
4627
}

backend/services/rag-indexer-service/pom.xml

Lines changed: 65 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -16,45 +16,97 @@
1616
<name>RAG Indexer Service</name>
1717
<description>RAG文档索引服务</description>
1818

19+
<dependencyManagement>
20+
<dependencies>
21+
<dependency>
22+
<groupId>dev.langchain4j</groupId>
23+
<artifactId>langchain4j-bom</artifactId>
24+
<version>1.8.0</version>
25+
<type>pom</type>
26+
<scope>import</scope>
27+
</dependency>
28+
</dependencies>
29+
</dependencyManagement>
30+
1931
<dependencies>
2032
<dependency>
2133
<groupId>com.datamate</groupId>
2234
<artifactId>domain-common</artifactId>
2335
<version>${project.version}</version>
2436
</dependency>
37+
<dependency>
38+
<groupId>com.datamate</groupId>
39+
<artifactId>data-management-service</artifactId>
40+
<version>1.0.0-SNAPSHOT</version>
41+
</dependency>
2542
<dependency>
2643
<groupId>org.springframework.boot</groupId>
2744
<artifactId>spring-boot-starter-web</artifactId>
2845
</dependency>
2946
<dependency>
30-
<groupId>org.springframework.boot</groupId>
31-
<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
47+
<groupId>mysql</groupId>
48+
<artifactId>mysql-connector-java</artifactId>
49+
<version>8.0.33</version>
3250
</dependency>
3351
<dependency>
34-
<groupId>com.mysql</groupId>
35-
<artifactId>mysql-connector-j</artifactId>
36-
<version>${mysql.version}</version>
52+
<groupId>org.springdoc</groupId>
53+
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
54+
</dependency>
55+
<dependency>
56+
<groupId>org.openapitools</groupId>
57+
<artifactId>jackson-databind-nullable</artifactId>
58+
</dependency>
59+
<dependency>
60+
<groupId>jakarta.validation</groupId>
61+
<artifactId>jakarta.validation-api</artifactId>
3762
</dependency>
3863
<dependency>
3964
<groupId>org.springframework.boot</groupId>
4065
<artifactId>spring-boot-starter-test</artifactId>
4166
<scope>test</scope>
4267
</dependency>
4368
<dependency>
44-
<groupId>org.springframework.cloud</groupId>
45-
<artifactId>spring-cloud-starter-openfeign</artifactId>
69+
<groupId>dev.langchain4j</groupId>
70+
<artifactId>langchain4j-open-ai</artifactId>
71+
<version>1.8.0</version>
4672
</dependency>
4773
<dependency>
48-
<groupId>org.springdoc</groupId>
49-
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
74+
<groupId>dev.langchain4j</groupId>
75+
<artifactId>langchain4j</artifactId>
76+
<version>1.8.0</version>
5077
</dependency>
5178
<dependency>
52-
<groupId>org.openapitools</groupId>
53-
<artifactId>jackson-databind-nullable</artifactId>
79+
<groupId>dev.langchain4j</groupId>
80+
<artifactId>langchain4j-document-parser-apache-pdfbox</artifactId>
5481
</dependency>
5582
<dependency>
56-
<groupId>jakarta.validation</groupId>
57-
<artifactId>jakarta.validation-api</artifactId>
83+
<groupId>dev.langchain4j</groupId>
84+
<artifactId>langchain4j-document-parser-apache-tika</artifactId>
85+
</dependency>
86+
<dependency>
87+
<groupId>dev.langchain4j</groupId>
88+
<artifactId>langchain4j-document-parser-apache-poi</artifactId>
89+
</dependency>
90+
<dependency>
91+
<groupId>dev.langchain4j</groupId>
92+
<artifactId>langchain4j-document-parser-markdown</artifactId>
93+
</dependency>
94+
<dependency>
95+
<groupId>dev.langchain4j</groupId>
96+
<artifactId>langchain4j-document-transformer-jsoup</artifactId>
97+
</dependency>
98+
<dependency>
99+
<groupId>dev.langchain4j</groupId>
100+
<artifactId>langchain4j-milvus</artifactId>
101+
</dependency>
102+
103+
<dependency>
104+
<groupId>dev.langchain4j</groupId>
105+
<artifactId>langchain4j-embeddings-all-minilm-l6-v2</artifactId>
106+
</dependency>
107+
<dependency>
108+
<groupId>org.testcontainers</groupId>
109+
<artifactId>milvus</artifactId>
58110
</dependency>
59111
</dependencies>
60112

@@ -64,31 +116,6 @@
64116
<groupId>org.springframework.boot</groupId>
65117
<artifactId>spring-boot-maven-plugin</artifactId>
66118
</plugin>
67-
<!--<plugin>
68-
<groupId>org.openapitools</groupId>
69-
<artifactId>openapi-generator-maven-plugin</artifactId>
70-
<version>6.6.0</version>
71-
<executions>
72-
<execution>
73-
<goals>
74-
<goal>generate</goal>
75-
</goals>
76-
<configuration>
77-
<inputSpec>${project.basedir}/../../openapi/specs/rag-services.yaml</inputSpec>
78-
<generatorName>spring</generatorName>
79-
<output>${project.build.directory}/generated-sources/openapi</output>
80-
<apiPackage>com.datamate.rag.indexer.interfaces.api</apiPackage>
81-
<modelPackage>com.datamate.rag.indexer.interfaces.dto</modelPackage>
82-
<configOptions>
83-
<interfaceOnly>true</interfaceOnly>
84-
<useTags>true</useTags>
85-
<useSpringBoot3>true</useSpringBoot3>
86-
<documentationProvider>springdoc</documentationProvider>
87-
</configOptions>
88-
</configuration>
89-
</execution>
90-
</executions>
91-
</plugin>-->
92119
</plugins>
93120
</build>
94121
</project>
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
package com.datamate.rag.indexer.application;
2+
3+
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.datamate.common.infrastructure.exception.BusinessException;
import com.datamate.common.infrastructure.exception.KnowledgeBaseErrorCode;
import com.datamate.common.interfaces.PagedResponse;
import com.datamate.common.interfaces.PagingQuery;
import com.datamate.rag.indexer.domain.model.FileStatus;
import com.datamate.rag.indexer.domain.model.KnowledgeBase;
import com.datamate.rag.indexer.domain.model.RagChunk;
import com.datamate.rag.indexer.domain.model.RagFile;
import com.datamate.rag.indexer.domain.repository.KnowledgeBaseRepository;
import com.datamate.rag.indexer.domain.repository.RagFileRepository;
import com.datamate.rag.indexer.infrastructure.event.DataInsertedEvent;
import com.datamate.rag.indexer.interfaces.dto.*;
import lombok.RequiredArgsConstructor;
import org.springframework.beans.BeanUtils;
import org.springframework.context.ApplicationEventPublisher;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.util.StringUtils;

import java.util.List;
import java.util.Optional;
26+
27+
/**
28+
* 知识库服务类
29+
*
30+
* @author dallas
31+
* @since 2025-10-24
32+
*/
33+
@Service
34+
@RequiredArgsConstructor
35+
public class KnowledgeBaseService {
36+
private final KnowledgeBaseRepository knowledgeBaseRepository;
37+
private final RagFileRepository ragFileRepository;
38+
private final ApplicationEventPublisher eventPublisher;
39+
40+
41+
/**
42+
* 创建知识库
43+
*
44+
* @param request 知识库创建请求
45+
* @return 知识库 ID
46+
*/
47+
public String create(KnowledgeBaseCreateReq request) {
48+
KnowledgeBase knowledgeBase = new KnowledgeBase();
49+
BeanUtils.copyProperties(request, knowledgeBase);
50+
knowledgeBaseRepository.save(knowledgeBase);
51+
return knowledgeBase.getId();
52+
}
53+
54+
/**
55+
* 更新知识库
56+
*
57+
* @param knowledgeBaseId 知识库 ID
58+
* @param request 知识库更新请求
59+
*/
60+
public void update(String knowledgeBaseId, KnowledgeBaseUpdateReq request) {
61+
KnowledgeBase knowledgeBase = Optional.ofNullable(knowledgeBaseRepository.getById(knowledgeBaseId))
62+
.orElseThrow(() -> BusinessException.of(KnowledgeBaseErrorCode.KNOWLEDGE_BASE_NOT_FOUND));
63+
if (StringUtils.hasText(request.getName())) {
64+
knowledgeBase.setName(request.getName());
65+
}
66+
if (StringUtils.hasText(request.getDescription())) {
67+
knowledgeBase.setDescription(request.getDescription());
68+
}
69+
knowledgeBaseRepository.updateById(knowledgeBase);
70+
}
71+
72+
public void delete(String knowledgeBaseId) {
73+
knowledgeBaseRepository.removeById(knowledgeBaseId);
74+
ragFileRepository.removeByKnowledgeBaseId(knowledgeBaseId);
75+
// TODO: 删除知识库关联的所有文档
76+
}
77+
78+
public KnowledgeBase getById(String knowledgeBaseId) {
79+
return Optional.ofNullable(knowledgeBaseRepository.getById(knowledgeBaseId))
80+
.orElseThrow(() -> BusinessException.of(KnowledgeBaseErrorCode.KNOWLEDGE_BASE_NOT_FOUND));
81+
}
82+
83+
public PagedResponse<KnowledgeBase> list(KnowledgeBaseQueryReq request) {
84+
IPage<KnowledgeBase> page = new Page<>(request.getPage(), request.getSize());
85+
page = knowledgeBaseRepository.page(page, request);
86+
return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages());
87+
}
88+
89+
@Transactional(rollbackFor = Exception.class)
90+
public void addFiles(AddFilesReq request) {
91+
KnowledgeBase knowledgeBase = Optional.ofNullable(knowledgeBaseRepository.getById(request.getKnowledgeBaseId()))
92+
.orElseThrow(() -> BusinessException.of(KnowledgeBaseErrorCode.KNOWLEDGE_BASE_NOT_FOUND));
93+
List<RagFile> ragFiles = request.getFiles().stream().map(fileInfo -> {
94+
RagFile ragFile = new RagFile();
95+
ragFile.setKnowledgeBaseId(knowledgeBase.getId());
96+
ragFile.setFileId(fileInfo.fileId());
97+
ragFile.setFileName(fileInfo.fileName());
98+
ragFile.setStatus(FileStatus.UNPROCESSED);
99+
return ragFile;
100+
}).toList();
101+
ragFileRepository.saveBatch(ragFiles, 100);
102+
eventPublisher.publishEvent(new DataInsertedEvent(knowledgeBase.getId(), request.getProcessType()));
103+
}
104+
105+
public PagedResponse<RagFile> listFiles(String knowledgeBaseId, RagFileReq request) {
106+
IPage<RagFile> page = new Page<>(request.getPage(), request.getSize());
107+
page = ragFileRepository.page(page);
108+
return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages());
109+
}
110+
111+
public void deleteFile(String knowledgeBaseId, DeleteFilesReq request) {
112+
}
113+
114+
public PagedResponse<RagChunk> getChunks(String knowledgeBaseId, String ragFileId, PagingQuery pagingQuery) {
115+
IPage<RagChunk> page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize());
116+
return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages());
117+
}
118+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package com.datamate.rag.indexer.domain.model;
2+
3+
/**
 * File status enumeration.
 *
 * @author dallas
 * @since 2025-10-29
 */
public enum FileStatus {
    /** Not yet processed. */
    UNPROCESSED,

    /** Processing in progress. */
    PROCESSING,

    /** Processed. */
    PROCESSED,

    /** Processing failed. */
    PROCESS_FAILED
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
package com.datamate.rag.indexer.domain.model;
2+
3+
import com.baomidou.mybatisplus.annotation.TableName;
4+
import com.datamate.common.domain.model.base.BaseEntity;
5+
import lombok.Getter;
6+
import lombok.Setter;
7+
8+
/**
9+
* 知识库实体类
10+
*
11+
* @author dallas
12+
* @since 2025-10-24
13+
*/
14+
@Getter
15+
@Setter
16+
@TableName("t_rag_knowledge_base")
17+
public class KnowledgeBase extends BaseEntity<String> {
18+
/**
19+
* 知识库名称
20+
*/
21+
private String name;
22+
23+
/**
24+
* 知识库描述
25+
*/
26+
private String description;
27+
28+
/**
29+
* 嵌入模型
30+
*/
31+
private String embeddingModel;
32+
33+
/**
34+
* 聊天模型
35+
*/
36+
private String chatModel;
37+
}

0 commit comments

Comments
 (0)