From 4de3e996b65ba20e984a02ac8e33523a466ee4d2 Mon Sep 17 00:00:00 2001 From: Dallas98 <990259227@qq.com> Date: Tue, 28 Oct 2025 10:04:14 +0800 Subject: [PATCH 1/5] refactor: rename artifactId and application name to 'datamate'; add model configuration and related services --- backend/api-gateway/pom.xml | 2 +- backend/pom.xml | 4 +- .../services/data-annotation-service/pom.xml | 2 +- .../services/data-cleaning-service/pom.xml | 2 +- .../services/data-collection-service/pom.xml | 2 +- .../services/data-evaluation-service/pom.xml | 2 +- .../services/data-management-service/pom.xml | 2 +- .../services/data-synthesis-service/pom.xml | 2 +- .../services/execution-engine-service/pom.xml | 2 +- backend/services/main-application/pom.xml | 2 +- .../src/main/resources/application.yml | 2 +- .../services/operator-market-service/pom.xml | 2 +- .../pipeline-orchestration-service/pom.xml | 2 +- backend/services/rag-indexer-service/pom.xml | 2 +- backend/services/rag-query-service/pom.xml | 2 +- backend/shared/domain-common/pom.xml | 7 +- .../exception/KnowledgeBaseErrorCode.java | 23 +++++ .../ModelConfigApplicationService.java | 85 ++++++++++++++++++ .../models/domain/entity/ModelConfig.java | 44 +++++++++ .../models/domain/entity/ModelType.java | 18 ++++ .../repository/ModelConfigRepository.java | 22 +++++ .../infrastructure/client/ModelClient.java | 44 +++++++++ .../exception/ModelsErrorCode.java | 27 ++++++ .../impl/ModelConfigRepositoryImpl.java | 37 ++++++++ .../persistence/mapper/ModelConfigMapper.java | 15 ++++ .../rest/ModelConfigController.java | 90 +++++++++++++++++++ .../rest/dto/CreateModelRequest.java | 46 ++++++++++ .../rest/dto/QueryModelRequest.java | 27 ++++++ backend/shared/security-common/pom.xml | 2 +- editions/community/config/application.yml | 2 +- editions/enterprise/config/application.yml | 2 +- scripts/db/model-management-init.sql | 17 ++++ 32 files changed, 520 insertions(+), 20 deletions(-) create mode 100644 
backend/shared/domain-common/src/main/java/com/datamate/common/infrastructure/exception/KnowledgeBaseErrorCode.java create mode 100644 backend/shared/domain-common/src/main/java/com/datamate/common/models/application/ModelConfigApplicationService.java create mode 100644 backend/shared/domain-common/src/main/java/com/datamate/common/models/domain/entity/ModelConfig.java create mode 100644 backend/shared/domain-common/src/main/java/com/datamate/common/models/domain/entity/ModelType.java create mode 100644 backend/shared/domain-common/src/main/java/com/datamate/common/models/domain/repository/ModelConfigRepository.java create mode 100644 backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/client/ModelClient.java create mode 100644 backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/exception/ModelsErrorCode.java create mode 100644 backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/persistence/impl/ModelConfigRepositoryImpl.java create mode 100644 backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/persistence/mapper/ModelConfigMapper.java create mode 100644 backend/shared/domain-common/src/main/java/com/datamate/common/models/interfaces/rest/ModelConfigController.java create mode 100644 backend/shared/domain-common/src/main/java/com/datamate/common/models/interfaces/rest/dto/CreateModelRequest.java create mode 100644 backend/shared/domain-common/src/main/java/com/datamate/common/models/interfaces/rest/dto/QueryModelRequest.java create mode 100644 scripts/db/model-management-init.sql diff --git a/backend/api-gateway/pom.xml b/backend/api-gateway/pom.xml index c6284e6f4..87c3cf4a5 100644 --- a/backend/api-gateway/pom.xml +++ b/backend/api-gateway/pom.xml @@ -7,7 +7,7 @@ com.datamate - data-mate-platform + datamate 1.0.0-SNAPSHOT ../pom.xml diff --git a/backend/pom.xml b/backend/pom.xml index 682eaa065..8a7a795d1 100644 --- 
a/backend/pom.xml +++ b/backend/pom.xml @@ -6,11 +6,11 @@ 4.0.0 com.datamate - data-mate-platform + datamate 1.0.0-SNAPSHOT pom - DataMatePlatform + DataMate 一站式数据工作平台,面向模型微调与RAG检索 diff --git a/backend/services/data-annotation-service/pom.xml b/backend/services/data-annotation-service/pom.xml index a91d058c0..387d7cb66 100644 --- a/backend/services/data-annotation-service/pom.xml +++ b/backend/services/data-annotation-service/pom.xml @@ -7,7 +7,7 @@ com.datamate - data-mate-platform + datamate 1.0.0-SNAPSHOT ../../pom.xml diff --git a/backend/services/data-cleaning-service/pom.xml b/backend/services/data-cleaning-service/pom.xml index 56b70fc56..9e622b494 100644 --- a/backend/services/data-cleaning-service/pom.xml +++ b/backend/services/data-cleaning-service/pom.xml @@ -7,7 +7,7 @@ com.datamate - data-mate-platform + datamate 1.0.0-SNAPSHOT ../../pom.xml diff --git a/backend/services/data-collection-service/pom.xml b/backend/services/data-collection-service/pom.xml index 9261ad74a..d946b696a 100644 --- a/backend/services/data-collection-service/pom.xml +++ b/backend/services/data-collection-service/pom.xml @@ -6,7 +6,7 @@ com.datamate - data-mate-platform + datamate 1.0.0-SNAPSHOT ../../pom.xml diff --git a/backend/services/data-evaluation-service/pom.xml b/backend/services/data-evaluation-service/pom.xml index c976d19d7..a572467bb 100644 --- a/backend/services/data-evaluation-service/pom.xml +++ b/backend/services/data-evaluation-service/pom.xml @@ -7,7 +7,7 @@ com.datamate - data-mate-platform + datamate 1.0.0-SNAPSHOT ../../pom.xml diff --git a/backend/services/data-management-service/pom.xml b/backend/services/data-management-service/pom.xml index e6f0c16bc..2b3510771 100644 --- a/backend/services/data-management-service/pom.xml +++ b/backend/services/data-management-service/pom.xml @@ -7,7 +7,7 @@ com.datamate - data-mate-platform + datamate 1.0.0-SNAPSHOT ../../pom.xml diff --git a/backend/services/data-synthesis-service/pom.xml 
b/backend/services/data-synthesis-service/pom.xml index bc146a443..9a7baf9ff 100644 --- a/backend/services/data-synthesis-service/pom.xml +++ b/backend/services/data-synthesis-service/pom.xml @@ -7,7 +7,7 @@ com.datamate - data-mate-platform + datamate 1.0.0-SNAPSHOT ../../pom.xml diff --git a/backend/services/execution-engine-service/pom.xml b/backend/services/execution-engine-service/pom.xml index 42f94843e..58c4fcbe6 100644 --- a/backend/services/execution-engine-service/pom.xml +++ b/backend/services/execution-engine-service/pom.xml @@ -7,7 +7,7 @@ com.datamate - data-mate-platform + datamate 1.0.0-SNAPSHOT ../../pom.xml diff --git a/backend/services/main-application/pom.xml b/backend/services/main-application/pom.xml index 33e486254..4fb6f00c9 100644 --- a/backend/services/main-application/pom.xml +++ b/backend/services/main-application/pom.xml @@ -7,7 +7,7 @@ com.datamate - data-mate-platform + datamate 1.0.0-SNAPSHOT ../../pom.xml diff --git a/backend/services/main-application/src/main/resources/application.yml b/backend/services/main-application/src/main/resources/application.yml index 4becb3c6e..51206ba9b 100644 --- a/backend/services/main-application/src/main/resources/application.yml +++ b/backend/services/main-application/src/main/resources/application.yml @@ -1,7 +1,7 @@ # 数据引擎平台 - 主应用配置 spring: application: - name: data-mate-platform + name: datamate # 暂时排除Spring Security自动配置(开发阶段使用) autoconfigure: diff --git a/backend/services/operator-market-service/pom.xml b/backend/services/operator-market-service/pom.xml index 6543a1e0c..b63c681c5 100644 --- a/backend/services/operator-market-service/pom.xml +++ b/backend/services/operator-market-service/pom.xml @@ -7,7 +7,7 @@ com.datamate - data-mate-platform + datamate 1.0.0-SNAPSHOT ../../pom.xml diff --git a/backend/services/pipeline-orchestration-service/pom.xml b/backend/services/pipeline-orchestration-service/pom.xml index d5dfe1d54..ab75ae5ae 100644 --- 
a/backend/services/pipeline-orchestration-service/pom.xml +++ b/backend/services/pipeline-orchestration-service/pom.xml @@ -7,7 +7,7 @@ com.datamate - data-mate-platform + datamate 1.0.0-SNAPSHOT ../../pom.xml diff --git a/backend/services/rag-indexer-service/pom.xml b/backend/services/rag-indexer-service/pom.xml index f9e0441ea..040a9f8f8 100644 --- a/backend/services/rag-indexer-service/pom.xml +++ b/backend/services/rag-indexer-service/pom.xml @@ -7,7 +7,7 @@ com.datamate - data-mate-platform + datamate 1.0.0-SNAPSHOT ../../pom.xml diff --git a/backend/services/rag-query-service/pom.xml b/backend/services/rag-query-service/pom.xml index cc407ef5e..ed6c226b0 100644 --- a/backend/services/rag-query-service/pom.xml +++ b/backend/services/rag-query-service/pom.xml @@ -7,7 +7,7 @@ com.datamate - data-mate-platform + datamate 1.0.0-SNAPSHOT ../../pom.xml diff --git a/backend/shared/domain-common/pom.xml b/backend/shared/domain-common/pom.xml index bf1e93301..6cb2b22c3 100644 --- a/backend/shared/domain-common/pom.xml +++ b/backend/shared/domain-common/pom.xml @@ -7,7 +7,7 @@ com.datamate - data-mate-platform + datamate 1.0.0-SNAPSHOT ../../pom.xml @@ -33,5 +33,10 @@ com.fasterxml.jackson.datatype jackson-datatype-jsr310 + + dev.langchain4j + langchain4j-open-ai + 1.8.0 + diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/infrastructure/exception/KnowledgeBaseErrorCode.java b/backend/shared/domain-common/src/main/java/com/datamate/common/infrastructure/exception/KnowledgeBaseErrorCode.java new file mode 100644 index 000000000..d56bdb9a7 --- /dev/null +++ b/backend/shared/domain-common/src/main/java/com/datamate/common/infrastructure/exception/KnowledgeBaseErrorCode.java @@ -0,0 +1,23 @@ +package com.datamate.common.infrastructure.exception; + +import lombok.AllArgsConstructor; +import lombok.Getter; + +/** + * 知识库错误码 + * + * @author dallas + * @since 2025-10-24 + */ + +@Getter +@AllArgsConstructor +public enum KnowledgeBaseErrorCode implements 
ErrorCode { + /** + * 知识库不存在 + */ + KNOWLEDGE_BASE_NOT_FOUND("knowledge.0001", "知识库不存在"); + + private final String code; + private final String message; +} diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/models/application/ModelConfigApplicationService.java b/backend/shared/domain-common/src/main/java/com/datamate/common/models/application/ModelConfigApplicationService.java new file mode 100644 index 000000000..1c3435b90 --- /dev/null +++ b/backend/shared/domain-common/src/main/java/com/datamate/common/models/application/ModelConfigApplicationService.java @@ -0,0 +1,85 @@ +package com.datamate.common.models.application; + +import com.baomidou.mybatisplus.core.metadata.IPage; +import com.datamate.common.infrastructure.exception.BusinessAssert; +import com.datamate.common.interfaces.PagedResponse; +import com.datamate.common.models.domain.entity.ModelConfig; +import com.datamate.common.models.domain.repository.ModelConfigRepository; +import com.datamate.common.models.infrastructure.client.ModelClient; +import com.datamate.common.models.infrastructure.exception.ModelsErrorCode; +import com.datamate.common.models.interfaces.rest.dto.CreateModelRequest; +import com.datamate.common.models.interfaces.rest.dto.QueryModelRequest; +import dev.langchain4j.model.chat.ChatModel; +import jakarta.validation.Valid; +import lombok.RequiredArgsConstructor; +import org.springframework.stereotype.Service; + +import java.util.ArrayList; +import java.util.List; + +/** + * 模型配置应用服务类 + * + * @author dallas + * @since 2025-10-27 + */ +@Service +@RequiredArgsConstructor +public class ModelConfigApplicationService { + private final ModelConfigRepository modelConfigRepository; + + public List getProviders() { + List providers = new ArrayList<>(); + providers.add(ModelConfig.builder().provider("ModelEngine").baseUrl("http://localhost:9981").build()); + providers.add(ModelConfig.builder().provider("Ollama").baseUrl("http://localhost:11434").build()); + 
providers.add(ModelConfig.builder().provider("OpenAI").baseUrl("https://api.openai.com/v1").build()); + providers.add(ModelConfig.builder().provider("DeepSeek").baseUrl("https://api.deepseek.cn/v1").build()); + providers.add(ModelConfig.builder().provider("火山方舟").baseUrl("https://ark.cn-beijing.volces.com/api/v3").build()); + providers.add(ModelConfig.builder().provider("阿里云百炼").baseUrl("https://dashscope.aliyuncs.com/compatible-mode/v1").build()); + providers.add(ModelConfig.builder().provider("硅基流动").baseUrl("https://api.siliconflow.cn/v1").build()); + providers.add(ModelConfig.builder().provider("智谱AI").baseUrl("https://open.bigmodel.cn/api/paas/v4").build()); + return providers; + } + + public PagedResponse getModels(QueryModelRequest queryModelRequest) { + // 从数据库查询模型配置 + IPage page = modelConfigRepository.page(queryModelRequest); + return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages()); + } + + public ModelConfig getModelDetail(String modelId) { + return modelConfigRepository.getById(modelId); + } + + public ModelConfig createModel(CreateModelRequest modelConfig) { + ModelConfig newConfig = ModelConfig.builder() + .provider(modelConfig.getProvider()) + .modelName(modelConfig.getModelName()) + .type(modelConfig.getType()) + .baseUrl(modelConfig.getBaseUrl()) + .apiKey(modelConfig.getApiKey()) + .isEnabled(true) + .build(); + ModelClient.checkHealth(newConfig); + modelConfigRepository.save(newConfig); + return newConfig; + } + + public ModelConfig updateModel(String modelId, @Valid CreateModelRequest updateModelRequest) { + ModelConfig modelConfig = modelConfigRepository.getById(modelId); + BusinessAssert.notNull(modelConfig, ModelsErrorCode.MODEL_CONFIG_NOT_FOUND); + modelConfig.setProvider(updateModelRequest.getProvider()); + modelConfig.setModelName(updateModelRequest.getModelName()); + modelConfig.setType(updateModelRequest.getType()); + modelConfig.setBaseUrl(updateModelRequest.getBaseUrl()); + 
modelConfig.setApiKey(updateModelRequest.getApiKey()); + modelConfig.setIsEnabled(true); + ModelClient.checkHealth(modelConfig); + modelConfigRepository.updateById(modelConfig); + return modelConfig; + } + + public void deleteModel(String modelId) { + modelConfigRepository.removeById(modelId); + } +} diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/models/domain/entity/ModelConfig.java b/backend/shared/domain-common/src/main/java/com/datamate/common/models/domain/entity/ModelConfig.java new file mode 100644 index 000000000..1eecd8eed --- /dev/null +++ b/backend/shared/domain-common/src/main/java/com/datamate/common/models/domain/entity/ModelConfig.java @@ -0,0 +1,44 @@ +package com.datamate.common.models.domain.entity; + +import com.baomidou.mybatisplus.annotation.TableName; +import com.datamate.common.domain.model.base.BaseEntity; +import lombok.Builder; +import lombok.Getter; +import lombok.Setter; + +/** + * 模型配置实体类 + * + * @author dallas + * @since 2025-10-27 + */ +@Getter +@Setter +@TableName("t_model_config") +@Builder +public class ModelConfig extends BaseEntity { + /** + * 模型名称(如 qwen2) + */ + private String modelName; + /** + * 模型提供商(如 Ollama、OpenAI、DeepSeek) + */ + private String provider; + /** + * API 基础地址 + */ + private String baseUrl; + /** + * API 密钥(无密钥则为空) + */ + private String apiKey; + /** + * 模型类型(如 chat、embedding) + */ + private ModelType type; + /** + * 是否启用:1-启用,0-禁用 + */ + private Boolean isEnabled; +} diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/models/domain/entity/ModelType.java b/backend/shared/domain-common/src/main/java/com/datamate/common/models/domain/entity/ModelType.java new file mode 100644 index 000000000..7b0567280 --- /dev/null +++ b/backend/shared/domain-common/src/main/java/com/datamate/common/models/domain/entity/ModelType.java @@ -0,0 +1,18 @@ +package com.datamate.common.models.domain.entity; + +/** + * 模型类型枚举类 + * + * @author dallas + * @since 2025-10-27 + */ 
+public enum ModelType { + /** + * 语言模型 + */ + CHAT, + /** + * 嵌入模型 + */ + EMBEDDING +} diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/models/domain/repository/ModelConfigRepository.java b/backend/shared/domain-common/src/main/java/com/datamate/common/models/domain/repository/ModelConfigRepository.java new file mode 100644 index 000000000..2c70b348f --- /dev/null +++ b/backend/shared/domain-common/src/main/java/com/datamate/common/models/domain/repository/ModelConfigRepository.java @@ -0,0 +1,22 @@ +package com.datamate.common.models.domain.repository; + +import com.baomidou.mybatisplus.core.metadata.IPage; +import com.baomidou.mybatisplus.extension.repository.IRepository; +import com.datamate.common.models.domain.entity.ModelConfig; +import com.datamate.common.models.interfaces.rest.dto.QueryModelRequest; + +/** + * 模型配置仓库接口 + * + * @author dallas + * @since 2025-10-27 + */ +public interface ModelConfigRepository extends IRepository { + /** + * 分页查询模型配置 + * + * @param queryModelRequest 分页查询参数 + * @return 模型配置列表 + */ + IPage page(QueryModelRequest queryModelRequest); +} diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/client/ModelClient.java b/backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/client/ModelClient.java new file mode 100644 index 000000000..5246c031f --- /dev/null +++ b/backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/client/ModelClient.java @@ -0,0 +1,44 @@ +package com.datamate.common.models.infrastructure.client; + +import com.datamate.common.models.domain.entity.ModelConfig; +import com.datamate.common.models.domain.entity.ModelType; +import dev.langchain4j.model.chat.ChatModel; +import dev.langchain4j.model.embedding.EmbeddingModel; +import dev.langchain4j.model.openai.OpenAiChatModel; +import dev.langchain4j.model.openai.OpenAiEmbeddingModel; + +import java.util.function.Consumer; + +/** + * 模型客户端接口 
+ * + * @author dallas + * @since 2025-10-27 + */ +public class ModelClient { + public static T invokeModel(ModelConfig modelConfig, Class modelInterface) { + return switch (modelConfig.getType()) { + case CHAT -> modelInterface.cast(invokeChatModel(modelConfig)); + case EMBEDDING -> modelInterface.cast(invokeEmbeddingModel(modelConfig)); + }; + } + + private static EmbeddingModel invokeEmbeddingModel(ModelConfig modelConfig) { + return OpenAiEmbeddingModel.builder() + .baseUrl(modelConfig.getBaseUrl()) + .apiKey(modelConfig.getApiKey()) + .modelName(modelConfig.getModelName()) + .build(); + } + + private static ChatModel invokeChatModel(ModelConfig modelConfig) { + return OpenAiChatModel.builder() + .baseUrl(modelConfig.getBaseUrl()) + .apiKey(modelConfig.getApiKey()) + .modelName(modelConfig.getModelName()) + .build(); + } + + public static void checkHealth(ModelConfig modelConfig) { + } +} diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/exception/ModelsErrorCode.java b/backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/exception/ModelsErrorCode.java new file mode 100644 index 000000000..e0835838c --- /dev/null +++ b/backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/exception/ModelsErrorCode.java @@ -0,0 +1,27 @@ +package com.datamate.common.models.infrastructure.exception; + +import com.datamate.common.infrastructure.exception.ErrorCode; +import lombok.AllArgsConstructor; +import lombok.Getter; + +/** + * 模型配置错误码枚举类 + * + * @author dallas + * @since 2025-10-27 + */ +@Getter +@AllArgsConstructor +public enum ModelsErrorCode implements ErrorCode { + /** + * 模型配置不存在 + */ + MODEL_CONFIG_NOT_FOUND("model.0001", "模型配置不存在"), + /** + * 模型配置已存在 + */ + MODEL_CONFIG_ALREADY_EXISTS("model.0002", "模型配置已存在"); + + private final String code; + private final String message; +} diff --git 
a/backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/persistence/impl/ModelConfigRepositoryImpl.java b/backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/persistence/impl/ModelConfigRepositoryImpl.java new file mode 100644 index 000000000..0d4239fa0 --- /dev/null +++ b/backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/persistence/impl/ModelConfigRepositoryImpl.java @@ -0,0 +1,37 @@ +package com.datamate.common.models.infrastructure.persistence.impl; + +import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; +import com.baomidou.mybatisplus.core.metadata.IPage; +import com.baomidou.mybatisplus.extension.plugins.pagination.Page; +import com.baomidou.mybatisplus.extension.repository.CrudRepository; +import com.datamate.common.models.domain.entity.ModelConfig; +import com.datamate.common.models.domain.repository.ModelConfigRepository; +import com.datamate.common.models.infrastructure.persistence.mapper.ModelConfigMapper; +import com.datamate.common.models.interfaces.rest.dto.QueryModelRequest; +import lombok.RequiredArgsConstructor; +import org.springframework.stereotype.Repository; +import org.springframework.util.StringUtils; + +import java.util.Objects; + +/** + * 模型配置仓库实现类 + * + * @author dallas + * @since 2025-10-27 + */ +@Repository +@RequiredArgsConstructor +public class ModelConfigRepositoryImpl extends CrudRepository implements ModelConfigRepository { + private final ModelConfigMapper modelConfigMapper; + + + @Override + public IPage page(QueryModelRequest queryModelRequest) { + IPage page = new Page<>(queryModelRequest.getPage(), queryModelRequest.getSize()); + return this.page(page, new LambdaQueryWrapper() + .eq(StringUtils.hasText(queryModelRequest.getProvider()), ModelConfig::getProvider, queryModelRequest.getProvider()) + .eq(Objects.nonNull(queryModelRequest.getType()), ModelConfig::getType, queryModelRequest.getType()) + 
.eq(Objects.nonNull(queryModelRequest.getIsEnabled()), ModelConfig::getIsEnabled, queryModelRequest.getIsEnabled())); + } +} diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/persistence/mapper/ModelConfigMapper.java b/backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/persistence/mapper/ModelConfigMapper.java new file mode 100644 index 000000000..970f3fa97 --- /dev/null +++ b/backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/persistence/mapper/ModelConfigMapper.java @@ -0,0 +1,15 @@ +package com.datamate.common.models.infrastructure.persistence.mapper; + +import com.baomidou.mybatisplus.core.mapper.BaseMapper; +import com.datamate.common.models.domain.entity.ModelConfig; +import org.apache.ibatis.annotations.Mapper; + +/** + * 模型配置映射器接口 + * + * @author dallas + * @since 2025-10-27 + */ +@Mapper +public interface ModelConfigMapper extends BaseMapper { +} \ No newline at end of file diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/models/interfaces/rest/ModelConfigController.java b/backend/shared/domain-common/src/main/java/com/datamate/common/models/interfaces/rest/ModelConfigController.java new file mode 100644 index 000000000..e05ddc512 --- /dev/null +++ b/backend/shared/domain-common/src/main/java/com/datamate/common/models/interfaces/rest/ModelConfigController.java @@ -0,0 +1,90 @@ +package com.datamate.common.models.interfaces.rest; + + +import com.datamate.common.interfaces.PagedResponse; +import com.datamate.common.models.application.ModelConfigApplicationService; +import com.datamate.common.models.domain.entity.ModelConfig; +import com.datamate.common.models.interfaces.rest.dto.CreateModelRequest; +import com.datamate.common.models.interfaces.rest.dto.QueryModelRequest; +import jakarta.validation.Valid; +import lombok.RequiredArgsConstructor; +import org.springframework.web.bind.annotation.*; + +import 
java.util.List; + +/** + * 模型配置控制器类 + * + * @author dallas + * @since 2025-10-27 + */ +@RestController +@RequestMapping("/api/models") +@RequiredArgsConstructor +public class ModelConfigController { + private final ModelConfigApplicationService modelConfigApplicationService; + + /** + * 获取厂商列表 + * + * @return 厂商列表 + */ + @GetMapping("/providers") + public List getProviders() { + return modelConfigApplicationService.getProviders(); + } + + /** + * 获取模型列表 + * + * @return 模型列表 + */ + @GetMapping("/list") + public PagedResponse getModels(@RequestParam QueryModelRequest queryModelRequest) { + return modelConfigApplicationService.getModels(queryModelRequest); + } + + /** + * 获取模型详情 + * + * @param modelId 模型 ID + * @return 模型详情 + */ + @GetMapping("/{modelId}") + public ModelConfig getModelDetail(@PathVariable String modelId) { + return modelConfigApplicationService.getModelDetail(modelId); + } + + /** + * 创建模型配置 + * + * @param createModelRequest 创建模型配置请求 + * @return 创建的模型配置 + */ + @PostMapping("/create") + public ModelConfig createModel(@RequestBody @Valid CreateModelRequest createModelRequest) { + return modelConfigApplicationService.createModel(createModelRequest); + } + + /** + * 更新模型配置 + * + * @param modelId 模型 ID + * @param updateModelRequest 更新模型配置请求 + * @return 更新后的模型配置 + */ + @PutMapping("/{modelId}") + public ModelConfig updateModel(@PathVariable String modelId, @RequestBody @Valid CreateModelRequest updateModelRequest) { + return modelConfigApplicationService.updateModel(modelId, updateModelRequest); + } + + /** + * 删除模型配置 + * + * @param modelId 模型 ID + */ + @DeleteMapping("/{modelId}") + public void deleteModel(@PathVariable String modelId) { + modelConfigApplicationService.deleteModel(modelId); + } +} diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/models/interfaces/rest/dto/CreateModelRequest.java b/backend/shared/domain-common/src/main/java/com/datamate/common/models/interfaces/rest/dto/CreateModelRequest.java new file mode 100644 
index 000000000..54678e5f2 --- /dev/null +++ b/backend/shared/domain-common/src/main/java/com/datamate/common/models/interfaces/rest/dto/CreateModelRequest.java @@ -0,0 +1,46 @@ +package com.datamate.common.models.interfaces.rest.dto; + +import com.datamate.common.models.domain.entity.ModelType; +import jakarta.validation.constraints.NotEmpty; +import jakarta.validation.constraints.NotNull; +import lombok.Getter; +import lombok.Setter; + +/** + * 创建模型配置请求类 + * + * @author dallas + * @since 2025-10-27 + */ +@Setter +@Getter +public class CreateModelRequest { + /** + * 模型名称(如 qwen2) + */ + @NotEmpty(message = "模型名称不能为空") + private String modelName; + /** + * 模型提供商(如 Ollama、OpenAI、DeepSeek) + */ + @NotEmpty(message = "模型提供商不能为空") + private String provider; + /** + * API 基础地址 + */ + @NotEmpty(message = "API 基础地址不能为空") + private String baseUrl; + /** + * API 密钥(无密钥则为空) + */ + private String apiKey; + /** + * 模型类型(如 chat、embedding) + */ + @NotNull(message = "模型类型不能为空") + private ModelType type; + /** + * 是否启用:1-启用,0-禁用 + */ + private Boolean isEnabled; +} diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/models/interfaces/rest/dto/QueryModelRequest.java b/backend/shared/domain-common/src/main/java/com/datamate/common/models/interfaces/rest/dto/QueryModelRequest.java new file mode 100644 index 000000000..5c17c0f92 --- /dev/null +++ b/backend/shared/domain-common/src/main/java/com/datamate/common/models/interfaces/rest/dto/QueryModelRequest.java @@ -0,0 +1,27 @@ +package com.datamate.common.models.interfaces.rest.dto; + +import com.datamate.common.interfaces.PagingQuery; +import com.datamate.common.models.domain.entity.ModelType; +import lombok.Getter; +import lombok.Setter; + +/** + * 模型查询请求 DTO + * + * @author dallas + * @since 2025-10-27 + */ +@Getter +@Setter +public class QueryModelRequest extends PagingQuery { + /** + * 模型提供商(如 Ollama、OpenAI、DeepSeek) + */ + private String provider; + /** + * 模型类型(如 chat、embedding) + */ + private ModelType 
type; + + private Boolean isEnabled; +} diff --git a/backend/shared/security-common/pom.xml b/backend/shared/security-common/pom.xml index 686234bbd..5f3a8ca48 100644 --- a/backend/shared/security-common/pom.xml +++ b/backend/shared/security-common/pom.xml @@ -7,7 +7,7 @@ com.datamate - data-mate-platform + datamate 1.0.0-SNAPSHOT ../../pom.xml diff --git a/editions/community/config/application.yml b/editions/community/config/application.yml index d6b251de7..0d468f4b0 100644 --- a/editions/community/config/application.yml +++ b/editions/community/config/application.yml @@ -1,7 +1,7 @@ # 数据引擎平台 - 主应用配置 spring: application: - name: data-mate-platform + name: datamate # 暂时排除Spring Security自动配置(开发阶段使用) autoconfigure: diff --git a/editions/enterprise/config/application.yml b/editions/enterprise/config/application.yml index 671a27f75..9326ab629 100644 --- a/editions/enterprise/config/application.yml +++ b/editions/enterprise/config/application.yml @@ -1,7 +1,7 @@ # 数据引擎平台 - 主应用配置 spring: application: - name: data-mate-platform + name: datamate # 暂时排除Spring Security自动配置(开发阶段使用) autoconfigure: diff --git a/scripts/db/model-management-init.sql b/scripts/db/model-management-init.sql new file mode 100644 index 000000000..993ba5a35 --- /dev/null +++ b/scripts/db/model-management-init.sql @@ -0,0 +1,17 @@ +CREATE TABLE t_model_config +( + id VARCHAR(36) AUTO_INCREMENT PRIMARY KEY COMMENT '主键ID', + model_name VARCHAR(100) NOT NULL COMMENT '模型名称(如 qwen2)', + provider VARCHAR(50) NOT NULL COMMENT '模型提供商(如 Ollama、OpenAI、DeepSeek)', + base_url VARCHAR(255) NOT NULL COMMENT 'API 基础地址', + api_key VARCHAR(255) DEFAULT '' COMMENT 'API 密钥(无密钥则为空)', + type VARCHAR(50) NOT NULL COMMENT '模型类型(如 chat、embedding)', + is_enabled TINYINT DEFAULT 1 COMMENT '是否启用:1-启用,0-禁用', + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + created_by VARCHAR(255) COMMENT '创建者', + updated_by VARCHAR(255) 
COMMENT '更新者', + UNIQUE KEY uk_model_provider (model_name, provider) COMMENT '避免同一提供商下模型名称重复' +) ENGINE = InnoDB + DEFAULT CHARSET = utf8mb4 COMMENT ='模型配置表'; + From e03c1cab884bd46dbd7032864dcdd7094bfaa3da Mon Sep 17 00:00:00 2001 From: Dallas98 <990259227@qq.com> Date: Tue, 28 Oct 2025 14:06:11 +0800 Subject: [PATCH 2/5] refactor: simplify package scanning by using wildcard for mapper packages --- .../java/com/datamate/main/DataMatePlatformApplication.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/backend/services/main-application/src/main/java/com/datamate/main/DataMatePlatformApplication.java b/backend/services/main-application/src/main/java/com/datamate/main/DataMatePlatformApplication.java index 72679672e..3356df14b 100644 --- a/backend/services/main-application/src/main/java/com/datamate/main/DataMatePlatformApplication.java +++ b/backend/services/main-application/src/main/java/com/datamate/main/DataMatePlatformApplication.java @@ -27,8 +27,6 @@ "com.datamate.evaluation", "com.datamate.pipeline", "com.datamate.execution", - "com.datamate.rag", - "com.datamate.shared", "com.datamate.common" }) @MapperScan(basePackages = { @@ -36,7 +34,7 @@ "com.datamate.datamanagement.infrastructure.persistence.mapper", "com.datamate.operator.infrastructure.persistence.mapper", "com.datamate.cleaning.infrastructure.persistence.mapper", - "com.datamate.common.infrastructure.mapper" + "com.datamate.**.mapper" }) @EnableTransactionManagement @EnableAsync From 9cfafbe85221f7218f080417a81e9c2beb3236fd Mon Sep 17 00:00:00 2001 From: Dallas98 <990259227@qq.com> Date: Tue, 28 Oct 2025 16:04:40 +0800 Subject: [PATCH 3/5] feat: add model health check functionality and improve model configuration --- backend/pom.xml | 13 ++++++++ .../models/domain/entity/ModelConfig.java | 2 ++ .../infrastructure/client/ModelClient.java | 30 +++++++++++++++---- .../exception/ModelsErrorCode.java | 7 ++++- .../rest/ModelConfigController.java | 6 ++-- 
scripts/db/model-management-init.sql | 4 ++- 6 files changed, 52 insertions(+), 10 deletions(-) diff --git a/backend/pom.xml b/backend/pom.xml index 8a7a795d1..ef6da27c2 100644 --- a/backend/pom.xml +++ b/backend/pom.xml @@ -207,6 +207,19 @@ spring-boot-maven-plugin ${spring-boot.version} + + org.apache.maven.plugins + maven-compiler-plugin + 3.11.0 + + ${maven.compiler.source} + ${maven.compiler.target} + true + + -parameters + + + diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/models/domain/entity/ModelConfig.java b/backend/shared/domain-common/src/main/java/com/datamate/common/models/domain/entity/ModelConfig.java index 1eecd8eed..9cfbbff3d 100644 --- a/backend/shared/domain-common/src/main/java/com/datamate/common/models/domain/entity/ModelConfig.java +++ b/backend/shared/domain-common/src/main/java/com/datamate/common/models/domain/entity/ModelConfig.java @@ -5,6 +5,7 @@ import lombok.Builder; import lombok.Getter; import lombok.Setter; +import lombok.ToString; /** * 模型配置实体类 @@ -16,6 +17,7 @@ @Setter @TableName("t_model_config") @Builder +@ToString public class ModelConfig extends BaseEntity { /** * 模型名称(如 qwen2) diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/client/ModelClient.java b/backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/client/ModelClient.java index 5246c031f..26ddf73a8 100644 --- a/backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/client/ModelClient.java +++ b/backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/client/ModelClient.java @@ -1,13 +1,13 @@ package com.datamate.common.models.infrastructure.client; +import com.datamate.common.infrastructure.exception.BusinessException; import com.datamate.common.models.domain.entity.ModelConfig; -import com.datamate.common.models.domain.entity.ModelType; +import 
com.datamate.common.models.infrastructure.exception.ModelsErrorCode; import dev.langchain4j.model.chat.ChatModel; import dev.langchain4j.model.embedding.EmbeddingModel; import dev.langchain4j.model.openai.OpenAiChatModel; import dev.langchain4j.model.openai.OpenAiEmbeddingModel; - -import java.util.function.Consumer; +import lombok.extern.slf4j.Slf4j; /** * 模型客户端接口 @@ -15,6 +15,7 @@ * @author dallas * @since 2025-10-27 */ +@Slf4j public class ModelClient { public static T invokeModel(ModelConfig modelConfig, Class modelInterface) { return switch (modelConfig.getType()) { @@ -23,7 +24,7 @@ public static T invokeModel(ModelConfig modelConfig, Class modelInterface }; } - private static EmbeddingModel invokeEmbeddingModel(ModelConfig modelConfig) { + public static EmbeddingModel invokeEmbeddingModel(ModelConfig modelConfig) { return OpenAiEmbeddingModel.builder() .baseUrl(modelConfig.getBaseUrl()) .apiKey(modelConfig.getApiKey()) @@ -31,7 +32,7 @@ private static EmbeddingModel invokeEmbeddingModel(ModelConfig modelConfig) { .build(); } - private static ChatModel invokeChatModel(ModelConfig modelConfig) { + public static ChatModel invokeChatModel(ModelConfig modelConfig) { return OpenAiChatModel.builder() .baseUrl(modelConfig.getBaseUrl()) .apiKey(modelConfig.getApiKey()) @@ -40,5 +41,24 @@ private static ChatModel invokeChatModel(ModelConfig modelConfig) { } public static void checkHealth(ModelConfig modelConfig) { + try { + switch (modelConfig.getType()) { + case CHAT -> checkChatModelHealth(modelConfig); + case EMBEDDING -> checkEmbeddingModelHealth(modelConfig); + } + } catch (Exception e) { + log.error("Model health check failed for modelConfig: {}", modelConfig, e); + throw BusinessException.of(ModelsErrorCode.MODEL_HEALTH_CHECK_FAILED); + } + } + + private static void checkEmbeddingModelHealth(ModelConfig modelConfig) { + EmbeddingModel embeddingModel = invokeEmbeddingModel(modelConfig); + embeddingModel.embed("text"); + } + + private static void 
checkChatModelHealth(ModelConfig modelConfig) { + ChatModel chatModel = invokeChatModel(modelConfig); + chatModel.chat("hello"); } } diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/exception/ModelsErrorCode.java b/backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/exception/ModelsErrorCode.java index e0835838c..48501a1d5 100644 --- a/backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/exception/ModelsErrorCode.java +++ b/backend/shared/domain-common/src/main/java/com/datamate/common/models/infrastructure/exception/ModelsErrorCode.java @@ -20,7 +20,12 @@ public enum ModelsErrorCode implements ErrorCode { /** * 模型配置已存在 */ - MODEL_CONFIG_ALREADY_EXISTS("model.0002", "模型配置已存在"); + MODEL_CONFIG_ALREADY_EXISTS("model.0002", "模型配置已存在"), + + /** + * 模型健康检查失败 + */ + MODEL_HEALTH_CHECK_FAILED("model.0003", "模型健康检查失败"); private final String code; private final String message; diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/models/interfaces/rest/ModelConfigController.java b/backend/shared/domain-common/src/main/java/com/datamate/common/models/interfaces/rest/ModelConfigController.java index e05ddc512..20eacf876 100644 --- a/backend/shared/domain-common/src/main/java/com/datamate/common/models/interfaces/rest/ModelConfigController.java +++ b/backend/shared/domain-common/src/main/java/com/datamate/common/models/interfaces/rest/ModelConfigController.java @@ -19,7 +19,7 @@ * @since 2025-10-27 */ @RestController -@RequestMapping("/api/models") +@RequestMapping("/models") @RequiredArgsConstructor public class ModelConfigController { private final ModelConfigApplicationService modelConfigApplicationService; @@ -40,7 +40,7 @@ public List getProviders() { * @return 模型列表 */ @GetMapping("/list") - public PagedResponse getModels(@RequestParam QueryModelRequest queryModelRequest) { + public PagedResponse getModels(QueryModelRequest 
queryModelRequest) { return modelConfigApplicationService.getModels(queryModelRequest); } @@ -51,7 +51,7 @@ public PagedResponse getModels(@RequestParam QueryModelRequest quer * @return 模型详情 */ @GetMapping("/{modelId}") - public ModelConfig getModelDetail(@PathVariable String modelId) { + public ModelConfig getModelDetail(@PathVariable("modelId") String modelId) { return modelConfigApplicationService.getModelDetail(modelId); } diff --git a/scripts/db/model-management-init.sql b/scripts/db/model-management-init.sql index 993ba5a35..0ad254855 100644 --- a/scripts/db/model-management-init.sql +++ b/scripts/db/model-management-init.sql @@ -1,6 +1,8 @@ +USE datamate; + CREATE TABLE t_model_config ( - id VARCHAR(36) AUTO_INCREMENT PRIMARY KEY COMMENT '主键ID', + id VARCHAR(36) PRIMARY KEY COMMENT '主键ID', model_name VARCHAR(100) NOT NULL COMMENT '模型名称(如 qwen2)', provider VARCHAR(50) NOT NULL COMMENT '模型提供商(如 Ollama、OpenAI、DeepSeek)', base_url VARCHAR(255) NOT NULL COMMENT 'API 基础地址', From 684e82c7d419e51790eecc3e3d61d7ff4b81c1ff Mon Sep 17 00:00:00 2001 From: Dallas98 <990259227@qq.com> Date: Tue, 28 Oct 2025 17:26:53 +0800 Subject: [PATCH 4/5] feat: increase api_key length and enhance ModelConfig annotations --- .../datamate/common/models/domain/entity/ModelConfig.java | 7 +++---- scripts/db/model-management-init.sql | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/models/domain/entity/ModelConfig.java b/backend/shared/domain-common/src/main/java/com/datamate/common/models/domain/entity/ModelConfig.java index 9cfbbff3d..1d4cc6236 100644 --- a/backend/shared/domain-common/src/main/java/com/datamate/common/models/domain/entity/ModelConfig.java +++ b/backend/shared/domain-common/src/main/java/com/datamate/common/models/domain/entity/ModelConfig.java @@ -2,10 +2,7 @@ import com.baomidou.mybatisplus.annotation.TableName; import com.datamate.common.domain.model.base.BaseEntity; -import 
lombok.Builder; -import lombok.Getter; -import lombok.Setter; -import lombok.ToString; +import lombok.*; /** * 模型配置实体类 @@ -18,6 +15,8 @@ @TableName("t_model_config") @Builder @ToString +@NoArgsConstructor +@AllArgsConstructor public class ModelConfig extends BaseEntity { /** * 模型名称(如 qwen2) diff --git a/scripts/db/model-management-init.sql b/scripts/db/model-management-init.sql index 0ad254855..26f70efff 100644 --- a/scripts/db/model-management-init.sql +++ b/scripts/db/model-management-init.sql @@ -6,7 +6,7 @@ CREATE TABLE t_model_config model_name VARCHAR(100) NOT NULL COMMENT '模型名称(如 qwen2)', provider VARCHAR(50) NOT NULL COMMENT '模型提供商(如 Ollama、OpenAI、DeepSeek)', base_url VARCHAR(255) NOT NULL COMMENT 'API 基础地址', - api_key VARCHAR(255) DEFAULT '' COMMENT 'API 密钥(无密钥则为空)', + api_key VARCHAR(512) DEFAULT '' COMMENT 'API 密钥(无密钥则为空)', type VARCHAR(50) NOT NULL COMMENT '模型类型(如 chat、embedding)', is_enabled TINYINT DEFAULT 1 COMMENT '是否启用:1-启用,0-禁用', created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', From 48966a8bafe74da2c019fd76cc875fc73f60990f Mon Sep 17 00:00:00 2001 From: Dallas98 <990259227@qq.com> Date: Thu, 30 Oct 2025 16:34:57 +0800 Subject: [PATCH 5/5] feature: Implement the basic knowledge generation function --- .editorconfig | 2 +- backend/services/main-application/pom.xml | 5 + .../main/DataMatePlatformApplication.java | 23 +-- backend/services/rag-indexer-service/pom.xml | 103 +++++++----- .../application/KnowledgeBaseService.java | 118 +++++++++++++ .../rag/indexer/domain/model/FileStatus.java | 26 +++ .../indexer/domain/model/KnowledgeBase.java | 37 +++++ .../rag/indexer/domain/model/RagChunk.java | 10 ++ .../rag/indexer/domain/model/RagFile.java | 47 ++++++ .../repository/KnowledgeBaseRepository.java | 23 +++ .../domain/repository/RagFileRepository.java | 18 ++ .../event/DataInsertedEvent.java | 12 ++ .../infrastructure/event/RagEtlService.java | 157 ++++++++++++++++++ .../impl/KnowledgeBaseRepositoryImpl.java | 31 ++++ 
.../impl/RagFileRepositoryImpl.java | 32 ++++ .../mapper/KnowledgeBaseMapper.java | 16 ++ .../persistence/mapper/RagFileMapper.java | 16 ++ .../interfaces/EmbeddingController.java | 8 + .../interfaces/KnowledgeBaseController.java | 137 +++++++++++++++ .../indexer/interfaces/dto/AddFilesReq.java | 23 +++ .../interfaces/dto/DeleteFilesReq.java | 13 ++ .../dto/KnowledgeBaseCreateReq.java | 41 +++++ .../interfaces/dto/KnowledgeBaseQueryReq.java | 24 +++ .../dto/KnowledgeBaseUpdateReq.java | 30 ++++ .../indexer/interfaces/dto/ProcessType.java | 33 ++++ .../indexer/interfaces/dto/RagFileReq.java | 13 ++ .../common/interfaces/PagedResponse.java | 2 + .../common/interfaces/PagingQuery.java | 21 ++- deployment/docker/datamate/docker-compose.yml | 1 + .../docker/deer-flow/docker-compose.yml | 2 +- deployment/docker/milvus/docker-compose.yml | 74 +++++++++ scripts/db/rag-management-init.sql | 29 ++++ 32 files changed, 1060 insertions(+), 67 deletions(-) create mode 100644 backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/application/KnowledgeBaseService.java create mode 100644 backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/FileStatus.java create mode 100644 backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/KnowledgeBase.java create mode 100644 backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/RagChunk.java create mode 100644 backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/RagFile.java create mode 100644 backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/KnowledgeBaseRepository.java create mode 100644 backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/RagFileRepository.java create mode 100644 
backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/DataInsertedEvent.java create mode 100644 backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java create mode 100644 backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/KnowledgeBaseRepositoryImpl.java create mode 100644 backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/RagFileRepositoryImpl.java create mode 100644 backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/mapper/KnowledgeBaseMapper.java create mode 100644 backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/mapper/RagFileMapper.java create mode 100644 backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/EmbeddingController.java create mode 100644 backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/KnowledgeBaseController.java create mode 100644 backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/AddFilesReq.java create mode 100644 backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/DeleteFilesReq.java create mode 100644 backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseCreateReq.java create mode 100644 backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseQueryReq.java create mode 100644 backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseUpdateReq.java create mode 100644 backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/ProcessType.java create mode 100644 
backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/RagFileReq.java create mode 100644 deployment/docker/milvus/docker-compose.yml create mode 100644 scripts/db/rag-management-init.sql diff --git a/.editorconfig b/.editorconfig index 65ede0753..01ab809a1 100644 --- a/.editorconfig +++ b/.editorconfig @@ -4,7 +4,7 @@ root = true charset = utf-8 end_of_line = lf indent_style = space -indent_size = 2 +indent_size = 4 insert_final_newline = true trim_trailing_whitespace = true diff --git a/backend/services/main-application/pom.xml b/backend/services/main-application/pom.xml index 342ced397..4f180decb 100644 --- a/backend/services/main-application/pom.xml +++ b/backend/services/main-application/pom.xml @@ -130,6 +130,10 @@ spring-boot-starter-test test + + org.springframework.boot + spring-boot-autoconfigure + @@ -141,6 +145,7 @@ ${maven.compiler.source} ${maven.compiler.target} + true -parameters diff --git a/backend/services/main-application/src/main/java/com/datamate/main/DataMatePlatformApplication.java b/backend/services/main-application/src/main/java/com/datamate/main/DataMatePlatformApplication.java index 3356df14b..3cf215c7d 100644 --- a/backend/services/main-application/src/main/java/com/datamate/main/DataMatePlatformApplication.java +++ b/backend/services/main-application/src/main/java/com/datamate/main/DataMatePlatformApplication.java @@ -16,31 +16,12 @@ * @version 1.0.0 */ @SpringBootApplication -@ComponentScan(basePackages = { - "com.datamate.main", - "com.datamate.datamanagement", - "com.datamate.collection", - "com.datamate.operator", - "com.datamate.cleaning", - "com.datamate.synthesis", - "com.datamate.annotation", - "com.datamate.evaluation", - "com.datamate.pipeline", - "com.datamate.execution", - "com.datamate.common" -}) -@MapperScan(basePackages = { - "com.datamate.collection.infrastructure.persistence.mapper", - "com.datamate.datamanagement.infrastructure.persistence.mapper", - 
"com.datamate.operator.infrastructure.persistence.mapper", - "com.datamate.cleaning.infrastructure.persistence.mapper", - "com.datamate.**.mapper" -}) +@ComponentScan(basePackages = {"com.datamate"}) +@MapperScan(basePackages = {"com.datamate.**.mapper"}) @EnableTransactionManagement @EnableAsync @EnableScheduling public class DataMatePlatformApplication { - public static void main(String[] args) { SpringApplication.run(DataMatePlatformApplication.class, args); } diff --git a/backend/services/rag-indexer-service/pom.xml b/backend/services/rag-indexer-service/pom.xml index 90aea6da1..454a3d9a6 100644 --- a/backend/services/rag-indexer-service/pom.xml +++ b/backend/services/rag-indexer-service/pom.xml @@ -16,24 +16,49 @@ RAG Indexer Service RAG文档索引服务 + + + + dev.langchain4j + langchain4j-bom + 1.8.0 + pom + import + + + + com.datamate domain-common ${project.version} + + com.datamate + data-management-service + 1.0.0-SNAPSHOT + org.springframework.boot spring-boot-starter-web - org.springframework.boot - spring-boot-starter-data-elasticsearch + mysql + mysql-connector-java + 8.0.33 - com.mysql - mysql-connector-j - ${mysql.version} + org.springdoc + springdoc-openapi-starter-webmvc-ui + + + org.openapitools + jackson-databind-nullable + + + jakarta.validation + jakarta.validation-api org.springframework.boot @@ -41,20 +66,47 @@ test - org.springframework.cloud - spring-cloud-starter-openfeign + dev.langchain4j + langchain4j-open-ai + 1.8.0 - org.springdoc - springdoc-openapi-starter-webmvc-ui + dev.langchain4j + langchain4j + 1.8.0 - org.openapitools - jackson-databind-nullable + dev.langchain4j + langchain4j-document-parser-apache-pdfbox - jakarta.validation - jakarta.validation-api + dev.langchain4j + langchain4j-document-parser-apache-tika + + + dev.langchain4j + langchain4j-document-parser-apache-poi + + + dev.langchain4j + langchain4j-document-parser-markdown + + + dev.langchain4j + langchain4j-document-transformer-jsoup + + + dev.langchain4j + 
langchain4j-milvus + + + + dev.langchain4j + langchain4j-embeddings-all-minilm-l6-v2 + + + org.testcontainers + milvus @@ -64,31 +116,6 @@ org.springframework.boot spring-boot-maven-plugin - diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/application/KnowledgeBaseService.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/application/KnowledgeBaseService.java new file mode 100644 index 000000000..989816de8 --- /dev/null +++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/application/KnowledgeBaseService.java @@ -0,0 +1,118 @@ +package com.datamate.rag.indexer.application; + +import com.baomidou.mybatisplus.core.metadata.IPage; +import com.baomidou.mybatisplus.extension.plugins.pagination.Page; +import com.datamate.rag.indexer.domain.model.FileStatus; +import com.datamate.rag.indexer.domain.model.KnowledgeBase; +import com.datamate.rag.indexer.domain.model.RagChunk; +import com.datamate.rag.indexer.domain.model.RagFile; +import com.datamate.rag.indexer.domain.repository.KnowledgeBaseRepository; +import com.datamate.rag.indexer.domain.repository.RagFileRepository; +import com.datamate.rag.indexer.infrastructure.event.DataInsertedEvent; +import com.datamate.common.infrastructure.exception.BusinessException; +import com.datamate.common.infrastructure.exception.KnowledgeBaseErrorCode; +import com.datamate.common.interfaces.PagedResponse; +import com.datamate.common.interfaces.PagingQuery; +import com.datamate.rag.indexer.interfaces.dto.*; +import lombok.RequiredArgsConstructor; +import org.springframework.beans.BeanUtils; +import org.springframework.context.ApplicationEventPublisher; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; +import org.springframework.util.StringUtils; + +import java.util.List; +import java.util.Optional; + +/** + * 知识库服务类 + * + * @author dallas + * @since 2025-10-24 + */ +@Service 
+@RequiredArgsConstructor +public class KnowledgeBaseService { + private final KnowledgeBaseRepository knowledgeBaseRepository; + private final RagFileRepository ragFileRepository; + private final ApplicationEventPublisher eventPublisher; + + + /** + * 创建知识库 + * + * @param request 知识库创建请求 + * @return 知识库 ID + */ + public String create(KnowledgeBaseCreateReq request) { + KnowledgeBase knowledgeBase = new KnowledgeBase(); + BeanUtils.copyProperties(request, knowledgeBase); + knowledgeBaseRepository.save(knowledgeBase); + return knowledgeBase.getId(); + } + + /** + * 更新知识库 + * + * @param knowledgeBaseId 知识库 ID + * @param request 知识库更新请求 + */ + public void update(String knowledgeBaseId, KnowledgeBaseUpdateReq request) { + KnowledgeBase knowledgeBase = Optional.ofNullable(knowledgeBaseRepository.getById(knowledgeBaseId)) + .orElseThrow(() -> BusinessException.of(KnowledgeBaseErrorCode.KNOWLEDGE_BASE_NOT_FOUND)); + if (StringUtils.hasText(request.getName())) { + knowledgeBase.setName(request.getName()); + } + if (StringUtils.hasText(request.getDescription())) { + knowledgeBase.setDescription(request.getDescription()); + } + knowledgeBaseRepository.updateById(knowledgeBase); + } + + public void delete(String knowledgeBaseId) { + knowledgeBaseRepository.removeById(knowledgeBaseId); + ragFileRepository.removeByKnowledgeBaseId(knowledgeBaseId); + // TODO: 删除知识库关联的所有文档 + } + + public KnowledgeBase getById(String knowledgeBaseId) { + return Optional.ofNullable(knowledgeBaseRepository.getById(knowledgeBaseId)) + .orElseThrow(() -> BusinessException.of(KnowledgeBaseErrorCode.KNOWLEDGE_BASE_NOT_FOUND)); + } + + public PagedResponse list(KnowledgeBaseQueryReq request) { + IPage page = new Page<>(request.getPage(), request.getSize()); + page = knowledgeBaseRepository.page(page, request); + return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages()); + } + + @Transactional(rollbackFor = Exception.class) + public void addFiles(AddFilesReq request) 
{ + KnowledgeBase knowledgeBase = Optional.ofNullable(knowledgeBaseRepository.getById(request.getKnowledgeBaseId())) + .orElseThrow(() -> BusinessException.of(KnowledgeBaseErrorCode.KNOWLEDGE_BASE_NOT_FOUND)); + List ragFiles = request.getFiles().stream().map(fileInfo -> { + RagFile ragFile = new RagFile(); + ragFile.setKnowledgeBaseId(knowledgeBase.getId()); + ragFile.setFileId(fileInfo.fileId()); + ragFile.setFileName(fileInfo.fileName()); + ragFile.setStatus(FileStatus.UNPROCESSED); + return ragFile; + }).toList(); + ragFileRepository.saveBatch(ragFiles, 100); + eventPublisher.publishEvent(new DataInsertedEvent(knowledgeBase.getId(), request.getProcessType())); + } + + public PagedResponse listFiles(String knowledgeBaseId, RagFileReq request) { + IPage page = new Page<>(request.getPage(), request.getSize()); + page = ragFileRepository.page(page); + return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages()); + } + + public void deleteFile(String knowledgeBaseId, DeleteFilesReq request) { + } + + public PagedResponse getChunks(String knowledgeBaseId, String ragFileId, PagingQuery pagingQuery) { + IPage page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize()); + return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages()); + } +} \ No newline at end of file diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/FileStatus.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/FileStatus.java new file mode 100644 index 000000000..8f3132fda --- /dev/null +++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/FileStatus.java @@ -0,0 +1,26 @@ +package com.datamate.rag.indexer.domain.model; + +/** + * 文件状态枚举 + * + * @author dallas + * @since 2025-10-29 + */ +public enum FileStatus { + /** + * 未处理 + */ + UNPROCESSED, + /** + * 处理中 + */ + PROCESSING, + /** + * 已处理 + */ 
+ PROCESSED, + /** + * 处理失败 + */ + PROCESS_FAILED +} diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/KnowledgeBase.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/KnowledgeBase.java new file mode 100644 index 000000000..4a571b3c3 --- /dev/null +++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/KnowledgeBase.java @@ -0,0 +1,37 @@ +package com.datamate.rag.indexer.domain.model; + +import com.baomidou.mybatisplus.annotation.TableName; +import com.datamate.common.domain.model.base.BaseEntity; +import lombok.Getter; +import lombok.Setter; + +/** + * 知识库实体类 + * + * @author dallas + * @since 2025-10-24 + */ +@Getter +@Setter +@TableName("t_rag_knowledge_base") +public class KnowledgeBase extends BaseEntity { + /** + * 知识库名称 + */ + private String name; + + /** + * 知识库描述 + */ + private String description; + + /** + * 嵌入模型 + */ + private String embeddingModel; + + /** + * 聊天模型 + */ + private String chatModel; +} diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/RagChunk.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/RagChunk.java new file mode 100644 index 000000000..6a6b8846b --- /dev/null +++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/RagChunk.java @@ -0,0 +1,10 @@ +package com.datamate.rag.indexer.domain.model; + +/** + * RAG 文档块实体类 + * + * @author dallas + * @since 2025-10-29 + */ +public class RagChunk { +} diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/RagFile.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/RagFile.java new file mode 100644 index 000000000..ec0445a16 --- /dev/null +++ 
b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/RagFile.java @@ -0,0 +1,47 @@ +package com.datamate.rag.indexer.domain.model; + + +import com.baomidou.mybatisplus.annotation.TableField; +import com.baomidou.mybatisplus.annotation.TableName; +import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler; +import com.datamate.common.domain.model.base.BaseEntity; +import lombok.Getter; +import lombok.Setter; + +import java.util.Map; + +/** + * Rag 文件实体类 + * + * @author dallas + * @since 2025-10-24 + */ +@Getter +@Setter +@TableName("t_rag_file") +public class RagFile extends BaseEntity { + /** + * 知识库ID + */ + private String knowledgeBaseId; + /** + * 文件名 + */ + private String fileName; + /** + * 文件ID + */ + private String fileId; + /** + * 分块数量 + */ + private Integer chunkCount; + + /** + * 元数据 + */ + @TableField(typeHandler = JacksonTypeHandler.class) + private Map metadata; + + private FileStatus status; +} diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/KnowledgeBaseRepository.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/KnowledgeBaseRepository.java new file mode 100644 index 000000000..273abc9be --- /dev/null +++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/KnowledgeBaseRepository.java @@ -0,0 +1,23 @@ +package com.datamate.rag.indexer.domain.repository; + +import com.baomidou.mybatisplus.core.metadata.IPage; +import com.baomidou.mybatisplus.extension.repository.IRepository; +import com.datamate.rag.indexer.domain.model.KnowledgeBase; +import com.datamate.rag.indexer.interfaces.dto.KnowledgeBaseQueryReq; + +/** + * 知识库仓储接口 + * + * @author dallas + * @since 2025-10-24 + */ +public interface KnowledgeBaseRepository extends IRepository { + /** + * 分页查询知识库 + * + * @param page 分页信息 + * @param request 查询请求 + * @return 知识库分页结果 + */ + IPage page(IPage page, 
KnowledgeBaseQueryReq request); +} diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/RagFileRepository.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/RagFileRepository.java new file mode 100644 index 000000000..d55b2b129 --- /dev/null +++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/RagFileRepository.java @@ -0,0 +1,18 @@ +package com.datamate.rag.indexer.domain.repository; + +import com.baomidou.mybatisplus.extension.repository.IRepository; +import com.datamate.rag.indexer.domain.model.RagFile; + +import java.util.List; + +/** + * 知识库文件仓储接口 + * + * @author dallas + * @since 2025-10-24 + */ +public interface RagFileRepository extends IRepository { + void removeByKnowledgeBaseId(String knowledgeBaseId); + + List findByKnowledgeBaseId(String knowledgeBaseId); +} diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/DataInsertedEvent.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/DataInsertedEvent.java new file mode 100644 index 000000000..417de1e2c --- /dev/null +++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/DataInsertedEvent.java @@ -0,0 +1,12 @@ +package com.datamate.rag.indexer.infrastructure.event; + +import com.datamate.rag.indexer.interfaces.dto.ProcessType; + +/** + * 数据插入事件 + * + * @author dallas + * @since 2025-10-29 + */ +public record DataInsertedEvent(String knowledgeBaseId, ProcessType processType) { +} diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java new file mode 100644 index 000000000..5c9979e59 --- /dev/null +++ 
b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java @@ -0,0 +1,157 @@ +package com.datamate.rag.indexer.infrastructure.event; + +import com.datamate.common.models.domain.entity.ModelConfig; +import com.datamate.common.models.domain.repository.ModelConfigRepository; +import com.datamate.common.models.infrastructure.client.ModelClient; +import com.datamate.rag.indexer.domain.model.FileStatus; +import com.datamate.rag.indexer.domain.model.RagFile; +import com.datamate.rag.indexer.domain.repository.RagFileRepository; +import com.datamate.rag.indexer.interfaces.dto.ProcessType; +import com.datamate.datamanagement.domain.model.dataset.DatasetFile; +import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository; +import dev.langchain4j.data.document.Document; +import dev.langchain4j.data.document.DocumentParser; +import dev.langchain4j.data.document.DocumentSplitter; +import dev.langchain4j.data.document.loader.FileSystemDocumentLoader; +import dev.langchain4j.data.document.parser.TextDocumentParser; +import dev.langchain4j.data.document.parser.apache.pdfbox.ApachePdfBoxDocumentParser; +import dev.langchain4j.data.document.parser.apache.poi.ApachePoiDocumentParser; +import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser; +import dev.langchain4j.data.document.parser.markdown.MarkdownDocumentParser; +import dev.langchain4j.data.document.splitter.*; +import dev.langchain4j.data.document.transformer.jsoup.HtmlToTextDocumentTransformer; +import dev.langchain4j.data.embedding.Embedding; +import dev.langchain4j.data.segment.TextSegment; +import dev.langchain4j.model.embedding.EmbeddingModel; +import dev.langchain4j.model.output.Response; +import dev.langchain4j.store.embedding.EmbeddingStore; +import dev.langchain4j.store.embedding.milvus.MilvusEmbeddingStore; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import 
org.jetbrains.annotations.NotNull; +import org.springframework.scheduling.annotation.Async; +import org.springframework.stereotype.Service; +import org.springframework.transaction.event.TransactionPhase; +import org.springframework.transaction.event.TransactionalEventListener; + +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Semaphore; + +/** + * RAG ETL服务 + * + * @author dallas + * @since 2025-10-29 + */ +@Slf4j +@Service +@RequiredArgsConstructor +public class RagEtlService { + private static final Semaphore SEMAPHORE = new Semaphore(10); + + private final RagFileRepository ragFileRepository; + + private final DatasetFileRepository datasetFileRepository; + + private final ModelConfigRepository modelConfigRepository; + + private final ExecutorService executor = Executors.newVirtualThreadPerTaskExecutor(); + + @Async + @TransactionalEventListener(phase = TransactionPhase.AFTER_COMMIT) + public void processAfterCommit(DataInsertedEvent event) { + // 执行 RAG 处理流水线 + List ragFiles = ragFileRepository.findByKnowledgeBaseId(event.knowledgeBaseId()); + + ragFiles.forEach(ragFile -> { + try { + SEMAPHORE.acquire(); + executor.submit(() -> { + try { + // 执行 RAG 处理流水线 + ragFile.setStatus(FileStatus.PROCESSING); + ragFileRepository.updateById(ragFile); + processRagFile(ragFile, event.processType()); + // 更新文件状态为已处理 + ragFile.setStatus(FileStatus.PROCESSED); + ragFileRepository.updateById(ragFile); + } catch (Exception e) { + // 处理异常 + ragFile.setStatus(FileStatus.PROCESS_FAILED); + ragFileRepository.updateById(ragFile); + } finally { + SEMAPHORE.release(); + } + }); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + ); + } + + private void processRagFile(RagFile ragFile, ProcessType processType) { + DatasetFile file = datasetFileRepository.getById(ragFile.getFileId()); + // 使用文档解析器解析文档 + DocumentParser parser = 
documentParser(file.getFileType()); + // 从文件系统读取文档 + Document document = FileSystemDocumentLoader.loadDocument(file.getFilePath(), parser); + // 对html文档进行转换 + if (Arrays.asList("html", "htm").contains(file.getFileType().toLowerCase())) { + document= new HtmlToTextDocumentTransformer().transform(document); + } + // 使用文档分块器对文档进行分块 + DocumentSplitter splitter = documentSplitter(processType); + List split = splitter.split(document); + + // 更新分块数量 + ragFile.setChunkCount(split.size()); + ragFileRepository.updateById(ragFile); + + // 调用模型客户端获取嵌入模型 + ModelConfig model = modelConfigRepository.getById("1"); + EmbeddingModel embeddingModel = ModelClient.invokeEmbeddingModel(model); + // 调用嵌入模型获取嵌入向量 + Response<@NotNull List> response = embeddingModel.embedAll(split); + // 存储嵌入向量到 Milvus + embeddingStore().addAll(response.content(), split); + } + + /** + * 根据文件类型返回对应的文档解析器 + * + * @param fileType 文件类型 + * @return 文档解析器 + */ + public DocumentParser documentParser(String fileType) { + fileType = fileType.toLowerCase(); + return switch (fileType) { + case "txt", "html", "htm" -> new TextDocumentParser(); + case "md" -> new MarkdownDocumentParser(); + case "pdf" -> new ApachePdfBoxDocumentParser(); + case "doc", "docx", "xls", "xlsx", "ppt", "pptx" -> new ApachePoiDocumentParser(); + default -> new ApacheTikaDocumentParser(); + }; + } + + public DocumentSplitter documentSplitter(ProcessType processType) { + return switch (processType) { + case CHAPTER_CHUNK -> new DocumentByParagraphSplitter(1000, 100); + case PARAGRAPH_CHUNK -> new DocumentByLineSplitter(1000, 100); + case LENGTH_CHUNK -> new DocumentBySentenceSplitter(1000, 100); + case CUSTOM_SEPARATOR_CHUNK -> new DocumentByWordSplitter(1000, 100); + case DEFAULT_CHUNK -> new DocumentByRegexSplitter("\\n\\n", "",1000, 100); + }; + } + + public EmbeddingStore embeddingStore() { + return MilvusEmbeddingStore.builder() + .uri("http://milvus:19530") + .collectionName("rag_embeddings") + .dimension(1536) + .build(); + } +} diff 
--git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/KnowledgeBaseRepositoryImpl.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/KnowledgeBaseRepositoryImpl.java
new file mode 100644
index 000000000..c186bac89
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/KnowledgeBaseRepositoryImpl.java
@@ -0,0 +1,39 @@
+package com.datamate.rag.indexer.infrastructure.persistence.impl;
+
+import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
+import com.baomidou.mybatisplus.core.metadata.IPage;
+import com.baomidou.mybatisplus.extension.repository.CrudRepository;
+import com.datamate.rag.indexer.domain.model.KnowledgeBase;
+import com.datamate.rag.indexer.domain.repository.KnowledgeBaseRepository;
+import com.datamate.rag.indexer.infrastructure.persistence.mapper.KnowledgeBaseMapper;
+import com.datamate.rag.indexer.interfaces.dto.KnowledgeBaseQueryReq;
+import org.springframework.stereotype.Repository;
+import org.springframework.util.StringUtils;
+
+/**
+ * Knowledge base repository implementation.
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+@Repository
+public class KnowledgeBaseRepositoryImpl extends CrudRepository<KnowledgeBaseMapper, KnowledgeBase> implements KnowledgeBaseRepository {
+
+    /**
+     * Pages knowledge bases, newest first. Name, description, creator and updater are
+     * matched with LIKE, and each filter is applied only when its value has text.
+     *
+     * @param page    paging parameters
+     * @param request query filters
+     * @return the requested page of knowledge bases
+     */
+    @Override
+    public IPage<KnowledgeBase> page(IPage<KnowledgeBase> page, KnowledgeBaseQueryReq request) {
+        return this.page(page, new LambdaQueryWrapper<KnowledgeBase>()
+            .like(StringUtils.hasText(request.getName()), KnowledgeBase::getName, request.getName())
+            .like(StringUtils.hasText(request.getDescription()), KnowledgeBase::getDescription, request.getDescription())
+            .like(StringUtils.hasText(request.getCreatedBy()), KnowledgeBase::getCreatedBy, request.getCreatedBy())
+            .like(StringUtils.hasText(request.getUpdatedBy()), KnowledgeBase::getUpdatedBy, request.getUpdatedBy())
+            .orderByDesc(KnowledgeBase::getCreatedAt));
+    }
+}
diff --git
a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/RagFileRepositoryImpl.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/RagFileRepositoryImpl.java
new file mode 100644
index 000000000..0e0c0986c
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/RagFileRepositoryImpl.java
@@ -0,0 +1,44 @@
+package com.datamate.rag.indexer.infrastructure.persistence.impl;
+
+import com.baomidou.mybatisplus.extension.repository.CrudRepository;
+import com.datamate.rag.indexer.domain.model.FileStatus;
+import com.datamate.rag.indexer.domain.model.RagFile;
+import com.datamate.rag.indexer.domain.repository.RagFileRepository;
+import com.datamate.rag.indexer.infrastructure.persistence.mapper.RagFileMapper;
+import org.springframework.stereotype.Repository;
+
+import java.util.List;
+
+/**
+ * Knowledge base file repository implementation.
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+@Repository
+public class RagFileRepositoryImpl extends CrudRepository<RagFileMapper, RagFile> implements RagFileRepository {
+    /**
+     * Deletes every file row that belongs to the given knowledge base.
+     *
+     * @param knowledgeBaseId knowledge base ID
+     */
+    @Override
+    public void removeByKnowledgeBaseId(String knowledgeBaseId) {
+        lambdaUpdate().eq(RagFile::getKnowledgeBaseId, knowledgeBaseId).remove();
+    }
+
+    /**
+     * Lists the files of a knowledge base that still need processing, i.e. those
+     * whose status is UNPROCESSED or PROCESS_FAILED.
+     *
+     * @param knowledgeBaseId knowledge base ID
+     * @return the matching files
+     */
+    @Override
+    public List<RagFile> findByKnowledgeBaseId(String knowledgeBaseId) {
+        return lambdaQuery()
+            .eq(RagFile::getKnowledgeBaseId, knowledgeBaseId)
+            .in(RagFile::getStatus, FileStatus.UNPROCESSED, FileStatus.PROCESS_FAILED)
+            .list();
+    }
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/mapper/KnowledgeBaseMapper.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/mapper/KnowledgeBaseMapper.java
new file mode 100644
index 000000000..cf1525f4d
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/mapper/KnowledgeBaseMapper.java
@@ -0,0 +1,16 @@
+package com.datamate.rag.indexer.infrastructure.persistence.mapper;
+
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.datamate.rag.indexer.domain.model.KnowledgeBase;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * MyBatis mapper for knowledge bases.
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+@Mapper
+public interface KnowledgeBaseMapper extends BaseMapper<KnowledgeBase> {
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/mapper/RagFileMapper.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/mapper/RagFileMapper.java
new file mode 100644
index 000000000..e0f233f5a
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/mapper/RagFileMapper.java
@@ -0,0 +1,16 @@
+package com.datamate.rag.indexer.infrastructure.persistence.mapper;
+
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.datamate.rag.indexer.domain.model.RagFile;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * MyBatis mapper for RAG files.
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+@Mapper
+public interface RagFileMapper extends BaseMapper<RagFile> {
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/EmbeddingController.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/EmbeddingController.java
new file mode 100644
index 000000000..06963dc10
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/EmbeddingController.java
@@ -0,0 +1,8 @@
+package com.datamate.rag.indexer.interfaces;
+
+import org.springframework.web.bind.annotation.RestController;
+
+@RestController
+public class EmbeddingController {
+    // Placeholder: no endpoints are defined yet.
+}
diff
--git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/KnowledgeBaseController.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/KnowledgeBaseController.java
new file mode 100644
index 000000000..d0ed0999e
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/KnowledgeBaseController.java
@@ -0,0 +1,140 @@
+package com.datamate.rag.indexer.interfaces;
+
+import com.datamate.rag.indexer.application.KnowledgeBaseService;
+import com.datamate.rag.indexer.domain.model.KnowledgeBase;
+import com.datamate.rag.indexer.domain.model.RagChunk;
+import com.datamate.rag.indexer.domain.model.RagFile;
+import com.datamate.common.infrastructure.common.Response;
+import com.datamate.common.interfaces.PagedResponse;
+import com.datamate.common.interfaces.PagingQuery;
+import com.datamate.rag.indexer.interfaces.dto.*;
+import lombok.RequiredArgsConstructor;
+import org.springframework.web.bind.annotation.*;
+
+import jakarta.validation.Valid;
+
+/**
+ * Knowledge base REST controller.
+ *
+ * @author dallas
+ * @since 2025-09-30
+ */
+@RestController
+@RequiredArgsConstructor
+@RequestMapping("/v1/knowledge-base")
+public class KnowledgeBaseController {
+    private final KnowledgeBaseService knowledgeBaseService;
+
+    // NOTE(review): looks like a leftover debug endpoint - confirm whether it should ship.
+    @GetMapping(path = "/test1")
+    public String test() {
+        return "test1";
+    }
+
+    /**
+     * Creates a knowledge base.
+     *
+     * @param request creation request
+     * @return knowledge base ID
+     */
+    @PostMapping("/create")
+    public String create(@RequestBody @Valid KnowledgeBaseCreateReq request) {
+        return knowledgeBaseService.create(request);
+    }
+
+    /**
+     * Updates a knowledge base.
+     *
+     * @param knowledgeBaseId knowledge base ID
+     * @param request         update request
+     */
+    @PutMapping("/{knowledgeBaseId}")
+    public void update(@PathVariable("knowledgeBaseId") String knowledgeBaseId,
+                       @RequestBody @Valid KnowledgeBaseUpdateReq request) {
+        knowledgeBaseService.update(knowledgeBaseId, request);
+    }
+
+    /**
+     * Deletes a knowledge base.
+     *
+     * @param knowledgeBaseId knowledge base ID
+     */
+    @DeleteMapping("/{knowledgeBaseId}")
+    public void delete(@PathVariable("knowledgeBaseId") String knowledgeBaseId) {
+        knowledgeBaseService.delete(knowledgeBaseId);
+    }
+
+    /**
+     * Returns a knowledge base.
+     *
+     * @param knowledgeBaseId knowledge base ID
+     * @return the knowledge base
+     */
+    @GetMapping("/{knowledgeBaseId}")
+    public KnowledgeBase get(@PathVariable("knowledgeBaseId") String knowledgeBaseId) {
+        return knowledgeBaseService.getById(knowledgeBaseId);
+    }
+
+    /**
+     * Lists knowledge bases page by page.
+     *
+     * @param request paging parameters and filters
+     * @return a page of knowledge bases
+     */
+    @PostMapping("/list")
+    public PagedResponse<KnowledgeBase> list(@RequestBody @Valid KnowledgeBaseQueryReq request) {
+        return knowledgeBaseService.list(request);
+    }
+
+    /**
+     * Adds files to a knowledge base.
+     *
+     * @param knowledgeBaseId knowledge base ID
+     * @param request         add-files request; its knowledge base ID is overwritten with the path value
+     */
+    @PostMapping("/{knowledgeBaseId}/files")
+    public void addFiles(@PathVariable("knowledgeBaseId") String knowledgeBaseId,
+                         @RequestBody @Valid AddFilesReq request) {
+        request.setKnowledgeBaseId(knowledgeBaseId);
+        knowledgeBaseService.addFiles(request);
+    }
+
+    /**
+     * Lists the files of a knowledge base page by page.
+     *
+     * @param knowledgeBaseId knowledge base ID
+     * @param request         paging parameters, bound from the query string because this is a GET
+     * @return a page of knowledge base files
+     */
+    @GetMapping("/{knowledgeBaseId}/files")
+    public PagedResponse<RagFile> listFiles(@PathVariable("knowledgeBaseId") String knowledgeBaseId,
+                                            @Valid RagFileReq request) {
+        return knowledgeBaseService.listFiles(knowledgeBaseId, request);
+    }
+
+    /**
+     * Deletes files from a knowledge base.
+     *
+     * @param knowledgeBaseId knowledge base ID
+     * @param request         delete-files request
+     */
+    @DeleteMapping("/{knowledgeBaseId}/files")
+    public void deleteFile(@PathVariable("knowledgeBaseId") String knowledgeBaseId,
+                           @RequestBody DeleteFilesReq request) {
+        knowledgeBaseService.deleteFile(knowledgeBaseId, request);
+    }
+
+    /**
+     * Returns the chunks of a knowledge base file page by page.
+     *
+     * @param knowledgeBaseId knowledge base ID
+     * @param ragFileId       file ID
+     * @return a page of chunks
+     */
+    @GetMapping("/{knowledgeBaseId}/files/{ragFileId}")
+    public PagedResponse<RagChunk> getChunks(@PathVariable("knowledgeBaseId") String knowledgeBaseId,
+                                             @PathVariable("ragFileId") String ragFileId,
+                                             PagingQuery pagingQuery) {
+        return knowledgeBaseService.getChunks(knowledgeBaseId, ragFileId, pagingQuery);
+    }
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/AddFilesReq.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/AddFilesReq.java
new file mode 100644
index 000000000..52568a2ae
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/AddFilesReq.java
@@ -0,0 +1,23 @@
+package com.datamate.rag.indexer.interfaces.dto;
+
+import lombok.Getter;
+import lombok.Setter;
+
+import java.util.List;
+
+/**
+ * Request to add files to a knowledge base.
+ *
+ * @author dallas
+ * @since 2025-10-29
+ */
+@Getter
+@Setter
+public class AddFilesReq {
+    private String knowledgeBaseId;
+    private ProcessType processType;
+    private List<FileInfo> files;
+
+    public record FileInfo(String fileId, String fileName) {
+    }
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/DeleteFilesReq.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/DeleteFilesReq.java
new file mode 100644
index 000000000..0837d4e55
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/DeleteFilesReq.java
@@ -0,0 +1,18 @@
+package com.datamate.rag.indexer.interfaces.dto;
+
+import lombok.Getter;
+import lombok.Setter;
+
+import java.util.List;
+
+/**
+ * Request to delete files from a knowledge base.
+ *
+ * @author dallas
+ * @since 2025-10-29
+ */
+@Getter
+@Setter
+public class DeleteFilesReq {
+    private List<String> fileIds;
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseCreateReq.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseCreateReq.java
new file mode 100644
index 000000000..c3df78c17
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseCreateReq.java
@@ -0,0 +1,41 @@
+package com.datamate.rag.indexer.interfaces.dto;
+
+import jakarta.validation.constraints.NotEmpty;
+import jakarta.validation.constraints.Pattern;
+import jakarta.validation.constraints.Size;
+import lombok.Getter;
+import lombok.Setter;
+
+/**
+ * Knowledge base creation request.
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+@Setter
+@Getter
+public class KnowledgeBaseCreateReq {
+    /**
+     * Knowledge base name: required, 1 to 255 characters, letters, digits and underscores only.
+     */
+    @NotEmpty(message = "知识库名称不能为空")
+    @Size(min = 1, max = 255, message = "知识库名称长度必须在 1 到 255 之间")
+    @Pattern(regexp = "^[a-zA-Z0-9_]+$", message = "知识库名称只能包含字母、数字和下划线")
+    private String name;
+    /**
+     * Knowledge base description: 1 to 512 characters when present.
+     */
+    @Size(min = 1, max = 512, message = "知识库描述长度必须在 1 到 512 之间")
+    private String description;
+
+    /**
+     * Embedding model (required).
+     */
+    @NotEmpty(message = "嵌入模型不能为空")
+    private String embeddingModel;
+
+    /**
+     * Chat model.
+     */
+    private String chatModel;
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseQueryReq.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseQueryReq.java
new file mode 100644
index 000000000..a2e71476d
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseQueryReq.java
@@ -0,0 +1,24 @@
+package com.datamate.rag.indexer.interfaces.dto;
+
+import com.datamate.common.interfaces.PagingQuery;
+import lombok.Getter;
+import lombok.Setter;
+
+import java.time.LocalDateTime;
+
+/**
+ * Paged query for knowledge bases: paging parameters plus optional filters.
+ *
+ * @author dallas
+ * @since 2025-10-29
+ */
+@Setter
+@Getter
+public class KnowledgeBaseQueryReq extends PagingQuery {
+    private String name;
+    private String description;
+    private LocalDateTime createdAt;
+    private LocalDateTime updatedAt;
+    private String createdBy;
+    private String updatedBy;
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseUpdateReq.java
b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseUpdateReq.java
new file mode 100644
index 000000000..a1156543c
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseUpdateReq.java
@@ -0,0 +1,30 @@
+package com.datamate.rag.indexer.interfaces.dto;
+
+import jakarta.validation.constraints.NotEmpty;
+import jakarta.validation.constraints.Pattern;
+import jakarta.validation.constraints.Size;
+import lombok.Getter;
+import lombok.Setter;
+
+/**
+ * Knowledge base update request.
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+@Getter
+@Setter
+public class KnowledgeBaseUpdateReq {
+    /**
+     * Knowledge base name: required, 1 to 255 characters, letters, digits and underscores only.
+     */
+    @NotEmpty(message = "知识库名称不能为空")
+    @Size(min = 1, max = 255, message = "知识库名称长度必须在 1 到 255 之间")
+    @Pattern(regexp = "^[a-zA-Z0-9_]+$", message = "知识库名称只能包含字母、数字和下划线")
+    private String name;
+    /**
+     * Knowledge base description: 1 to 512 characters when present.
+     */
+    @Size(min = 1, max = 512, message = "知识库描述长度必须在 1 到 512 之间")
+    private String description;
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/ProcessType.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/ProcessType.java
new file mode 100644
index 000000000..7301163c3
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/ProcessType.java
@@ -0,0 +1,33 @@
+package com.datamate.rag.indexer.interfaces.dto;
+
+/**
+ * Chunking strategy applied when a file is processed.
+ *
+ * @author dallas
+ * @since 2025-10-29
+ */
+public enum ProcessType {
+    /**
+     * Chunk by chapter.
+     */
+    CHAPTER_CHUNK,
+    /**
+     * Chunk by paragraph.
+     */
+    PARAGRAPH_CHUNK,
+
+    /**
+     * Chunk by length.
+     */
+    LENGTH_CHUNK,
+
+    /**
+     * Chunk by a custom separator.
+     */
+    CUSTOM_SEPARATOR_CHUNK,
+
+    /**
+     * Default chunking.
+     */
+    DEFAULT_CHUNK,
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/RagFileReq.java
b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/RagFileReq.java
new file mode 100644
index 000000000..a26f1b055
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/RagFileReq.java
@@ -0,0 +1,17 @@
+package com.datamate.rag.indexer.interfaces.dto;
+
+import com.datamate.common.interfaces.PagingQuery;
+import lombok.Getter;
+import lombok.Setter;
+
+/**
+ * Paged query for the files of a knowledge base.
+ *
+ * @author dallas
+ * @since 2025-10-29
+ */
+@Getter
+@Setter
+public class RagFileReq extends PagingQuery {
+    private String fileName;
+}
diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagedResponse.java b/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagedResponse.java
index 7d8732889..4a9647ac6 100644
--- a/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagedResponse.java
+++ b/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagedResponse.java
@@ -12,7 +12,9 @@
 @NoArgsConstructor
 @AllArgsConstructor
 public class PagedResponse<T> {
+    // Current page index (zero-based)
     private long page;
+    // Page size
     private long size;
     private long totalElements;
     private long totalPages;
diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagingQuery.java b/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagingQuery.java
index 798075f93..5c646dd89 100644
--- a/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagingQuery.java
+++ b/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagingQuery.java
@@ -1,14 +1,8 @@
 package com.datamate.common.interfaces;
 
-import lombok.AllArgsConstructor;
 import lombok.Getter;
-import lombok.NoArgsConstructor;
-import lombok.Setter;
 
 @Getter
-@Setter
-@NoArgsConstructor
-@AllArgsConstructor
 public class PagingQuery {
     /**
      * 页码,从0开始
@@ -19,4 +13,30 @@ public class PagingQuery {
      * 每页大小
      */
     private Integer size = 20;
+
+    /**
+     * Sets the page index; null or negative values fall back to 0.
+     *
+     * @param page zero-based page index
+     */
+    public void setPage(Integer page) {
+        if (page == null || page < 0) {
+            this.page = 0;
+        } else {
+            this.page = page;
+        }
+    }
+
+    /**
+     * Sets the page size; null or non-positive values fall back to 20.
+     *
+     * @param size page size
+     */
+    public void setSize(Integer size) {
+        if (size == null || size <= 0) {
+            this.size = 20;
+        } else {
+            this.size = size;
+        }
+    }
 }
diff --git a/deployment/docker/datamate/docker-compose.yml b/deployment/docker/datamate/docker-compose.yml
index 623fa7d2b..61cfa50f6 100644
--- a/deployment/docker/datamate/docker-compose.yml
+++ b/deployment/docker/datamate/docker-compose.yml
@@ -91,3 +91,4 @@ volumes:
 networks:
   datamate:
     driver: bridge
+    name: datamate-network
diff --git a/deployment/docker/deer-flow/docker-compose.yml b/deployment/docker/deer-flow/docker-compose.yml
index 00069fa35..03853e3e5 100644
--- a/deployment/docker/deer-flow/docker-compose.yml
+++ b/deployment/docker/deer-flow/docker-compose.yml
@@ -24,5 +24,5 @@ services:
 networks:
   datamate:
     driver: bridge
-    name: datamate_datamate
+    name: datamate-network
     external: true
diff --git a/deployment/docker/milvus/docker-compose.yml b/deployment/docker/milvus/docker-compose.yml
new file mode 100644
index 000000000..639606806
--- /dev/null
+++ b/deployment/docker/milvus/docker-compose.yml
@@ -0,0 +1,74 @@
+version: '3.5'
+
+services:
+  etcd:
+    container_name: milvus-etcd
+    image: quay.io/coreos/etcd:v3.5.18
+    environment:
+      - ETCD_AUTO_COMPACTION_MODE=revision
+      - ETCD_AUTO_COMPACTION_RETENTION=1000
+      - ETCD_QUOTA_BACKEND_BYTES=4294967296
+      - ETCD_SNAPSHOT_COUNT=50000
+    volumes:
+      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd
+    command: etcd -advertise-client-urls=http://etcd:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd
+    networks:
+      - datamate
+    healthcheck:
+      test: ["CMD", "etcdctl", "endpoint", "health"]
+      interval: 30s
+      timeout: 20s
+      retries: 3
+
+  minio:
+    container_name: milvus-minio
+    image: minio/minio:RELEASE.2024-12-18T13-15-44Z
+    environment:
+      MINIO_ACCESS_KEY: minioadmin
+      MINIO_SECRET_KEY: minioadmin
+    ports:
+      - "9001:9001"
+      - "9000:9000"
+    volumes:
+      -
${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data
+    command: minio server /minio_data --console-address ":9001"
+    networks:
+      - datamate
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
+      interval: 30s
+      timeout: 20s
+      retries: 3
+
+  standalone:
+    container_name: milvus-standalone
+    image: milvusdb/milvus:v2.6.2
+    command: ["milvus", "run", "standalone"]
+    security_opt:
+      - seccomp:unconfined
+    environment:
+      ETCD_ENDPOINTS: etcd:2379
+      MINIO_ADDRESS: minio:9000
+      MQ_TYPE: woodpecker
+    volumes:
+      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus
+    networks:
+      - datamate
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"]
+      interval: 30s
+      start_period: 90s
+      timeout: 20s
+      retries: 3
+    ports:
+      - "19530:19530"
+      - "9091:9091"
+    depends_on:
+      - "etcd"
+      - "minio"
+
+networks:
+  datamate:
+    name: datamate-network
+    external: true
+    # No driver here: Compose rejects any attribute other than "name" on an external network.
diff --git a/scripts/db/rag-management-init.sql b/scripts/db/rag-management-init.sql
new file mode 100644
index 000000000..0e429aef5
--- /dev/null
+++ b/scripts/db/rag-management-init.sql
@@ -0,0 +1,32 @@
+USE datamate;
+
+-- Knowledge bases managed by the RAG indexer.
+CREATE TABLE IF NOT EXISTS t_rag_knowledge_base
+(
+    id              VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
+    name            VARCHAR(255) NOT NULL COMMENT '知识库名称',
+    description     VARCHAR(512) NULL COMMENT '知识库描述',
+    embedding_model VARCHAR(255) NOT NULL COMMENT '嵌入模型',
+    chat_model      VARCHAR(255) NULL COMMENT '聊天模型',
+    created_at      TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
+    updated_at      TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
+    created_by      VARCHAR(255) COMMENT '创建者',
+    updated_by      VARCHAR(255) COMMENT '更新者'
+) COMMENT '知识库表';
+
+-- Files attached to a knowledge base; looked up and deleted by knowledge_base_id.
+CREATE TABLE IF NOT EXISTS t_rag_file
+(
+    id                VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
+    knowledge_base_id VARCHAR(36) NOT NULL COMMENT '知识库ID',
+    file_name         VARCHAR(255) NOT NULL COMMENT '文件名',
+    file_id           VARCHAR(255) NOT NULL COMMENT '文件ID',
+    chunk_count       INT COMMENT '切片数',
+    metadata          JSON COMMENT '元数据',
+    status            VARCHAR(50) COMMENT '文件状态',
+    created_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
+    updated_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
+    created_by        VARCHAR(255) COMMENT '创建者',
+    updated_by        VARCHAR(255) COMMENT '更新者',
+    INDEX idx_rag_file_knowledge_base_id (knowledge_base_id)
+) COMMENT '知识库文件表';