diff --git a/EdgeCraftRAG/Dockerfile.server b/EdgeCraftRAG/Dockerfile.server index f910252616..32701f6a32 100644 --- a/EdgeCraftRAG/Dockerfile.server +++ b/EdgeCraftRAG/Dockerfile.server @@ -6,7 +6,7 @@ RUN apt-get remove -y libze-intel-gpu1 libigc1 libigdfcl1 libze-dev || true; \ apt-get update; \ apt-get install -y curl RUN curl -sL 'https://keyserver.ubuntu.com/pks/lookup?fingerprint=on&op=get&search=0x0C0E6AF955CE463C03FC51574D098D70AFBE5E1F' | tee /etc/apt/trusted.gpg.d/driver.asc -RUN echo -e "Types: deb\nURIs: https://ppa.launchpadcontent.net/kobuk-team/intel-graphics/ubuntu/\nSuites: plucky\nComponents: main\nSigned-By: /etc/apt/trusted.gpg.d/driver.asc" > /etc/apt/sources.list.d/driver.sources +RUN echo -e "Types: deb\nURIs: https://ppa.launchpadcontent.net/kobuk-team/intel-graphics/ubuntu/\nSuites: questing\nComponents: main\nSigned-By: /etc/apt/trusted.gpg.d/driver.asc" > /etc/apt/sources.list.d/driver.sources RUN apt-get update && apt-get install -y libze-intel-gpu1 libze1 intel-metrics-discovery intel-opencl-icd clinfo intel-gsc && apt-get install -y libze-intel-gpu1 libze1 intel-metrics-discovery intel-opencl-icd clinfo intel-gsc && apt-get install -y libze-dev intel-ocloc libze-intel-gpu-raytracing RUN useradd -m -s /bin/bash user && \ @@ -18,11 +18,13 @@ RUN mkdir /templates && \ COPY ./edgecraftrag/prompt_template/default_prompt.txt /templates/ RUN chown -R user /templates/default_prompt.txt -COPY ./edgecraftrag /home/user/edgecraftrag - -RUN mkdir -p /home/user/ui_cache +RUN mkdir -p /home/user/ui_cache /home/user/edgecraftrag ENV UI_UPLOAD_PATH=/home/user/ui_cache +# Copy requirements first so pip install is cached independently from source changes +COPY ./edgecraftrag/requirements.txt /home/user/edgecraftrag/requirements.txt +RUN chown -R user /home/user/edgecraftrag + USER user WORKDIR /home/user/edgecraftrag @@ -37,4 +39,7 @@ ENV PYTHONPATH="$PYTHONPATH:/home/user/genai/tools/llm_bench" RUN python3 -m nltk.downloader -d /home/user/nltk_data 
punkt_tab averaged_perceptron_tagger_eng +# Copy the full source last — changes here no longer bust the pip cache layers above +COPY ./edgecraftrag /home/user/edgecraftrag + ENTRYPOINT ["python3", "-m", "edgecraftrag.server"] diff --git a/EdgeCraftRAG/README.md b/EdgeCraftRAG/README.md index 0f225c9b91..cef19d1749 100644 --- a/EdgeCraftRAG/README.md +++ b/EdgeCraftRAG/README.md @@ -7,10 +7,9 @@ quality and performance. ## What's New -1. Support Agent component and enable deep_search agent -2. Optimize pipeline execution performance with asynchronous api -3. Support session list display in UI -4. Support vllm-based embedding service +1. Support decouple operation for pipeline and knowledge base +2. Optimize Agentic workflow user experience +3. User Guide enhancement ## Table of contents diff --git a/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_01.jpg b/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_01.jpg new file mode 100644 index 0000000000..99c76a9747 Binary files /dev/null and b/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_01.jpg differ diff --git a/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_02.jpg b/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_02.jpg new file mode 100644 index 0000000000..e60cea1158 Binary files /dev/null and b/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_02.jpg differ diff --git a/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_03.jpg b/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_03.jpg new file mode 100644 index 0000000000..039012503e Binary files /dev/null and b/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_03.jpg differ diff --git a/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_04.jpg b/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_04.jpg new file mode 100644 index 0000000000..6d216e08a9 Binary files /dev/null and b/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_04.jpg differ diff --git a/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_05.jpg b/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_05.jpg new file mode 100644 index 
0000000000..f9e508aa5f Binary files /dev/null and b/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_05.jpg differ diff --git a/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_06.jpg b/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_06.jpg new file mode 100644 index 0000000000..37be0eb3c7 Binary files /dev/null and b/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_06.jpg differ diff --git a/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_07.jpg b/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_07.jpg new file mode 100644 index 0000000000..4054cb715d Binary files /dev/null and b/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_07.jpg differ diff --git a/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_08.jpg b/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_08.jpg new file mode 100644 index 0000000000..80f69b0d08 Binary files /dev/null and b/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_08.jpg differ diff --git a/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_09.png b/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_09.png new file mode 100644 index 0000000000..f33ea0b5eb Binary files /dev/null and b/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_09.png differ diff --git a/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_10.png b/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_10.png new file mode 100644 index 0000000000..fdf61928a7 Binary files /dev/null and b/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_10.png differ diff --git a/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_11.png b/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_11.png new file mode 100644 index 0000000000..2da3873635 Binary files /dev/null and b/EdgeCraftRAG/assets/img/Explore_Edge_Craft_RAG_11.png differ diff --git a/EdgeCraftRAG/assets/img/kbadmin_index.png b/EdgeCraftRAG/assets/img/kbadmin_index.png deleted file mode 100644 index 7383a01c79..0000000000 Binary files a/EdgeCraftRAG/assets/img/kbadmin_index.png and /dev/null differ diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/README.md 
b/EdgeCraftRAG/docker_compose/intel/gpu/arc/README.md index a9ca944088..a0c71dba87 100755 --- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/README.md +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/README.md @@ -1,8 +1,10 @@ # Example Edge Craft Retrieval-Augmented Generation Deployment on Intel® Arc® Platform -This document outlines the deployment process for Edge Craft Retrieval-Augmented Generation service on Intel Arc server. This example includes the following sections: +[中文版](README_zh.md) -- [EdgeCraftRAG Quick Start Deployment](#edgecraftrag-quick-start-deployment): Demonstrates how to quickly deploy a Edge Craft Retrieval-Augmented Generation service/pipeline on Intel® Arc® platform. +This document outlines the deployment process for Edge Craft Retrieval-Augmented Generation service on Intel® Arc® Platform. This example includes the following sections: + +- [EdgeCraftRAG Quick Start Deployment](#edgecraftrag-quick-start-deployment): Demonstrates how to quickly deploy a Edge Craft Retrieval-Augmented Generation service/pipeline on Intel® Arc® Platform. - [EdgeCraftRAG Docker Compose Files](#edgecraftrag-docker-compose-files): Describes some example deployments and their docker compose files. - [EdgeCraftRAG Service Configuration](#edgecraftrag-service-configuration): Describes the service and possible configuration changes. @@ -12,23 +14,31 @@ This section describes how to quickly deploy and test the EdgeCraftRAG service m 1. [Prerequisites](#1-prerequisites) 2. [Access the Code](#2-access-the-code) -3. [Prepare models](#3-prepare-models) -4. [Prepare env variables and configurations](#4-prepare-env-variables-and-configurations) -5. [Deploy the Service on Arc GPU Using Docker Compose](#5-deploy-the-service-on-intel-gpu-using-docker-compose) -6. [Access UI](#6-access-ui) -7. [Cleanup the Deployment](#7-cleanup-the-deployment) +3. [Run quick_start.sh](#3-run-quick_startsh) +4. [Access UI](#4-access-ui) +5. 
[Cleanup the Deployment](#5-cleanup-the-deployment) ### 1. Prerequisites -EC-RAG supports vLLM deployment(default method) and local OpenVINO deployment for Intel Arc GPU. Prerequisites are shown as below: -Hardware: Intel Arc A770 -OS: Ubuntu Server 22.04.1 or newer (at least 6.2 LTS kernel) -Driver & libraries: please to [Installing GPUs Drivers](https://dgpu-docs.intel.com/driver/installation-rolling.html#installing-gpu-drivers) for detailed driver & libraries setup +EC-RAG supports vLLM deployment (default method) and local OpenVINO deployment for Intel Arc GPU and Core Ultra Platform. Prerequisites are listed below: + +#### Core Ultra + +**OS**: Ubuntu 24.04 or newer +**Driver & libraries**: Please refer to [Installing Client GPUs on Ubuntu Desktop](https://dgpu-docs.intel.com/driver/client/overview.html#installing-client-gpus-on-ubuntu-desktop) +**Available Inference Frameworks**: OpenVINO + +#### Intel Arc B60 -Hardware: Intel Arc B60 -please to [Install Native Environment](https://github.com/intel/llm-scaler/tree/main/vllm#11-install-native-environment) for detailed setup +**OS**: Ubuntu 25.04 Desktop (for Core Ultra and Xeon-W), Ubuntu 25.04 Server (for Xeon-SP). +**Driver & libraries**: Please refer to [Install Bare Metal Environment](https://github.com/intel/llm-scaler/tree/main/vllm#11-install-bare-metal-environment) for detailed setup +**Available Inference Frameworks**: OpenVINO, vLLM -Below steps are based on **vLLM** as inference engine, if you want to choose **OpenVINO**, please refer to [OpenVINO Local Inference](../../../../docs/Advanced_Setup.md#openvino-local-inference) +#### Intel Arc A770 + +**OS**: Ubuntu Server 22.04.1 or newer (at least 6.2 LTS kernel) +**Driver & libraries**: Please refer to [Installing GPUs Drivers](https://dgpu-docs.intel.com/driver/installation-rolling.html#installing-gpu-drivers) for detailed driver & libraries setup +**Available Inference Frameworks**: OpenVINO, vLLM ### 2. 
Access the Code @@ -39,123 +49,54 @@ git clone https://github.com/opea-project/GenAIExamples.git cd GenAIExamples/EdgeCraftRAG ``` -Checkout a released version, such as v1.5: - -``` -git checkout v1.5 -``` - -### 3. Prepare models +> **NOTE**: To check out a released version, such as v1.5, run: +> +> ``` +> git checkout v1.5 +> ``` -```bash -# Prepare models for embedding, reranking: -export MODEL_PATH="${PWD}/models" # Your model path for embedding, reranking and LLM models -mkdir -p $MODEL_PATH -pip install --upgrade --upgrade-strategy eager "optimum[openvino]" -optimum-cli export openvino -m BAAI/bge-small-en-v1.5 ${MODEL_PATH}/BAAI/bge-small-en-v1.5 --task sentence-similarity -optimum-cli export openvino -m BAAI/bge-reranker-large ${MODEL_PATH}/BAAI/bge-reranker-large --task text-classification - -# Prepare LLM model -export LLM_MODEL="Qwen/Qwen3-8B" # Your model id -pip install modelscope -modelscope download --model $LLM_MODEL --local_dir "${MODEL_PATH}/${LLM_MODEL}" -# Optionally, you can also download models with huggingface: -# pip install -U huggingface_hub -# huggingface-cli download $LLM_MODEL --local-dir "${MODEL_PATH}/${LLM_MODEL}" -``` +### 3. Run quick_start.sh -### 4. 
Prepare env variables and configurations - -#### Prepare env variables for vLLM deployment +Run quick start script from the `EdgeCraftRAG` root directory: ```bash -ip_address=$(hostname -I | awk '{print $1}') -# Use `ip a` to check your active ip -export HOST_IP=$ip_address # Your host ip - -# Check group id of video and render -export VIDEOGROUPID=$(getent group video | cut -d: -f3) -export RENDERGROUPID=$(getent group render | cut -d: -f3) - -# If you have a proxy configured, execute below line -export no_proxy=${no_proxy},${HOST_IP},edgecraftrag,edgecraftrag-server -export NO_PROXY=${NO_PROXY},${HOST_IP},edgecraftrag,edgecraftrag-server -# If you have a HF mirror configured, it will be imported to the container -# export HF_ENDPOINT=https://hf-mirror.com # your HF mirror endpoint" - -# Make sure all 3 folders have 1000:1000 permission, otherwise -export DOC_PATH=${PWD}/tests -export TMPFILE_PATH=${PWD}/tests -chown 1000:1000 ${MODEL_PATH} ${DOC_PATH} ${TMPFILE_PATH} -# In addition, also make sure the .cache folder has 1000:1000 permission, otherwise -chown 1000:1000 -R $HOME/.cache +./tools/quick_start.sh ``` -For more advanced env variables and configurations, please refer to [Prepare env variables for vLLM deployment](../../../../docs/Advanced_Setup.md#prepare-env-variables-for-vllm-deployment) +The script is located in the `tools` directory. For detailed usage of `quick_start.sh` and `build_images.sh`, please refer to [tools/README.md](../../../../tools/README.md). -### 5. Deploy the Service on Intel GPU Using Docker Compose +By default, this script starts local OpenVINO deployment when no environment variables are configured. -set Milvus DB and chat history round for inference: +If you prefer manual model preparation, env setup, and docker compose options, please refer to [Manual deployment details in Advanced Setup](../../../../docs/Advanced_Setup.md#manual-deployment-details-for-arc-platform). 
-```bash -# EC-RAG support Milvus as persistent database, by default milvus is disabled, you can choose to set MILVUS_ENABLED=1 to enable it -export MILVUS_ENABLED=0 -# If you enable Milvus, the default storage path is PWD, uncomment if you want to change: -# export DOCKER_VOLUME_DIRECTORY= # change to your preference +### 4. Access UI -# EC-RAG support chat history round setting, by default chat history is disabled, you can set CHAT_HISTORY_ROUND to control it -# export CHAT_HISTORY_ROUND= # change to your preference -``` - -#### option a. Deploy the Service on Arc A770 Using Docker Compose - -```bash -export VLLM_SERVICE_PORT_A770=8086 # You can set your own port for vllm service - -# Launch EC-RAG service with compose -docker compose --profile a770 -f docker_compose/intel/gpu/arc/compose.yaml up -d -``` +Open your browser, access http://${HOST_IP}:8082 -#### option b. Deploy the Service on Arc B60 Using Docker Compose +After startup completes, `quick_start.sh` will print: -```bash -# Besides MILVUS_ENABLED and CHAT_HISTORY_ROUND, below environments are exposed for vLLM config, you can change them to your preference: -# export VLLM_SERVICE_PORT_B60=8086 -# export DTYPE=float16 -# export TP=1 # for multi GPU, you can change TP value -# export DP=1 -# export ZE_AFFINITY_MASK=0 # for multi GPU, you can export ZE_AFFINITY_MASK=0,1,2... -# export ENFORCE_EAGER=1 -# export TRUST_REMOTE_CODE=1 -# export DISABLE_SLIDING_WINDOW=1 -# export GPU_MEMORY_UTIL=0.8 -# export NO_ENABLE_PREFIX_CACHING=1 -# export MAX_NUM_BATCHED_TOKENS=8192 -# export DISABLE_LOG_REQUESTS=1 -# export MAX_MODEL_LEN=49152 -# export BLOCK_SIZE=64 -# export QUANTIZATION=fp8 -docker compose --profile b60 -f docker_compose/intel/gpu/arc/compose.yaml up -d +```text +Service launched successfully. +UI access URL: http://${HOST_IP}:8082 +If you are accessing from another machine, replace ${HOST_IP} with your server's reachable IP or hostname. ``` -### 6. 
Access UI - -Open your browser, access http://${HOST_IP}:8082 - > Your browser should be running on the same host of your console, otherwise you will need to access UI with your host domain name instead of ${HOST_IP}. Below is the UI front page, for detailed operations on UI and EC-RAG settings, please refer to [Explore_Edge_Craft_RAG](../../../../docs/Explore_Edge_Craft_RAG.md) ![front_page](../../../../assets/img/front_page.png) -### 7. Cleanup the Deployment +### 5. Cleanup the Deployment -To stop the containers associated with the deployment, execute the following command: +To stop the containers associated with the deployment, execute the helper script command: +```bash +./tools/quick_start.sh cleanup ``` -docker compose -f docker_compose/intel/gpu/arc/compose.yaml down -``` -All the EdgeCraftRAG containers will be stopped and then removed on completion of the "down" command. +All the EdgeCraftRAG containers will be stopped and then removed on completion. + +If you prefer the manual docker compose cleanup command, please refer to [Manual cleanup details in Advanced Setup](../../../../docs/Advanced_Setup.md#6-cleanup-the-deployment-manual). ## EdgeCraftRAG Docker Compose Files diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/README_zh.md b/EdgeCraftRAG/docker_compose/intel/gpu/arc/README_zh.md new file mode 100644 index 0000000000..f46ff6255d --- /dev/null +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/README_zh.md @@ -0,0 +1,125 @@ +# 在 Intel® Arc® 平台上部署 Edge Craft 检索增强生成(EC-RAG)示例 + +[English](README.md) + +本文档介绍了在 Intel® Arc® 平台上部署 Edge Craft 检索增强生成服务的流程。该示例包含以下部分: + +- [EdgeCraftRAG 快速开始部署](#edgecraftrag-快速开始部署):演示如何在 Intel® Arc® 平台上快速部署 Edge Craft 检索增强生成服务/流水线。 +- [EdgeCraftRAG Docker Compose 文件](#edgecraftrag-docker-compose-文件):说明一些示例部署及其 docker compose 文件。 +- [EdgeCraftRAG 服务配置](#edgecraftrag-服务配置):说明服务以及可进行的配置变更。 + +## EdgeCraftRAG 快速开始部署 + +本节介绍如何在 Intel® Arc® 平台上手动快速部署并测试 EdgeCraftRAG 服务。基本步骤如下: + +1. [前置条件](#1-前置条件) +2. [获取代码](#2-获取代码) +3. 
[运行 quick_start.sh](#3-运行-quick_startsh) +4. [访问 UI](#4-访问-ui) +5. [清理部署](#5-清理部署) + +### 1. 前置条件 + +EC-RAG 支持 vLLM 部署(默认方式)以及面向 Intel Arc GPU 和 Core Ultra 平台的本地 OpenVINO 部署。前置条件如下: + +#### Core Ultra + +**操作系统**:Ubuntu 24.04 或更高版本 +**驱动与库**:请参考 [Installing Client GPUs on Ubuntu Desktop](https://dgpu-docs.intel.com/driver/client/overview.html#installing-client-gpus-on-ubuntu-desktop) +**可用推理框架**:OpenVINO + +#### Intel Arc B60 + +**操作系统**:Ubuntu 25.04 Desktop(适用于 Core Ultra 和 Xeon-W),Ubuntu 25.04 Server(适用于 Xeon-SP)。 +**驱动与库**:详细安装请参考 [Install Bare Metal Environment](https://github.com/intel/llm-scaler/tree/main/vllm#11-install-bare-metal-environment) +**可用推理框架**:OpenVINO、vLLM + +#### Intel Arc A770 + +**操作系统**:Ubuntu Server 22.04.1 或更高版本(至少 6.2 LTS 内核) +**驱动与库**:详细驱动与库安装请参考 [Installing GPUs Drivers](https://dgpu-docs.intel.com/driver/installation-rolling.html#installing-gpu-drivers) +**可用推理框架**:OpenVINO、vLLM + +### 2. 获取代码 + +克隆 GenAIExamples 仓库,并进入 EdgeCraftRAG 在 Intel® Arc® 平台上的 Docker Compose 文件与配套脚本目录: + +``` +git clone https://github.com/opea-project/GenAIExamples.git +cd GenAIExamples/EdgeCraftRAG +``` + +> **注意**:如果你想切换到某个发布版本,例如 v1.5: +> +> ``` +> git checkout v1.5 +> ``` + +### 3. 运行 quick_start.sh + +在 `EdgeCraftRAG` 根目录下运行快速启动脚本: + +```bash +./tools/quick_start.sh +``` + +该脚本位于 `tools` 目录。有关 `quick_start.sh` 和 `build_images.sh` 的详细用法,请参考 [tools/README_zh.md](../../../../tools/README_zh.md)。 + +在不配置任何环境变量时,脚本默认启动本地 OpenVINO 部署。 + +如果你希望使用手动方式(模型准备、环境变量配置、Docker Compose 启动),请参考 [Advanced Setup 中的手动部署说明](../../../../docs/Advanced_Setup_zh.md#arc-平台手动部署详细说明)。 + +### 4. 访问 UI + +打开浏览器访问 http://${HOST_IP}:8082 + +启动完成后,`quick_start.sh` 会输出: + +```text +Service launched successfully. +UI access URL: http://${HOST_IP}:8082 +If you are accessing from another machine, replace ${HOST_IP} with your server's reachable IP or hostname. 
+``` + +> 浏览器应运行在与控制台相同的主机上;否则你需要使用主机域名而不是 ${HOST_IP} 来访问 UI。 + +下图为 UI 首页。有关 UI 操作和 EC-RAG 设置的详细说明,请参考 [Explore_Edge_Craft_RAG](../../../../docs/Explore_Edge_Craft_RAG_zh.md) +![front_page](../../../../assets/img/front_page.png) + +### 5. 清理部署 + +若要停止与本次部署关联的容器,请执行脚本命令: + +```bash +./tools/quick_start.sh cleanup +``` + +执行完成后,所有 EdgeCraftRAG 容器都会停止并被移除。 + +如果你希望使用手动 docker compose 清理命令,请参考 [Advanced Setup 中的手动清理说明](../../../../docs/Advanced_Setup_zh.md#6-清理部署手动)。 + +## EdgeCraftRAG Docker Compose 文件 + +`compose.yaml` 是默认的 compose 文件,使用 tgi 作为服务框架。 + +| 服务名称 | 镜像名称 | +| ------------------- | ---------------------------------------- | +| etcd | quay.io/coreos/etcd:v3.5.5 | +| minio | minio/minio:RELEASE.2023-03-20T20-16-18Z | +| milvus-standalone | milvusdb/milvus:v2.4.6 | +| edgecraftrag-server | opea/edgecraftrag-server:latest | +| edgecraftrag-ui | opea/edgecraftrag-ui:latest | +| ecrag | opea/edgecraftrag:latest | + +## EdgeCraftRAG 服务配置 + +下表全面概述了示例 Docker Compose 文件中各类部署所使用的 EdgeCraftRAG 服务。表中每一行代表一个独立服务,详细说明了可用镜像及其在部署架构中的功能描述。 + +| 服务名称 | 可选镜像名称 | 可选 | 描述 | +| ------------------- | ---------------------------------------- | ---- | ---------------------------------------------------------- | +| etcd | quay.io/coreos/etcd:v3.5.5 | 否 | 提供分布式键值存储,用于服务发现和配置管理。 | +| minio | minio/minio:RELEASE.2023-03-20T20-16-18Z | 否 | 提供对象存储服务,用于存储文档和模型文件。 | +| milvus-standalone | milvusdb/milvus:v2.4.6 | 否 | 提供向量数据库能力,用于管理 embedding 和相似度检索。 | +| edgecraftrag-server | opea/edgecraftrag-server:latest | 否 | 作为 EdgeCraftRAG 服务后端,具体形态随部署方式不同而变化。 | +| edgecraftrag-ui | opea/edgecraftrag-ui:latest | 否 | 提供 EdgeCraftRAG 服务的用户界面。 | +| ecrag | opea/edgecraftrag:latest | 否 | 作为反向代理,管理 UI 与后端服务之间的流量。 | diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml index 55f6a79b4e..1a169cc364 100644 --- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/compose.yaml @@ -207,14 
+207,14 @@ services: https_proxy: ${https_proxy} MODEL_PATH: "/llm/models" SERVED_MODEL_NAME: ${LLM_MODEL} - TENSOR_PARALLEL_SIZE: ${TENSOR_PARALLEL_SIZE:-1} + TENSOR_PARALLEL_SIZE: ${TP:-1} MAX_NUM_SEQS: ${MAX_NUM_SEQS:-64} MAX_NUM_BATCHED_TOKENS: ${MAX_NUM_BATCHED_TOKENS:-10240} MAX_MODEL_LEN: ${MAX_MODEL_LEN:-10240} - LOAD_IN_LOW_BIT: ${LOAD_IN_LOW_BIT:-fp8} + LOAD_IN_LOW_BIT: ${QUANTIZATION:-fp8} CCL_DG2_USM: ${CCL_DG2_USM:-""} PORT: ${VLLM_SERVICE_PORT_A770:-8086} - ZE_AFFINITY_MASK: ${SELECTED_XPU_0:-0} + ZE_AFFINITY_MASK: ${ZE_AFFINITY_MASK:-0} shm_size: '32g' entrypoint: /bin/bash -c "\ cd /llm && \ diff --git a/EdgeCraftRAG/docker_compose/intel/gpu/arc/set_env.sh b/EdgeCraftRAG/docker_compose/intel/gpu/arc/set_env.sh index 3247f3ce91..07a46a461a 100755 --- a/EdgeCraftRAG/docker_compose/intel/gpu/arc/set_env.sh +++ b/EdgeCraftRAG/docker_compose/intel/gpu/arc/set_env.sh @@ -20,12 +20,12 @@ export LLM_MODEL_PATH=${LLM_MODEL_PATH} export VLLM_SERVICE_PORT_B60=${VLLM_SERVICE_PORT_B60} export VLLM_SERVICE_PORT_A770=${VLLM_SERVICE_PORT_A770} -export TENSOR_PARALLEL_SIZE=${TENSOR_PARALLEL_SIZE} +export TP=${TP} export vLLM_ENDPOINT=${vLLM_ENDPOINT} export MAX_NUM_SEQS=${MAX_NUM_SEQS} export MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS} export MAX_MODEL_LEN=${MAX_MODEL_LEN} -export LOAD_IN_LOW_BIT=${LOAD_IN_LOW_BIT} +export QUANTIZATION=${QUANTIZATION} export CCL_DG2_USM=${CCL_DG2_USM} export ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK} diff --git a/EdgeCraftRAG/docs/API_Guide.md b/EdgeCraftRAG/docs/API_Guide.md index 2e7d787849..0fbb9fd50f 100644 --- a/EdgeCraftRAG/docs/API_Guide.md +++ b/EdgeCraftRAG/docs/API_Guide.md @@ -1,50 +1,78 @@ -# Edge Craft Retrieval-Augmented Generation API guide +# Edge Craft Retrieval-Augmented Generation API Guide + +> **Base URLs** +> +> - EC-RAG Server: `http://${HOST_IP}:16010` +> - EC-RAG Mega Service: `http://${HOST_IP}:16011` + +--- ## Pipeline Management ### Create a pipeline ```bash -curl -X POST 
http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" -d @tests/test_pipeline_local_llm.json | jq '.' +curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines \ + -H "Content-Type: application/json" \ + -d @tests/test_pipeline_local_llm.json | jq '.' ``` ### Update a pipeline ```bash -curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines/rag_test_local_llm -H "Content-Type: application/json" -d @tests/test_pipeline_local_llm.json | jq '.' +curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines/{name} \ + -H "Content-Type: application/json" \ + -d @tests/test_pipeline_local_llm.json | jq '.' ``` ### Check all pipelines ```bash -curl -X GET http://${HOST_IP}:16010/v1/settings/pipelines -H "Content-Type: application/json" | jq '.' +curl -X GET http://${HOST_IP}:16010/v1/settings/pipelines \ + -H "Content-Type: application/json" | jq '.' +``` + +### Check a specific pipeline + +```bash +curl -X GET http://${HOST_IP}:16010/v1/settings/pipelines/{name} \ + -H "Content-Type: application/json" | jq '.' ``` ### Activate a pipeline ```bash -curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines/rag_test_local_llm -H "Content-Type: application/json" -d '{"active": "true"}' | jq '.' +curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines/{name} \ + -H "Content-Type: application/json" \ + -d '{"active": "true"}' | jq '.' ``` ### Remove a pipeline ```bash # Firstly, deactivate the pipeline if the pipeline status is active -curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines/rag_test_local_llm -H "Content-Type: application/json" -d '{"active": "false"}' | jq '.' +curl -X PATCH http://${HOST_IP}:16010/v1/settings/pipelines/{name} \ + -H "Content-Type: application/json" \ + -d '{"active": "false"}' | jq '.' + # Then delete the pipeline -curl -X DELETE http://${HOST_IP}:16010/v1/settings/pipelines/rag_test_local_llm -H "Content-Type: application/json" | jq '.' 
+curl -X DELETE http://${HOST_IP}:16010/v1/settings/pipelines/{name} \ + -H "Content-Type: application/json" | jq '.' ``` -### Get pipeline json +### Get pipeline JSON ```bash -curl -X GET http://${HOST_IP}:16010/v1/settings/pipelines/{name}/json -H "Content-Type: application/json" | jq '.' +curl -X GET http://${HOST_IP}:16010/v1/settings/pipelines/{name}/json \ + -H "Content-Type: application/json" | jq '.' ``` -### Import pipeline from a json file +### Import pipeline from a JSON file ```bash -curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines/import -H "Content-Type: multipart/form-data" -F "file=@your_test_pipeline_json_file.txt"| jq '.' +curl -X POST http://${HOST_IP}:16010/v1/settings/pipelines/import \ + -H "Content-Type: multipart/form-data" \ + -F "file=@your_pipeline.json" | jq '.' ``` ### Enable and check benchmark for pipelines @@ -56,19 +84,28 @@ Benchmarking activities may significantly reduce system performance. **DO NOT** perform benchmarking in a production environment. ```bash -# Set ENABLE_BENCHMARK as true before launch services +# Set ENABLE_BENCHMARK as true before launching services export ENABLE_BENCHMARK="true" -# check the benchmark data for pipeline {pipeline_name} -curl -X GET http://${HOST_IP}:16010/v1/settings/pipelines/{pipeline_name}/benchmarks -H "Content-Type: application/json" | jq '.' +# Check the benchmark data for the active pipeline +curl -X GET http://${HOST_IP}:16010/v1/settings/pipeline/benchmark \ + -H "Content-Type: application/json" | jq '.' + +# Check the benchmark data for a specific pipeline +curl -X GET http://${HOST_IP}:16010/v1/settings/pipelines/{pipeline_name}/benchmarks \ + -H "Content-Type: application/json" | jq '.' 
``` +--- + ## Model Management ### Load a model ```bash -curl -X POST http://${HOST_IP}:16010/v1/settings/models -H "Content-Type: application/json" -d '{"model_type": "reranker", "model_id": "BAAI/bge-reranker-large", "model_path": "./models/bge_ov_reranker", "device": "cpu", "weight": "INT4"}' | jq '.' +curl -X POST http://${HOST_IP}:16010/v1/settings/models \ + -H "Content-Type: application/json" \ + -d '{"model_type": "reranker", "model_id": "BAAI/bge-reranker-large", "model_path": "./models/bge_ov_reranker", "device": "cpu", "weight": "INT4"}' | jq '.' ``` It will take some time to load the model. @@ -76,147 +113,432 @@ It will take some time to load the model. ### Check all models ```bash -curl -X GET http://${HOST_IP}:16010/v1/settings/models -H "Content-Type: application/json" | jq '.' +curl -X GET http://${HOST_IP}:16010/v1/settings/models \ + -H "Content-Type: application/json" | jq '.' ``` -### Update a model +### Check a specific model ```bash -curl -X PATCH http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large -H "Content-Type: application/json" -d '{"model_type": "reranker", "model_id": "BAAI/bge-reranker-large", "model_path": "./models/bge_ov_reranker", "device": "gpu", "weight": "INT4"}' | jq '.' +curl -X GET http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large \ + -H "Content-Type: application/json" | jq '.' ``` -### Check a certain model +### Update a model ```bash -curl -X GET http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large -H "Content-Type: application/json" | jq '.' +curl -X PATCH http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large \ + -H "Content-Type: application/json" \ + -d '{"model_type": "reranker", "model_id": "BAAI/bge-reranker-large", "model_path": "./models/bge_ov_reranker", "device": "gpu", "weight": "INT4"}' | jq '.' ``` ### Delete a model ```bash -curl -X DELETE http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large -H "Content-Type: application/json" | jq '.' 
+curl -X DELETE http://${HOST_IP}:16010/v1/settings/models/BAAI/bge-reranker-large \ + -H "Content-Type: application/json" | jq '.' ``` +### Get available model weights + +Query the available compression weights (INT4 / INT8 / FP16) for a given model path: + +```bash +curl -X GET "http://${HOST_IP}:16010/v1/settings/weight/BAAI/bge-reranker-large" \ + -H "Content-Type: application/json" | jq '.' +``` + +### Get available model IDs by type + +Supported `model_type` values: `LLM`, `vLLM`, `reranker`, `embedding`, `vLLM_embedding`, `kbadmin_embedding_model` + +```bash +# List available local LLM models +curl -X GET "http://${HOST_IP}:16010/v1/settings/avail-models/LLM" \ + -H "Content-Type: application/json" | jq '.' + +# List models served by a vLLM server (optional server_address parameter) +curl -X GET "http://${HOST_IP}:16010/v1/settings/avail-models/vLLM?server_address=http://localhost:8086" \ + -H "Content-Type: application/json" | jq '.' + +# List available embedding models +curl -X GET "http://${HOST_IP}:16010/v1/settings/avail-models/embedding" \ + -H "Content-Type: application/json" | jq '.' +``` + +### Get available models from a vLLM server + +```bash +curl -X GET "http://${HOST_IP}:16010/v1/available_models?server_address=http://localhost:8086" \ + -H "Content-Type: application/json" | jq '.' +``` + +--- + ## Knowledge Base Management ### Create a knowledge base ```bash -curl -X POST http://${HOST_IP}:16010/v1/knowledge -H "Content-Type: application/json" -d '{"name": "default_kb","description": "Your knowledge base Description","active":true}' | jq '.' +curl -X POST http://${HOST_IP}:16010/v1/knowledge \ + -H "Content-Type: application/json" \ + -d @tests/configs/test_kb.json | jq '.' ``` -### Update a knowledge base +### Check all knowledge bases + +```bash +curl -X GET http://${HOST_IP}:16010/v1/knowledge \ + -H "Content-Type: application/json" | jq '.' 
+``` + +### Check a specific knowledge base + +```bash +curl -X GET http://${HOST_IP}:16010/v1/knowledge/default_kb \ + -H "Content-Type: application/json" | jq '.' +``` + +### Get knowledge base JSON ```bash -curl -X PATCH http://${HOST_IP}:16010/v1/knowledge/patch -H "Content-Type: application/json" -d '{"name": "default_kb","active":"True","description": "Your knowledge base Description","active":"True"}' | jq '.' +curl -X GET http://${HOST_IP}:16010/v1/knowledge/default_kb/json \ + -H "Content-Type: application/json" | jq '.' ``` -### Check all knowledge base +### Get knowledge base file map (paginated) ```bash -curl -X GET http://${HOST_IP}:16010/v1/knowledge -H "Content-Type: application/json" | jq '.' +curl -X GET "http://${HOST_IP}:16010/v1/knowledge/default_kb/filemap?page_num=1&page_size=20" \ + -H "Content-Type: application/json" | jq '.' ``` -### Activate knowledge base +### Update a knowledge base ```bash -curl -X PATCH http://${HOST_IP}:16010/v1/knowledge/patch -H "Content-Type: application/json" -d '{"name": "default_kb","active":true}' | jq '.' +curl -X PATCH http://${HOST_IP}:16010/v1/knowledge/patch \ + -H "Content-Type: application/json" \ + -d '{"name": "default_kb", "active": "True", "description": "Your knowledge base description"}' | jq '.' +``` + +### Activate a knowledge base + +```bash +curl -X PATCH http://${HOST_IP}:16010/v1/knowledge/patch \ + -H "Content-Type: application/json" \ + -d '{"name": "default_kb", "active": true}' | jq '.' ``` ### Remove a knowledge base ```bash -curl -X DELETE http://${HOST_IP}:16010/v1/knowledge/default_kb -H "Content-Type: application/json" | jq '.' +curl -X DELETE http://${HOST_IP}:16010/v1/knowledge/default_kb \ + -H "Content-Type: application/json" | jq '.' ``` ### Add file to knowledge base ```bash -curl -X POST http://${HOST_IP}:16010/v1/knowledge/default_kb/files -H "Content-Type: application/json" -d '{"local_path": "docs/#REPLACE WITH YOUR DIR WITHIN MOUNTED DOC PATH#"}' | jq '.' 
+curl -X POST http://${HOST_IP}:16010/v1/knowledge/default_kb/files \ + -H "Content-Type: application/json" \ + -d '{"local_path": "/home/user/ui_cache/#REPLACE WITH YOUR FILE OR DIR PATH#"}' | jq '.' +``` + +### Delete file from knowledge base + +```bash +curl -X DELETE http://${HOST_IP}:16010/v1/knowledge/default_kb/files \ + -H "Content-Type: application/json" \ + -d '{"local_path": "/home/user/ui_cache/#REPLACE WITH YOUR FILE PATH#"}' | jq '.' +``` + +--- + +## Experience Knowledge Base Management + +Experience knowledge bases store curated Q&A pairs that can be retrieved to augment pipeline responses. + +### Get all experiences + +```bash +curl -X GET http://${HOST_IP}:16010/v1/experiences \ + -H "Content-Type: application/json" | jq '.' ``` -### Delete file to knowledge base +### Get experience by ID or question ```bash -curl -X DELETE http://${HOST_IP}:16010/v1/knowledge/default_kb/files -H "Content-Type: application/json" -d '{"local_path": "docs/#REPLACE WITH YOUR DIR WITHIN MOUNTED DOC PATH#"}' | jq '.' +curl -X POST http://${HOST_IP}:16010/v1/experience \ + -H "Content-Type: application/json" \ + -d '{"idx": "your-experience-id"}' | jq '.' +``` + +### Update an experience + +```bash +curl -X PATCH http://${HOST_IP}:16010/v1/experiences \ + -H "Content-Type: application/json" \ + -d '{"idx": "your-experience-id", "question": "Updated question?", "content": "Updated answer"}' | jq '.' +``` + +### Delete an experience + +```bash +curl -X DELETE http://${HOST_IP}:16010/v1/experiences \ + -H "Content-Type: application/json" \ + -d '{"idx": "your-experience-id"}' | jq '.' +``` + +### Add experiences from a file + +```bash +curl -X POST http://${HOST_IP}:16010/v1/experiences/files \ + -H "Content-Type: application/json" \ + -d '{"local_path": "/home/user/ui_cache/experiences.json"}' | jq '.' 
+``` + +### Check and add multiple experiences (duplicate check first) + +```bash +# Step 1: Check for duplicates; if none, experiences are added automatically +curl -X POST http://${HOST_IP}:16010/v1/multiple_experiences/check \ + -H "Content-Type: application/json" \ + -d '[{"question": "What is EC-RAG?", "content": "EdgeCraft RAG is ..."}]' | jq '.' + +# Step 2 (only if duplicates detected): Confirm with overwrite flag +# flag=true → overwrite duplicates; flag=false → append +curl -X POST "http://${HOST_IP}:16010/v1/multiple_experiences/confirm?flag=true" \ + -H "Content-Type: application/json" \ + -d '[{"question": "What is EC-RAG?", "content": "EdgeCraft RAG is ..."}]' | jq '.' ``` -## File Management +--- -### Add a text +## Agent Management + +### Check all agents ```bash -curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d '{"text":"#REPLACE WITH YOUR TEXT"}' | jq '.' +curl -X GET http://${HOST_IP}:16010/v1/agents \ + -H "Content-Type: application/json" | jq '.' ``` -### Add files from existed file path +### Check a specific agent ```bash -curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d '{"local_path":"docs/#REPLACE WITH YOUR DIR WITHIN MOUNTED DOC PATH#"}' | jq '.' -curl -X POST http://${HOST_IP}:16010/v1/data -H "Content-Type: application/json" -d '{"local_path":"docs/#REPLACE WITH YOUR FILE WITHIN MOUNTED DOC PATH#"}' | jq '.' +curl -X GET http://${HOST_IP}:16010/v1/agents/{name} \ + -H "Content-Type: application/json" | jq '.' ``` -### Check all files +### Get default configs for an agent type ```bash -curl -X GET http://${HOST_IP}:16010/v1/data/files -H "Content-Type: application/json" | jq '.' +curl -X GET http://${HOST_IP}:16010/v1/agents/configs/{agent_type} \ + -H "Content-Type: application/json" | jq '.' ``` -### Check one file +### Create an agent ```bash -curl -X GET http://${HOST_IP}:16010/v1/data/files/test2.docx -H "Content-Type: application/json" | jq '.' 
+curl -X POST http://${HOST_IP}:16010/v1/agents \ + -H "Content-Type: application/json" \ + -d '{"name": "my_agent", "type": "react_llm", "pipeline_idx": "your-pipeline-idx"}' | jq '.' ``` -### Delete a file +### Update an agent ```bash -curl -X DELETE http://${HOST_IP}:16010/v1/data/files/test2.docx -H "Content-Type: application/json" | jq '.' +curl -X PATCH http://${HOST_IP}:16010/v1/agents/{name} \ + -H "Content-Type: application/json" \ + -d '{"name": "my_agent", "active": true}' | jq '.' ``` -### Update a file +### Delete an agent ```bash -curl -X PATCH http://${HOST_IP}:16010/v1/data/files/test.pdf -H "Content-Type: application/json" -d '{"local_path":"docs/#REPLACE WITH YOUR FILE WITHIN MOUNTED DOC PATH#"}' | jq '.' +curl -X DELETE http://${HOST_IP}:16010/v1/agents/{name} \ + -H "Content-Type: application/json" | jq '.' ``` +--- + ## System Prompt Management ### Get system prompt ```bash -curl -X GET http://${HOST_IP}:16010/v1/chatqna/prompt -H "Content-Type: application/json" | jq '.' +curl -X GET http://${HOST_IP}:16010/v1/chatqna/prompt \ + -H "Content-Type: application/json" | jq '.' +``` + +### Get tagged system prompt + +```bash +curl -X GET http://${HOST_IP}:16010/v1/chatqna/prompt/tagged \ + -H "Content-Type: application/json" | jq '.' +``` + +### Get default system prompt + +```bash +curl -X GET http://${HOST_IP}:16010/v1/chatqna/prompt/default \ + -H "Content-Type: application/json" | jq '.' ``` ### Update system prompt ```bash -curl -X POST http://${HOST_IP}:16010/v1/chatqna/prompt -H "Content-Type: application/json" -d '{"prompt":"This is a template prompt"}' | jq '.' +curl -X POST http://${HOST_IP}:16010/v1/chatqna/prompt \ + -H "Content-Type: application/json" \ + -d '{"prompt": "This is a custom prompt template"}' | jq '.' +``` + +### Upload system prompt from file + +```bash +curl -X POST http://${HOST_IP}:16010/v1/chatqna/prompt-file \ + -H "Content-Type: multipart/form-data" \ + -F "file=@your_prompt_file.txt" | jq '.' 
+``` + +### Reset system prompt to default + +```bash +curl -X POST http://${HOST_IP}:16010/v1/chatqna/prompt/reset \ + -H "Content-Type: application/json" | jq '.' +``` + +--- + +## Data Management + +### Get all nodes (chunks) in the active knowledge base + +```bash +curl -X GET http://${HOST_IP}:16010/v1/data/nodes \ + -H "Content-Type: application/json" | jq '.' +``` + +### Get nodes by document name + +```bash +curl -X GET "http://${HOST_IP}:16010/v1/data/{document_name}/nodes" \ + -H "Content-Type: application/json" | jq '.' +``` + +### Get all document names in the active knowledge base + +```bash +curl -X GET http://${HOST_IP}:16010/v1/data/documents \ + -H "Content-Type: application/json" | jq '.' +``` + +### Get all files + +```bash +curl -X GET http://${HOST_IP}:16010/v1/data/files \ + -H "Content-Type: application/json" | jq '.' +``` + +### Get a specific file + +```bash +curl -X GET http://${HOST_IP}:16010/v1/data/files/{name} \ + -H "Content-Type: application/json" | jq '.' +``` + +### Upload a file (from UI) + +```bash +curl -X POST "http://${HOST_IP}:16010/v1/data/file/{file_name}" \ + -H "Content-Type: multipart/form-data" \ + -F "file=@/path/to/your/document.pdf" | jq '.' ``` -### Reset system prompt +--- + +## Session Management + +### Get all sessions ```bash -curl -X POST http://${HOST_IP}:16010/v1/chatqna/prompt/reset -H "Content-Type: application/json" | jq '.' +curl -X GET http://${HOST_IP}:16010/v1/sessions \ + -H "Content-Type: application/json" | jq '.' ``` -### Use custom system prompt file +### Get a session by ID + +```bash +curl -X GET http://${HOST_IP}:16010/v1/session/{session_id} \ + -H "Content-Type: application/json" | jq '.' +``` + +--- + +## System Information + +### Get system status + +Returns CPU usage, memory usage, disk usage, OS info, and current time. 
```bash -curl -X POST http://${HOST_IP}:16010/v1/chatqna/prompt-file -H "Content-Type: multipart/form-data" -F "file=@your_prompt_file.txt" +curl -X GET http://${HOST_IP}:16010/v1/system/info \ + -H "Content-Type: application/json" | jq '.' ``` +### Get available inference devices + +Returns OpenVINO-available devices (e.g., CPU, GPU, AUTO). + +```bash +curl -X GET http://${HOST_IP}:16010/v1/system/device \ + -H "Content-Type: application/json" | jq '.' +``` + +--- + ## ChatQnA ### Retrieval API +Retrieve relevant context chunks from the active knowledge base without running LLM generation. + +```bash +curl -X POST http://${HOST_IP}:16010/v1/retrieval \ + -H "Content-Type: application/json" \ + -d '{"messages": "Your question here", "top_n": 5, "max_tokens": 512}' | jq '.' +``` + +### ChatQnA API (Mega Service) + +Send a question through the full RAG pipeline (retrieval + LLM generation). + +```bash +curl -X POST http://${HOST_IP}:16011/v1/chatqna \ + -H "Content-Type: application/json" \ + -d '{"messages": "Your question here", "top_n": 5, "max_tokens": 512}' | jq '.' +``` + +### RAGQnA API (with contexts in response) + +Returns the LLM answer together with the retrieved context chunks. + ```bash -curl -X POST http://${HOST_IP}:16010/v1/retrieval -H "Content-Type: application/json" -d '{"messages":"#Please enter the question you need to retrieve here#", "top_n":5, "max_tokens":512}' | jq '.' +# Non-streaming +curl -X POST http://${HOST_IP}:16010/v1/ragqna \ + -H "Content-Type: application/json" \ + -d '{"messages": "Your question here", "top_n": 5, "max_tokens": 512, "stream": false}' | jq '.' 
+# Streaming +curl -X POST http://${HOST_IP}:16010/v1/ragqna \ + -H "Content-Type: application/json" \ + -d '{"messages": "Your question here", "top_n": 5, "max_tokens": 512, "stream": true}' ``` -### ChatQnA API +### Check vLLM server connection ```bash -curl -X POST http://${HOST_IP}:16011/v1/chatqna -H "Content-Type: application/json" -d '{"messages":"#REPLACE WITH YOUR QUESTION HERE#", "top_n":5, "max_tokens":512}' | jq '.' +curl -X POST http://${HOST_IP}:16010/v1/check/vllm \ + -H "Content-Type: application/json" \ + -d '{"server_address": "http://localhost:8086", "model_name": "Qwen/Qwen3-8B"}' | jq '.' ``` diff --git a/EdgeCraftRAG/docs/Advanced_Setup.md b/EdgeCraftRAG/docs/Advanced_Setup.md index 4d17e28983..3ac7fa5f91 100644 --- a/EdgeCraftRAG/docs/Advanced_Setup.md +++ b/EdgeCraftRAG/docs/Advanced_Setup.md @@ -1,34 +1,71 @@ # Edge Craft Retrieval-Augmented Generation Advanced Setup -## OpenVINO Local Inference +[中文版](Advanced_Setup_zh.md) -EC-RAG support using local OpenVINO models to do inference, please follow below steps to run local inference: +## Build Docker Images for Mega Service, Server and UI by your own -### 1. (Optional) Build Docker Images for Mega Service, Server and UI by your own - -**All the docker images can be automatically‌ pulled**, If you want to build the images by your own, please follow the steps: +**All the docker images can be automatically‌ pulled**, if you want to build the images by your own, please use helper script: ```bash -git clone https://github.com/opea-project/GenAIExamples.git -cd GenAIExamples/EdgeCraftRAG -docker build --no-cache --pull --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy="$no_proxy" -t opea/edgecraftrag:latest -f Dockerfile . -docker build --no-cache --pull --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy="$no_proxy" -t opea/edgecraftrag-server:latest -f Dockerfile.server . 
-docker build --no-cache --pull --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy="$no_proxy" -t opea/edgecraftrag-ui:latest -f ui/docker/Dockerfile.ui . +# Build all images +./tools/build_images.sh + +# Build a single image +./tools/build_images.sh mega +./tools/build_images.sh server +./tools/build_images.sh ui + +# Build multiple selected images +./tools/build_images.sh mega server ``` -### 2. Prepare models +## Manual deployment details for Arc platform + +### Prepare models + +There are 3 models that need to be prepared: **Embedding**, **Reranking**, **LLM** +You'll need to decide the inferencing framework for these models. + +#### Embedding and Reranking + +Embedding and reranking are usually served by local OpenVINO inferencing, to prepare these 2 models: ```bash -# Prepare models for embedding, reranking and generation, you can also choose other OpenVINO optimized models -export MODEL_PATH="${PWD}/ov_models" # Your model path for embedding, reranking and LLM models +# Prepare models for embedding, reranking: +export MODEL_PATH="${PWD}/models" # Your model path for embedding, reranking and LLM models mkdir -p $MODEL_PATH pip install --upgrade --upgrade-strategy eager "optimum[openvino]" optimum-cli export openvino -m BAAI/bge-small-en-v1.5 ${MODEL_PATH}/BAAI/bge-small-en-v1.5 --task sentence-similarity optimum-cli export openvino -m BAAI/bge-reranker-large ${MODEL_PATH}/BAAI/bge-reranker-large --task text-classification +``` + +#### LLM + +##### openVINO + +If you have Core Ultra platform only, please prepare openVINO models: +You can also run openVINO models on discrete GPU. + +```bash +# Prepare LLM model for openVINO optimum-cli export openvino --model Qwen/Qwen3-8B ${MODEL_PATH}/Qwen/Qwen3-8B/INT4_compressed_weights --task text-generation-with-past --weight-format int4 --group-size 128 --ratio 0.8 ``` -### 3. 
Prepare env variables and configurations +##### vLLM + +Alternatively, if you have discrete GPU and want to use vLLM, please prepare models for vLLM: + +```bash +# Prepare LLM model for vLLM +export LLM_MODEL="Qwen/Qwen3-8B" # Your model id +pip install modelscope +modelscope download --model $LLM_MODEL --local_dir "${MODEL_PATH}/${LLM_MODEL}" +# Optionally, you can also download models with huggingface: +# pip install -U huggingface_hub +# huggingface-cli download $LLM_MODEL --local-dir "${MODEL_PATH}/${LLM_MODEL}" +``` + +### Prepare env variables and configurations ```bash ip_address=$(hostname -I | awk '{print $1}') @@ -48,12 +85,12 @@ export NO_PROXY=${NO_PROXY},${HOST_IP},edgecraftrag,edgecraftrag-server # Make sure all 3 folders have 1000:1000 permission, otherwise export DOC_PATH=${PWD}/tests export TMPFILE_PATH=${PWD}/tests -chown 1000:1000 -R ${MODEL_PATH} ${DOC_PATH} ${TMPFILE_PATH} +chown 1000:1000 ${MODEL_PATH} ${DOC_PATH} ${TMPFILE_PATH} # In addition, also make sure the .cache folder has 1000:1000 permission, otherwise chown 1000:1000 -R $HOME/.cache ``` -### 4. Start Edge Craft RAG Services with Docker Compose +Set Milvus DB and chat history round for inference: ```bash # EC-RAG support Milvus as persistent database, by default milvus is disabled, you can choose to set MILVUS_ENABLED=1 to enable it @@ -63,25 +100,48 @@ export MILVUS_ENABLED=0 # EC-RAG support chat history round setting, by default chat history is disabled, you can set CHAT_HISTORY_ROUND to control it # export CHAT_HISTORY_ROUND= # change to your preference +``` + +### Deploy the Service on Intel GPU Using Docker Compose -# EC-RAG support pipeline performance benchmark, use ENABLE_BENCHMARK=true/false to turn on/off benchmark -# export ENABLE_BENCHMARK= # change to your preference +#### Option a. 
Deploy openVINO LLM based EC-RAG for Core Ultra, Arc B60, Arc A770 -export MAX_MODEL_LEN=5000 -# Launch EC-RAG service with compose +```bash docker compose -f docker_compose/intel/gpu/arc/compose.yaml up -d ``` +#### Option b.1. Deploy vLLM based EC-RAG for Arc B60 + +```bash +docker compose --profile b60 -f docker_compose/intel/gpu/arc/compose.yaml up -d +``` + +#### Option b.2. Deploy vLLM based EC-RAG for Arc A770 + +```bash +docker compose --profile a770 -f docker_compose/intel/gpu/arc/compose.yaml up -d +``` + +### 6. Cleanup the Deployment (Manual) + +To stop the containers associated with the deployment, execute the following command: + +```bash +docker compose -f docker_compose/intel/gpu/arc/compose.yaml down +``` + +All the EdgeCraftRAG containers will be stopped and then removed on completion of the `down` command. + ## EC-RAG with Kbadmin EC-RAG support kbadmin as a knowledge base manager Please make sure all the kbadmin services have been launched -EC-RAG Docker Images preparation is the same as local inference section, please refer to [Build Docker Images](#1-optional-build-docker-images-for-mega-service-server-and-ui-by-your-own) -Model preparation is the same as vLLM inference section, please refer to [Prepare models](../docker_compose/intel/gpu/arc/README.md#2-prepare-models) +EC-RAG Docker Images preparation is the same as local inference section, please refer to [Build Docker Images](#build-docker-images-for-mega-service-server-and-ui-by-your-own) +Model preparation is the same as Arc manual deployment section, please refer to [Prepare models](#prepare-models) ### 1. 
Start Edge Craft RAG Services with Docker Compose -This section is the same as default vLLM inference section, please refer to [Prepare env variables and configurations](../docker_compose/intel/gpu/arc/README.md#prepare-env-variables-and-configurations) and [Start Edge Craft RAG Services with Docker Compose](../docker_compose/intel/gpu/arc/README.md#deploy-the-service-on-arc-a770-using-docker-compose) +This section is the same as Arc manual deployment section, please refer to [Prepare env variables and configurations](#prepare-env-variables-and-configurations) and [Deploy the Service on Intel GPU Using Docker Compose](#deploy-the-service-on-intel-gpu-using-docker-compose) ### 2. Access Kbadmin UI @@ -92,36 +152,12 @@ please refer to [ChatQnA with Kbadmin in UI](./Explore_Edge_Craft_RAG.md#chatqna In this sample, we will use Qwen3-30B-A3B deployment on 4 Arc B60 GPUs as an example. Before started, please prepare models into MODEL_PATH and prepare docker images -### Prepare env variables and configurations - ```bash export MODEL_PATH="${PWD}/models" # Your model path export LLM_MODEL="Qwen/Qwen3-30B-A3B" -ip_address=$(hostname -I | awk '{print $1}') -# Use `ip a` to check your active ip -export HOST_IP=$ip_address # Your host ip - -# Check group id of video and render -export VIDEOGROUPID=$(getent group video | cut -d: -f3) -export RENDERGROUPID=$(getent group render | cut -d: -f3) +pip install modelscope +modelscope download --model $LLM_MODEL --local_dir "${MODEL_PATH}/${LLM_MODEL}" -# If you have a proxy configured, execute below line -export no_proxy=${no_proxy},${HOST_IP},edgecraftrag,edgecraftrag-server -export NO_PROXY=${NO_PROXY},${HOST_IP},edgecraftrag,edgecraftrag-server -# If you have a HF mirror configured, it will be imported to the container -# export HF_ENDPOINT=https://hf-mirror.com # your HF mirror endpoint" - -# Make sure all 3 folders have 1000:1000 permission, otherwise -export DOC_PATH=${PWD}/tests -export TMPFILE_PATH=${PWD}/tests -chown 
1000:1000 ${MODEL_PATH} ${DOC_PATH} ${TMPFILE_PATH} -# In addition, also make sure the .cache folder has 1000:1000 permission, otherwise -chown 1000:1000 -R $HOME/.cache -``` - -### Deploy the Service on Arc B60 Using Docker Compose - -```bash # vLLM envs export TP=4 # for multi GPU, you can change TP value export ZE_AFFINITY_MASK=0,1,2,3 # for multi GPU, you can export ZE_AFFINITY_MASK=0,1,2... diff --git a/EdgeCraftRAG/docs/Advanced_Setup_zh.md b/EdgeCraftRAG/docs/Advanced_Setup_zh.md new file mode 100644 index 0000000000..a1a870355f --- /dev/null +++ b/EdgeCraftRAG/docs/Advanced_Setup_zh.md @@ -0,0 +1,165 @@ +# Edge Craft 检索增强生成(EC-RAG)高级部署说明 + +[English](Advanced_Setup.md) + +## 自行构建 Mega Service、Server 和 UI 镜像 + +**所有 Docker 镜像都可以自动拉取**。如果你希望自行构建镜像,请使用辅助脚本: + +```bash +# 构建全部镜像 +./tools/build_images.sh + +# 单独构建某个镜像 +./tools/build_images.sh mega +./tools/build_images.sh server +./tools/build_images.sh ui + +# 组合构建多个镜像 +./tools/build_images.sh mega server +``` + +## Arc 平台手动部署详细说明 + +### 准备模型 + +需要准备 3 类模型:**Embedding**、**Reranking**、**LLM**。 +你需要根据场景选择对应推理框架。 + +#### Embedding 与 Reranking + +Embedding 与 Reranking 通常由本地 OpenVINO 推理提供,可按如下方式准备: + +```bash +# 准备 embedding、reranking 模型: +export MODEL_PATH="${PWD}/models" # embedding、reranking、LLM 模型目录 +mkdir -p $MODEL_PATH +pip install --upgrade --upgrade-strategy eager "optimum[openvino]" +optimum-cli export openvino -m BAAI/bge-small-en-v1.5 ${MODEL_PATH}/BAAI/bge-small-en-v1.5 --task sentence-similarity +optimum-cli export openvino -m BAAI/bge-reranker-large ${MODEL_PATH}/BAAI/bge-reranker-large --task text-classification +``` + +#### LLM + +##### openVINO + +如果仅使用 Core Ultra 平台,请准备 openVINO 模型: +你也可以在独立 GPU 上运行 openVINO 模型。 + +```bash +# 准备 openVINO 的 LLM 模型 +optimum-cli export openvino --model Qwen/Qwen3-8B ${MODEL_PATH}/Qwen/Qwen3-8B/INT4_compressed_weights --task text-generation-with-past --weight-format int4 --group-size 128 --ratio 0.8 +``` + +##### vLLM + +如果你有独立 GPU 并希望使用 vLLM,可按如下方式准备模型: + +```bash +# 
准备 vLLM 的 LLM 模型 +export LLM_MODEL="Qwen/Qwen3-8B" # 模型 ID +pip install modelscope +modelscope download --model $LLM_MODEL --local_dir "${MODEL_PATH}/${LLM_MODEL}" +# 可选:也可以使用 huggingface 下载模型: +# pip install -U huggingface_hub +# huggingface-cli download $LLM_MODEL --local-dir "${MODEL_PATH}/${LLM_MODEL}" +``` + +### 准备环境变量与配置 + +```bash +ip_address=$(hostname -I | awk '{print $1}') +# 可使用 `ip a` 查看当前活动 ip +export HOST_IP=$ip_address # 主机 IP + +# 获取 video / render 组 ID +export VIDEOGROUPID=$(getent group video | cut -d: -f3) +export RENDERGROUPID=$(getent group render | cut -d: -f3) + +# 如果你配置了代理,请执行以下命令 +export no_proxy=${no_proxy},${HOST_IP},edgecraftrag,edgecraftrag-server +export NO_PROXY=${NO_PROXY},${HOST_IP},edgecraftrag,edgecraftrag-server +# 如果你配置了 HF 镜像,会被注入容器 +# export HF_ENDPOINT=https://hf-mirror.com # 你的 HF 镜像地址 + +# 确保以下 3 个目录权限为 1000:1000 +export DOC_PATH=${PWD}/tests +export TMPFILE_PATH=${PWD}/tests +chown 1000:1000 ${MODEL_PATH} ${DOC_PATH} ${TMPFILE_PATH} +# 同时确保 .cache 目录权限为 1000:1000 +chown 1000:1000 -R $HOME/.cache +``` + +设置 Milvus 和聊天历史轮数: + +```bash +# EC-RAG 支持 Milvus 持久化数据库,默认关闭;设置 MILVUS_ENABLED=1 可开启 +export MILVUS_ENABLED=0 +# 启用 Milvus 时,默认存储路径为 PWD,如需修改请取消注释: +# export DOCKER_VOLUME_DIRECTORY= # 按需修改 + +# EC-RAG 支持聊天历史轮数,默认关闭;可通过 CHAT_HISTORY_ROUND 控制 +# export CHAT_HISTORY_ROUND= # 按需修改 +``` + +### 使用 Docker Compose 在 Intel GPU 上部署服务 + +#### 选项 a:为 Core Ultra / Arc B60 / Arc A770 部署基于 openVINO LLM 的 EC-RAG + +```bash +docker compose -f docker_compose/intel/gpu/arc/compose.yaml up -d +``` + +#### 选项 b.1:为 Arc B60 部署基于 vLLM 的 EC-RAG + +```bash +docker compose --profile b60 -f docker_compose/intel/gpu/arc/compose.yaml up -d +``` + +#### 选项 b.2:为 Arc A770 部署基于 vLLM 的 EC-RAG + +```bash +docker compose --profile a770 -f docker_compose/intel/gpu/arc/compose.yaml up -d +``` + +### 6. 
清理部署(手动) + +若要停止本次部署相关容器,请执行: + +```bash +docker compose -f docker_compose/intel/gpu/arc/compose.yaml down +``` + +执行 `down` 后,EdgeCraftRAG 相关容器将停止并移除。 + +## EC-RAG 与 Kbadmin + +EC-RAG 支持 kbadmin 作为知识库管理器。 +请先确保所有 kbadmin 服务已启动。 +EC-RAG 镜像准备与本地推理章节一致,请参考 [自行构建镜像](#自行构建-mega-serviceserver-和-ui-镜像)。 +模型准备与 Arc 手动部署章节一致,请参考 [准备模型](#准备模型)。 + +### 1. 使用 Docker Compose 启动 Edge Craft RAG 服务 + +此部分与 Arc 手动部署一致,请参考 [准备环境变量与配置](#准备环境变量与配置) 和 [使用 Docker Compose 在 Intel GPU 上部署服务](#使用-docker-compose-在-intel-gpu-上部署服务)。 + +### 2. 访问 Kbadmin UI + +请参考 [ChatQnA with Kbadmin in UI](./Explore_Edge_Craft_RAG.md#chatqna-with-kbadmin-in-ui)。 + +## 在多 GPU 上部署 EC-RAG + +本示例以 4 张 Arc B60 部署 Qwen3-30B-A3B 为例。 +开始前请先准备模型到 `MODEL_PATH` 并准备好 Docker 镜像。 + +```bash +export MODEL_PATH="${PWD}/models" # 模型路径 +export LLM_MODEL="Qwen/Qwen3-30B-A3B" +pip install modelscope +modelscope download --model $LLM_MODEL --local_dir "${MODEL_PATH}/${LLM_MODEL}" + +# vLLM 环境变量 +export TP=4 # 多卡时可按需调整 TP +export ZE_AFFINITY_MASK=0,1,2,3 # 多卡时可按需调整 +docker compose --profile b60 -f docker_compose/intel/gpu/arc/compose.yaml up -d +``` diff --git a/EdgeCraftRAG/docs/Agentic_RAG_Guide.md b/EdgeCraftRAG/docs/Agentic_RAG_Guide.md index c499c8ede1..b353b7e990 100644 --- a/EdgeCraftRAG/docs/Agentic_RAG_Guide.md +++ b/EdgeCraftRAG/docs/Agentic_RAG_Guide.md @@ -1,5 +1,7 @@ # EC-RAG Agent Guide +[中文版](Agentic_RAG_Guide_zh.md) + This guide explains how to enable, configure, and use Agent capabilities within EdgeCraftRAG (EC-RAG). It covers the creation and management of different agent types to enhance retrieval and generation workflows.(Currently, EC-RAG Agent is an experimental option.) 
## Overview diff --git a/EdgeCraftRAG/docs/Agentic_RAG_Guide_Zh.md b/EdgeCraftRAG/docs/Agentic_RAG_Guide_zh.md similarity index 99% rename from EdgeCraftRAG/docs/Agentic_RAG_Guide_Zh.md rename to EdgeCraftRAG/docs/Agentic_RAG_Guide_zh.md index 5862a3f448..3c9802288d 100644 --- a/EdgeCraftRAG/docs/Agentic_RAG_Guide_Zh.md +++ b/EdgeCraftRAG/docs/Agentic_RAG_Guide_zh.md @@ -1,5 +1,7 @@ # EC-RAG智能体指南 +[English](Agentic_RAG_Guide.md) + 目前,EC-RAG智能体将作为试用版功能开放体验。 本指南将为您介绍如何在 EdgeCraftRAG (EC-RAG) 中启用、配置和使用智能体(Agent)功能,涵盖不同类型智能体的创建和管理。 diff --git a/EdgeCraftRAG/docs/Experience_Guide.md b/EdgeCraftRAG/docs/Experience_Guide.md index 4be0dcfeb8..8d31242b1e 100644 --- a/EdgeCraftRAG/docs/Experience_Guide.md +++ b/EdgeCraftRAG/docs/Experience_Guide.md @@ -1,5 +1,7 @@ # EC-RAG Experience Guide +[中文版](Experience_Guide_zh.md) + This guide explains how to enable, configure, and use Experience capabilities within EdgeCraftRAG (EC-RAG). It covers the creation and management of experience to enhance the answer quality ## Overview diff --git a/EdgeCraftRAG/docs/Experience_Guide_Zh.md b/EdgeCraftRAG/docs/Experience_Guide_zh.md similarity index 98% rename from EdgeCraftRAG/docs/Experience_Guide_Zh.md rename to EdgeCraftRAG/docs/Experience_Guide_zh.md index 8c39ad6380..2a3602f61b 100644 --- a/EdgeCraftRAG/docs/Experience_Guide_Zh.md +++ b/EdgeCraftRAG/docs/Experience_Guide_zh.md @@ -1,5 +1,7 @@ # EC-RAG 经验注入指南 +[English](Experience_Guide.md) + 本指南将为您介绍如何在 EdgeCraftRAG (EC-RAG) 中启用、配置和使用经验注入(Experience)功能,涵盖 Experience 的创建和管理,旨在提升您使用EC-RAG时的回答质量。 ## 概述 diff --git a/EdgeCraftRAG/docs/Explore_Edge_Craft_RAG.md b/EdgeCraftRAG/docs/Explore_Edge_Craft_RAG.md index 0a5a91ba5e..9cebfcb5b2 100644 --- a/EdgeCraftRAG/docs/Explore_Edge_Craft_RAG.md +++ b/EdgeCraftRAG/docs/Explore_Edge_Craft_RAG.md @@ -1,55 +1,53 @@ # Explore Edge Craft RAG +[中文版](Explore_Edge_Craft_RAG_zh.md) + ## ChatQnA with LLM Example in UI ### Create Pipeline To create a default pipeline, you need to click the `Create Pipeline` 
button in the `Pipeline Setting` page. -![create_pipeline](../assets/img/create_pipeline.png) - -Then follow the pipeline create guide in UI to set your pipeline, please note that in `Indexer Type` you can set MilvusVector as indexer(Please make sure Milvus is enabled before set MilvusVector as indexer, you can refer to [Enable Milvus](../docker_compose/intel/gpu/arc/README.md#deploy-the-service-using-docker-compose)). -if choosing MilvusVector, you need to verify vector uri first, please input 'Your_IP:milvus_port' then click `Test` button. Note that milvus_port is 19530 -![milvus](../assets/img/milvus.png) +![alt text](../assets/img/Explore_Edge_Craft_RAG_01.jpg) Then, In ` Generator` config page, choose LLM Inference Type to `Vllm`. In `Large Language Model` field, input your LLM_MODEL name, e.g. 'Qwen/Qwen3-8B'. In `Vllm Url` field, you need to input 'IP:vllm_port' then click `Test` button. Note that defaultly vllm_port is '8086' (Note if the test fails, it might be because vLLM service not ready yet, you can wait for 30s and try again) -![generator](../assets/img/generator.png) +![alt text](../assets/img/Explore_Edge_Craft_RAG_02.jpg) You can also create multiple pipelines or update/remove existing pipelines through the `Operation` field, but please note that active pipelines cannot be updated. -![pipeline_operation](../assets/img/pipeline_operation.png) +![alt text](../assets/img/Explore_Edge_Craft_RAG_03.jpg) ### Upload files & ChatQnA After the pipeline creation, you can go to `Knowledge Base` page and click `Create Knowledge Base` button to create your knowledge base. 
-![upload_data](../assets/img/kb_create.png) +![alt text](../assets/img/Explore_Edge_Craft_RAG_04.jpg) + +Then follow the knowledge base creation guide in UI to set your knowledge base, please note that in `Indexer Type` you can set MilvusVector as indexer (Please make sure Milvus is enabled before setting MilvusVector as indexer, you can refer to [Enable Milvus](Advanced_Setup.md#deploy-the-service-on-intel-gpu-using-docker-compose)). +If choosing MilvusVector, you need to verify vector uri first, please input 'Your_IP:milvus_port' then click `Test` button. Note that milvus_port is 19530 +![alt text](../assets/img/Explore_Edge_Craft_RAG_05.jpg) When creating Knowledge base, please choose `Activated` option, since only the files in activated Knowledge base can be retrieved in ChatQnA -![upload_data](../assets/img/kb.png) +![alt text](../assets/img/Explore_Edge_Craft_RAG_06.jpg) After knowledge base creation, you can upload the documents for retrieval. -![upload_data](../assets/img/upload_file.png) +![alt text](../assets/img/Explore_Edge_Craft_RAG_07.jpg) Then, you can submit messages in the chat box in `Chat` page. -![chat_with_rag](../assets/img/chatqna.png) +![alt text](../assets/img/Explore_Edge_Craft_RAG_08.jpg) ## ChatQnA with Kbadmin in UI -### Kbadmin Pipeline - -In the `Node Parser` page, select 'kbadmin' as node parser. -![kbadmin_node_parser](../assets/img/kbadmin_type.png) +### Kbadmin Knowledge Base -In the `Indexer` page, input embedding and vector DB information, please note the embedding service port is 13020; the vector DB port is 29530. -![kbadmin_indexer](../assets/img/kbadmin_index.png) +Go to `Knowledge Base` page and click `Create Knowledge Base` button to create your knowledge base. +Please select 'kbadmin' in `Type` and select kb name from the kbs you created in kbadmin UI page. 
Loading kb name might be slow, please wait patiently -### Upload files & ChatQnA +![alt text](../assets/img/Explore_Edge_Craft_RAG_09.png) -After the pipeline creation, you can go to `Knowledge Base` page and click `Create Knowledge Base` button to create your knowledge base. -Please select 'kbadmin' in `Type`and select kb name from the kbs you created in kbadmin UI page. +Then you can select embedding information in 'Indexer' page -![upload_data](../assets/img/kbadmin_kb.png) +![alt text](../assets/img/Explore_Edge_Craft_RAG_10.png) -Then, you can submit messages in the chat box in `Chat` page. -![chat_with_rag](../assets/img/chatqna.png) +After creation, you can see kbadmin tag in knowledge base then you can submit messages in the chat box in `Chat` page. +![alt text](../assets/img/Explore_Edge_Craft_RAG_11.png) diff --git a/EdgeCraftRAG/docs/Explore_Edge_Craft_RAG_zh.md b/EdgeCraftRAG/docs/Explore_Edge_Craft_RAG_zh.md new file mode 100644 index 0000000000..f01bb4ef47 --- /dev/null +++ b/EdgeCraftRAG/docs/Explore_Edge_Craft_RAG_zh.md @@ -0,0 +1,53 @@ +# 探索 Edge Craft RAG + +[English](Explore_Edge_Craft_RAG.md) + +## 在 UI 中使用 LLM 进行 ChatQnA 示例 + +### 创建流水线 + +要创建默认流水线,请在 `Pipeline Setting`(流水线设置)页面点击 `Create Pipeline` 按钮。 +![alt text](../assets/img/Explore_Edge_Craft_RAG_01.jpg) + +然后,在 `Generator`(生成器)配置页面,将 LLM Inference Type 选择为 `Vllm`。 +在 `Large Language Model` 字段中输入您的 LLM 模型名称,例如 `Qwen/Qwen3-8B`。 +在 `Vllm Url` 字段中输入 `IP:vllm_port`,然后点击 `Test` 按钮。注意 vllm_port 默认为 `8086`。 +(注意:如果测试失败,可能是因为 vLLM 服务尚未就绪,可等待 30 秒后重试。) +![alt text](../assets/img/Explore_Edge_Craft_RAG_02.jpg) + +您也可以通过 `Operation` 字段创建多条流水线或更新/删除已有流水线,但请注意处于激活状态的流水线无法被更新。 +![alt text](../assets/img/Explore_Edge_Craft_RAG_03.jpg) + +### 上传文件与 ChatQnA + +流水线创建完成后,前往 `Knowledge Base`(知识库)页面,点击 `Create Knowledge Base` 按钮创建知识库。 +![alt text](../assets/img/Explore_Edge_Craft_RAG_04.jpg) + +然后按照 UI 中的知识库创建向导进行配置。请注意,在 `Indexer Type`(索引器类型)中可以选择 MilvusVector 作为索引器(请确保在选择 MilvusVector 之前已启用 Milvus,可参考 [启用 
Milvus](Advanced_Setup_zh.md#使用-docker-compose-在-intel-gpu-上部署服务))。 +如果选择 MilvusVector,需要先验证向量数据库 URI,请输入 `Your_IP:milvus_port`,然后点击 `Test` 按钮。注意 milvus_port 默认为 19530。 +![alt text](../assets/img/Explore_Edge_Craft_RAG_05.jpg) + +创建知识库时,请选择 `Activated`(激活)选项,只有处于激活状态的知识库中的文件才能在 ChatQnA 中被检索。 +![alt text](../assets/img/Explore_Edge_Craft_RAG_06.jpg) + +知识库创建完成后,即可上传用于检索的文档。 +![alt text](../assets/img/Explore_Edge_Craft_RAG_07.jpg) + +然后,在 `Chat` 页面的聊天框中提交您的问题。 +![alt text](../assets/img/Explore_Edge_Craft_RAG_08.jpg) + +## 在 UI 中使用 Kbadmin 进行 ChatQnA + +### Kbadmin 知识库 + +流水线创建完成后,前往 `Knowledge Base` 页面,点击 `Create Knowledge Base` 按钮创建知识库。 +请在 `Type` 中选择 `kbadmin`,并从 kbadmin UI 页面中已创建的知识库列表中选择 kb 名称。加载kb名称可能比较耗时,请耐心等待。 + +![alt text](../assets/img/Explore_Edge_Craft_RAG_09.png) + +在 `Indexer` 页面,填写 Embedding 服务和向量数据库信息,注意 Embedding 服务端口为 13020,向量数据库端口为 29530。 + +![alt text](../assets/img/Explore_Edge_Craft_RAG_10.png) + +然后,在 `Chat` 页面的聊天框中提交您的问题。 +![alt text](../assets/img/Explore_Edge_Craft_RAG_11.png) diff --git a/EdgeCraftRAG/docs/Quick_Start_Guide_Zh.md b/EdgeCraftRAG/docs/Quick_Start_Guide_Zh.md deleted file mode 100644 index e500e3f34f..0000000000 --- a/EdgeCraftRAG/docs/Quick_Start_Guide_Zh.md +++ /dev/null @@ -1,104 +0,0 @@ -# 快速启动指南 - -## 1. 环境准备 - -EC-RAG支持通过vllm或本地OpenVINO在Intel Arc GPU上进行部署,其中vllm为系统默认方法。 - -具体环境和系统需求如下: - -硬件环境:Intel Arc A770 - -操作系统:Ubuntu server 22.04.1或更新的版本(至少需要6.2LTS kernel) - -驱动和库依赖:请参考[如何为client GPU安装驱动](https://dgpu-docs.intel.com/driver/client/overview.html) - -以下步骤均基于使用vllm作为推理引擎,如果您选择使用OpenVINO,可以参考[OpenVINO本地部署指南](Advanced_Setup.md#openvino-local-inference) - -## 2. 
准备模型 - -您可以使用如下命令下载和准备所需的模型: - -```bash -git clone https://github.com/opea-project/GenAIExamples.git -cd GenAIExamples/EdgeCraftRAG -# 准备embedding和reranking的模型: -export MODEL_PATH="${PWD}/models" # 用于存储模型的路径 -mkdir -p $MODEL_PATH -pip install --upgrade --upgrade-strategy eager "optimum[openvino]" -optimum-cli export openvino -m BAAI/bge-small-en-v1.5 ${MODEL_PATH}/BAAI/bge-small-en-v1.5 --task sentence-similarity -optimum-cli export openvino -m BAAI/bge-reranker-large ${MODEL_PATH}/BAAI/bge-reranker-large --task text-classification - -# 准备LLM模型 -export LLM_MODEL="Qwen/Qwen3-8B" # 所需的模型ID -pip install modelscope -modelscope download --model $LLM_MODEL --local_dir "${MODEL_PATH}/${LLM_MODEL}" -# 或者,您也可以通过huggingface下载模型: -# pip install -U huggingface_hub -# huggingface-cli download $LLM_MODEL --local-dir "${MODEL_PATH}/${LLM_MODEL}" -``` - -## 3. 准备环境变量和配置文件 - -下面的步骤默认使用单一Intel Arc GPU进行推理,如果您需要使用多GPU,请参考[多GPU搭建指南](Advanced_Setup.md#multi-arc-setup) - -### 为vllm部署准备环境变量 - -```bash -ip_address=$(hostname -I | awk '{print $1}') -# 使用`ip a`命令来查看ip -export HOST_IP=$ip_address - -# 查看并设定video和render的id -export VIDEOGROUPID=$(getent group video | cut -d: -f3) -export RENDERGROUPID=$(getent group render | cut -d: -f3) - -# 您可以使用如下命令配置代理: -# export no_proxy=${no_proxy},${HOST_IP},edgecraftrag,edgecraftrag-server -# export NO_PROXY=${NO_PROXY},${HOST_IP},edgecraftrag,edgecraftrag-server - -# 您可以使用如下命令配置HF镜像: -# export HF_ENDPOINT=https://hf-mirror.com # your HF mirror endpoint" - -# 请使用chown 1000:1000 ${MODEL_PATH} ${PWD}来确保三个文件都有1000:1000权限 -# 请使用chown 1000:1000 -R $HOME/.cache命令来确保.cache有1000:1000权限 -``` - -如果您希望对环境变量进行更高阶的配置,可以参考[vllm环境变量准备指南](Advanced_Setup.md#prepare-env-variables-for-vllm-deployment) - -### 生成nginx配置文件 - -```bash -export VLLM_SERVICE_PORT_0=8100 # vllm服务端口,可以自定义设置 - -# 生成nginx配置文件 -# nginx-conf-generator.sh脚本需要两个参数: DP_NUM 和 output filepath -bash nginx/nginx-conf-generator.sh 1 nginx/nginx.conf -# 设置 NGINX_CONFIG_PATH -export 
NGINX_CONFIG_PATH="${PWD}/nginx/nginx.conf" -``` - -## 4. 通过docker compose启动EC-RAG服务 - -```bash -# # EC-RAG 支持 Milvus 作为持久化数据库,可以通过设置START_MILVUS=1来打开,默认为关闭状态 -export MILVUS_ENABLED=0 -# 如果您启用了Milvus,默认的存储路径为PWD,可以使用下面的命令进行更改 -# export DOCKER_VOLUME_DIRECTORY= # change to your preference - -# 通过compose命令启动EC-RAG -docker compose -f docker_compose/intel/gpu/arc/compose_vllm.yaml up -d -``` - -## 5. 访问UI - -打开浏览器,访问http://${HOST_IP}:8082 - -> 注意:浏览器应该运行在和控制台同样的主机上,否则请用域名而不是${HOST_IP}访问UI。 - -如下为UI首界面,您可以通过[更多关于EC-RAG的信息](Explore_Edge_Craft_RAG.md)来查看更多关于UI和EC-RAG配置的操作方法。 - -![front_page](../assets/img/front_page.png) - -| **Deploy Method** | **LLM Engine** | **LLM Model** | **Hardware** | -| ----------------- | -------------- | ------------- | ------------ | -| Docker Compose | vLLM | Qwen3-8B | Intel Arc | diff --git a/EdgeCraftRAG/edgecraftrag/VERSION b/EdgeCraftRAG/edgecraftrag/VERSION index 0441e5d7aa..38076adb15 100644 --- a/EdgeCraftRAG/edgecraftrag/VERSION +++ b/EdgeCraftRAG/edgecraftrag/VERSION @@ -1 +1 @@ -26.01-Release +26.03-Dev diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/agent.py b/EdgeCraftRAG/edgecraftrag/api/v1/agent.py index 324e62ac7d..a7bf33ed4a 100644 --- a/EdgeCraftRAG/edgecraftrag/api/v1/agent.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/agent.py @@ -5,7 +5,6 @@ import os import time -from edgecraftrag.api.v1.knowledge_base import Synchronizing_vector_data from edgecraftrag.api_schema import AgentCreateIn from edgecraftrag.base import AgentType from edgecraftrag.config_repository import MilvusConfigRepository, save_agent_configurations @@ -17,7 +16,7 @@ # GET Agents -@agent_app.get(path="/v1/settings/agents") +@agent_app.get(path="/v1/agents") async def get_all_agents(): out = [] agents = ctx.get_agent_mgr().get_agents() @@ -39,7 +38,7 @@ async def get_all_agents(): # GET Agent -@agent_app.get(path="/v1/settings/agents/{name}") +@agent_app.get(path="/v1/agents/{name}") async def get_agent(name): agent = ctx.get_agent_mgr().get_agent_by_name(name) 
if agent: @@ -59,7 +58,7 @@ async def get_agent(name): # POST Agent -@agent_app.post(path="/v1/settings/agents") +@agent_app.post(path="/v1/agents") async def create_agent(request: AgentCreateIn, status_code=status.HTTP_201_CREATED): try: agent = ctx.get_agent_mgr().create_agent(request) @@ -71,7 +70,7 @@ async def create_agent(request: AgentCreateIn, status_code=status.HTTP_201_CREAT # PATCH Agent -@agent_app.patch(path="/v1/settings/agents/{name}") +@agent_app.patch(path="/v1/agents/{name}") async def update_agent(name, request: AgentCreateIn): try: agentmgr = ctx.get_agent_mgr() @@ -92,7 +91,7 @@ async def update_agent(name, request: AgentCreateIn): # DELETE Agent -@agent_app.delete(path="/v1/settings/agents/{name}", status_code=status.HTTP_204_NO_CONTENT) +@agent_app.delete(path="/v1/agents/{name}", status_code=status.HTTP_204_NO_CONTENT) async def delete_agent(name): try: agentmgr = ctx.get_agent_mgr() @@ -109,7 +108,7 @@ async def delete_agent(name): # GET Agent Type default configs -@agent_app.get(path="/v1/settings/agents/configs/{agent_type}") +@agent_app.get(path="/v1/agents/configs/{agent_type}") async def get_agent_default_configs(agent_type): try: if agent_type in [e.value for e in AgentType]: @@ -164,28 +163,24 @@ async def manage_agent_bound_pipeline(bound_pl_idx, request): # case3: deactivate agent, while bound pipeline **was** active -> do NOT deactivate bound pipeline, do nothing # case4: deactivate agent, while bound pipeline **was NOT** active -> deactivate bound pipeline, activate previous active pipeline if exists pl_manager = ctx.get_pipeline_mgr() - prev_active_pipeline = pl_manager.get_active_pipeline() - active_kb = ctx.knowledgemgr.get_active_knowledge_base() - kb_name = active_kb.name if active_kb else "default" + active_kbs = ctx.knowledgemgr.get_active_knowledge_base() + # TODO: update single kb with kbs + # kb_name = active_kbs.name if active_kb else "default" if request.active: - pl_manager.activate_pipeline(bound_pl_idx, 
request.active, ctx.get_node_mgr(), kb_name, cache_prev=True) + pl_manager.activate_pipeline(bound_pl_idx, request.active, active_kbs, cache_prev=True) else: # at deactivate, prev_active_pl can be 1.other pl/2.None/3.current bound_pl prev_active_pl = pl_manager.get_prev_active_pipeline_name() if prev_active_pl and prev_active_pl != bound_pl_idx: # 1, restore to the other pipeline activated - pl_manager.activate_pipeline(prev_active_pl, True, ctx.get_node_mgr(), kb_name) + pl_manager.activate_pipeline(prev_active_pl, True, active_kbs) elif not prev_active_pl: # 2, deactivate current bound pipeline, leave no active pipeline as before - pl_manager.activate_pipeline(bound_pl_idx, False, ctx.get_node_mgr(), kb_name) + pl_manager.activate_pipeline(bound_pl_idx, False, active_kbs) else: # 3, do nothing pass # when agent is deactivated, clear cached previous active pipeline pl_manager.clear_prev_active_pipeline_name() - - cur_active_pipeline = pl_manager.get_active_pipeline() - if prev_active_pipeline and cur_active_pipeline and prev_active_pipeline.idx != cur_active_pipeline.idx: - await Synchronizing_vector_data(prev_active_pipeline, cur_active_pipeline) diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py b/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py index 3eec57efa9..67357f2183 100644 --- a/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/chatqna.py @@ -4,7 +4,6 @@ import asyncio import json from concurrent.futures import ThreadPoolExecutor -from typing import List import requests from comps.cores.proto.api_protocol import ChatCompletionRequest @@ -23,9 +22,9 @@ @chatqna_app.post(path="/v1/retrieval") async def retrieval(request: ChatCompletionRequest): try: - active_kb = ctx.knowledgemgr.get_active_knowledge_base() - if active_kb: - request.user = active_kb + active_kbs = ctx.knowledgemgr.get_active_knowledge_base() + if active_kbs: + request.user = active_kbs else: raise HTTPException( 
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, @@ -48,8 +47,8 @@ async def chatqna(request: ChatCompletionRequest): sessionid = request.user ctx.get_session_mgr().set_current_session(sessionid) experience_kb = ctx.knowledgemgr.get_active_experience() - active_kb = ctx.knowledgemgr.get_active_knowledge_base() - request.user = active_kb if active_kb else None + active_kbs = ctx.knowledgemgr.get_active_knowledge_base() + request.user = active_kbs if active_kbs else None if experience_kb: request.tool_choice = "auto" if experience_kb.experience_active else "none" @@ -70,10 +69,10 @@ async def chatqna(request: ChatCompletionRequest): request.model = generator.model_id if request.stream: - run_pipeline_gen, contexts = await ctx.get_pipeline_mgr().run_pipeline(chat_request=request) + run_pipeline_gen, _ = await ctx.get_pipeline_mgr().run_pipeline(chat_request=request) return StreamingResponse(save_session(sessionid, run_pipeline_gen), media_type="text/plain") else: - ret, contexts = await ctx.get_pipeline_mgr().run_pipeline(chat_request=request) + ret, _ = await ctx.get_pipeline_mgr().run_pipeline(chat_request=request) ctx.get_session_mgr().save_current_message(sessionid, "assistant", str(ret)) return str(ret) diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/data.py b/EdgeCraftRAG/edgecraftrag/api/v1/data.py index 9d5472f105..a72d727584 100644 --- a/EdgeCraftRAG/edgecraftrag/api/v1/data.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/data.py @@ -5,6 +5,7 @@ import os from typing import List +from edgecraftrag.api.v1.knowledge_base import add_file_to_knowledge_base from edgecraftrag.api_schema import DataIn, FilesIn from edgecraftrag.config_repository import MilvusConfigRepository from edgecraftrag.context import ctx @@ -14,55 +15,19 @@ data_app = FastAPI() -# Upload a text or files -@data_app.post(path="/v1/data") -async def add_data(request: DataIn): - pl = ctx.get_pipeline_mgr().get_active_pipeline() - docs = [] - if request.text is not None: - 
docs.extend(ctx.get_file_mgr().add_text(text=request.text)) - if request.local_path is not None: - docs.extend(ctx.get_file_mgr().add_files(docs=request.local_path)) - - nodelist = await ctx.get_pipeline_mgr().run_data_prepare(docs=docs) - if pl.indexer.comp_subtype != "kbadmin_indexer": - if nodelist is None or len(nodelist) == 0: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="File not found") - ctx.get_node_mgr().add_nodes(pl.node_parser.idx, nodelist) - return "Done" - - -# Reindex all files -@data_app.post(path="/v1/data/reindex") -async def redindex_data(): - pl = ctx.get_pipeline_mgr().get_active_pipeline() - ctx.get_node_mgr().del_nodes_by_np_idx(pl.node_parser.idx) - - pl.indexer.reinitialize_indexer() - pl.update_indexer_to_retriever() - - all_docs = ctx.get_file_mgr().get_all_docs() - nodelist = ctx.get_pipeline_mgr().run_data_prepare(docs=all_docs) - if nodelist is not None and len(nodelist) > 0: - ctx.get_node_mgr().add_nodes(pl.node_parser.idx, nodelist) - - return "Done" - - # Gets the current nodelist @data_app.get(path="/v1/data/nodes") async def get_nodes_with_kb(kb_name=None): node_lists = {} - active_pl = ctx.get_pipeline_mgr().get_active_pipeline() if kb_name: kb = ctx.get_knowledge_mgr().get_knowledge_base_by_name_or_id(kb_name) else: kb = ctx.get_knowledge_mgr().get_active_knowledge_base() - if active_pl.indexer.comp_subtype == "faiss_vector": - return active_pl.indexer.docstore.docs - elif active_pl.indexer.comp_subtype == "milvus_vector": - collection_name = kb.name + active_pl.name - Milvus_node_list = MilvusConfigRepository.create_connection(collection_name, 1, active_pl.indexer.vector_url) + if kb.indexer.comp_subtype == "faiss_vector": + return kb.indexer.docstore.docs + elif kb.indexer.comp_subtype == "milvus_vector": + collection_name = kb.name + Milvus_node_list = MilvusConfigRepository.create_connection(collection_name, 1, kb.indexer.vector_url) results = Milvus_node_list.get_configs(output_fields=["text", 
"_node_content", "doc_id"]) for node_list in results: text = node_list.get("text") @@ -71,7 +36,7 @@ async def get_nodes_with_kb(kb_name=None): node_content["text"] = text node_lists[node_content.get("id_")] = node_content return node_lists - node_list = ctx.get_node_mgr().get_nodes(active_pl.node_parser.idx) + node_list = ctx.get_node_mgr().get_nodes(kb.node_parser.idx) return node_list @@ -114,21 +79,6 @@ async def get_document_names(): return {"total_documents": len(documents), "documents": list(documents.values())} -# Upload files by a list of file_path -@data_app.post(path="/v1/data/files") -async def add_files(request: FilesIn): - docs = [] - if request.local_paths is not None: - docs.extend(ctx.get_file_mgr().add_files(docs=request.local_paths)) - - nodelist = ctx.get_pipeline_mgr().run_data_prepare(docs=docs) - if nodelist is None or len(nodelist) == 0: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="File not found") - pl = ctx.get_pipeline_mgr().get_active_pipeline() - ctx.get_node_mgr().add_nodes(pl.node_parser.idx, nodelist) - return "Done" - - # GET files @data_app.get(path="/v1/data/files") async def get_files(): @@ -141,35 +91,9 @@ async def get_file_docs(name): return ctx.get_file_mgr().get_file_by_name_or_id(name) -# DELETE a file -@data_app.delete(path="/v1/data/files/{name}") -async def delete_file(name): - if ctx.get_file_mgr().del_file(name): - pl = ctx.get_pipeline_mgr().get_active_pipeline() - - # Current solution: reindexing all docs after deleting one file - # TODO: delete the nodes related to the file - ctx.get_node_mgr().del_nodes_by_np_idx(pl.node_parser.idx) - pl.indexer.reinitialize_indexer() - pl.update_indexer_to_retriever() - - all_docs = ctx.get_file_mgr().get_all_docs() - nodelist = ctx.get_pipeline_mgr().run_data_prepare(docs=all_docs) - if nodelist is not None and len(nodelist) > 0: - ctx.get_node_mgr().add_nodes(pl.node_parser.idx, nodelist) - - return f"File {name} is deleted" - else: - return f"File {name} 
not found" - - # Upload & save a file from UI @data_app.post(path="/v1/data/file/{file_name}") async def upload_file(file_name: str, file: UploadFile = File(...)): - if ctx.get_pipeline_mgr().get_active_pipeline() is None: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="Please activate pipeline and upload the file" - ) try: # DIR for server to save files uploaded by UI UPLOAD_DIRECTORY = os.path.normpath(os.path.join(UI_DIRECTORY, file_name)) diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/knowledge_base.py b/EdgeCraftRAG/edgecraftrag/api/v1/knowledge_base.py index 113db2bbfb..f06c76616a 100644 --- a/EdgeCraftRAG/edgecraftrag/api/v1/knowledge_base.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/knowledge_base.py @@ -1,19 +1,31 @@ # Copyright (C) 2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import itertools import json import os import re from typing import Dict, List, Union -from edgecraftrag.api.v1.data import get_nodes_with_kb from edgecraftrag.api_schema import DataIn, ExperienceIn, KnowledgeBaseCreateIn +from edgecraftrag.base import ( + IndexerType, + ModelType, + NodeParserType, +) +from edgecraftrag.components.benchmark import Benchmark +from edgecraftrag.components.indexer import KBADMINIndexer, VectorIndexer, get_kbs_info +from edgecraftrag.components.node_parser import ( + HierarchyNodeParser, + KBADMINParser, + SimpleNodeParser, + SWindowNodeParser, + UnstructedNodeParser, +) from edgecraftrag.components.query_preprocess import query_search -from edgecraftrag.components.retriever import get_kbs_info from edgecraftrag.config_repository import ( MilvusConfigRepository, save_knowledge_configurations, - save_pipeline_configurations, ) from edgecraftrag.context import ctx from edgecraftrag.env import ( @@ -22,8 +34,7 @@ SEARCH_DIR, UI_DIRECTORY, ) -from fastapi import FastAPI, HTTPException, status -from llama_index.core.schema import Document +from fastapi import FastAPI, HTTPException, Query, status kb_app = 
FastAPI() @@ -37,6 +48,25 @@ async def get_all_knowledge_bases(): raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) +# Get knowledge base files in a certain range. +@kb_app.get("/v1/knowledge/{knowledge_name}/filemap") +async def get_knowledge_base_filemap( + knowledge_name: str, page_num: int = Query(1, ge=1), page_size: int = Query(20, ge=1) +): + kb = ctx.knowledgemgr.get_knowledge_base_by_name_or_id(knowledge_name) + if kb and kb.file_map: + file_map = kb.file_map + filemap_len = len(file_map) + start = (page_num - 1) * page_size + end = min(start + page_size, filemap_len) + if start >= filemap_len: + return None + file_map_subset = itertools.islice(file_map.items(), start, end) + return {"file_map": dict(file_map_subset), "total": kb.calculate_totals()} + else: + return None + + # Get the specified knowledge base. @kb_app.get("/v1/knowledge/{knowledge_name}") async def get_knowledge_base(knowledge_name: str): @@ -44,28 +74,35 @@ async def get_knowledge_base(knowledge_name: str): return kb +# Get the specified knowledge base json. 
+@kb_app.get("/v1/knowledge/{knowledge_name}/json") +async def get_knowledge_base_json(knowledge_name: str): + kb = ctx.knowledgemgr.get_knowledge_base_by_name_or_id(knowledge_name) + return kb.get_knowledge_json + + # Create a new knowledge base @kb_app.post(path="/v1/knowledge") async def create_knowledge_base(knowledge: KnowledgeBaseCreateIn): try: - active_pl = ctx.get_pipeline_mgr().get_active_pipeline() - if not active_pl: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="Please activate pipeline", - ) if not re.match(r"^[a-zA-Z_][a-zA-Z0-9_]*$", knowledge.name): raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail="Knowledge base names must begin with a letter or underscore", ) - - if knowledge.active and knowledge.comp_type == "knowledge" and knowledge.comp_subtype == "origin_kb": - active_pl.indexer.reinitialize_indexer(knowledge.name) - active_pl.update_indexer_to_retriever() - elif knowledge.active and knowledge.comp_subtype == "kbadmin_kb": - active_pl.retriever.config_kbadmin_milvus(knowledge.name) - kb = ctx.knowledgemgr.create_knowledge_base(knowledge) + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + knowledge_json = knowledge.model_dump_json() + kb = ctx.knowledgemgr.create_knowledge_base(knowledge, knowledge_json) + if knowledge.comp_type == "knowledge": + try: + await update_kb_handler(kb, knowledge) + if knowledge.comp_subtype == "kbadmin_kb": + kb.indexer.config_kbadmin_milvus(knowledge.name) + if active_pl: + active_pl.update_retriever_list(ctx.knowledgemgr.get_active_knowledge_base()) + except Exception as e: + ctx.knowledgemgr.delete_knowledge_base(knowledge.name) + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) await save_knowledge_configurations("add", kb) return "Create knowledge base successfully" except Exception as e: @@ -77,24 +114,19 @@ async def create_knowledge_base(knowledge: KnowledgeBaseCreateIn): async def 
delete_knowledge_base(knowledge_name: str): try: rm_kb = ctx.knowledgemgr.get_knowledge_base_by_name_or_id(knowledge_name) - active_kb = ctx.knowledgemgr.get_active_knowledge_base() - active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + active_kbs = ctx.knowledgemgr.get_active_knowledge_base() + kb_is_active = True if rm_kb in active_kbs else False if rm_kb.comp_type == "knowledge" and rm_kb.comp_subtype == "origin_kb": - if active_kb: - if active_kb.name == knowledge_name or active_kb.idx == knowledge_name: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="Cannot delete a running knowledge base.", - ) + if kb_is_active: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="Cannot delete a running knowledge base, please deactivate it first.", + ) kb_file_path = rm_kb.get_file_paths() + if rm_kb.indexer.comp_subtype == "milvus_vector": + rm_kb.indexer.clear_milvus_collection(knowledge_name) if kb_file_path: - if active_pl.indexer.comp_subtype == "milvus_vector": - active_pl.indexer.clear_milvus_collection(knowledge_name) - active_pl.clear_document_cache(knowledge_name) - if active_kb: - active_pl.indexer.reinitialize_indexer(active_kb.name) - active_pl.update_indexer_to_retriever() - rm_kb.clear_documents(active_pl.name) + rm_kb.clear_documents() if rm_kb.comp_type == "experience": if rm_kb.experience_active: raise HTTPException( @@ -105,87 +137,67 @@ async def delete_knowledge_base(knowledge_name: str): rm_kb.clear_experiences() result = ctx.knowledgemgr.delete_knowledge_base(knowledge_name) await save_knowledge_configurations("delete", rm_kb) - await save_pipeline_configurations("update", active_pl) return result except Exception as e: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) -# Switch the active knowledge base @kb_app.patch(path="/v1/knowledge/patch") async def update_knowledge_base(knowledge: KnowledgeBaseCreateIn): try: kb = 
ctx.knowledgemgr.get_knowledge_base_by_name_or_id(knowledge.name) - active_pl = ctx.get_pipeline_mgr().get_active_pipeline() - if active_pl.indexer.comp_subtype == "kbadmin_indexer" and kb.comp_subtype != "kbadmin_kb": - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="The kbadmin pipeline must correspond to the kbadmin type kb.", - ) - if active_pl.indexer.comp_subtype != "kbadmin_indexer" and kb.comp_subtype == "kbadmin_kb": - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail="Not kbadmin pipeline cannot active kbadmin type kb.", - ) + if kb is None: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Knowledge base not found") + kb_indexer = kb.indexer + kb_node_parser = kb.node_parser if kb.comp_type == "knowledge" and kb.comp_subtype == "origin_kb": - if active_pl.indexer.comp_subtype != "milvus_vector": - if knowledge.active and knowledge.active != kb.active: - await handle_reload_data(kb, active_pl) - elif not knowledge.active and kb.description != knowledge.description: - pass - else: - if knowledge.active and knowledge.active != kb.active: - current_paths = kb.file_paths - file_paths = active_pl.compare_file_lists(kb.name, current_paths) - if "del_docs" not in file_paths: - await handle_pipeline_change(kb, active_pl, file_paths) - else: - need_delete_document_path = file_paths["del_docs"] - need_add_document_path = file_paths["add_docs"] - active_pl.indexer.reinitialize_indexer(kb.name) - if need_delete_document_path: - for file_path in need_delete_document_path: - await remove_file_from_knowledge_base(kb.name, DataIn(local_path=file_path)) - if need_add_document_path: - for file_path in need_add_document_path: - add_document = await add_file_to_knowledge_base( - kb.name, DataIn(local_path=file_path), False - ) - await add_document_handler(add_document) - active_pl.indexer.reinitialize_indexer(kb.name) - active_pl.update_indexer_to_retriever() - elif not knowledge.active and kb.description != 
knowledge.description: - pass + try: + await update_kb_handler(kb, knowledge) + except (ValueError, Exception) as e: + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) + + # reload data for knowledge base + node_parser_changed = kb_node_parser != kb.node_parser + if node_parser_changed or kb_indexer != kb.indexer: + await handle_reload_data(kb, node_parser_changed) elif kb.comp_subtype == "kbadmin_kb": - if knowledge.active and knowledge.active != kb.active: - active_pl.retriever.config_kbadmin_milvus(kb.name) - result = ctx.knowledgemgr.update_knowledge_base(knowledge) + kb.indexer.config_kbadmin_milvus(kb.name) + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + result = ctx.knowledgemgr.update_knowledge_base(knowledge, active_pl) + # Update knowledge json + knowledge_dict = knowledge.dict() + kb.update_knowledge_json(knowledge_dict) await save_knowledge_configurations("update", kb) - await save_pipeline_configurations("update", active_pl) return result except Exception as e: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) -# Add a files to the knowledge base +# Add files to the knowledge base @kb_app.post(path="/v1/knowledge/{knowledge_name}/files") -async def add_file_to_knowledge_base(knowledge_name, file_path: DataIn, only_add_file: bool = True): +async def add_file_to_knowledge_base(knowledge_name, file_path: DataIn): + """ + 1. Parse file into Llamaindex Document and add file to filemgr + 2. Add file path to knowledge base + 3. Update nodes and vector store for knowledge base + 4. 
Update pipeline retriever if active knowledge base's indexer changed + """ try: - active_pl = ctx.get_pipeline_mgr().get_active_pipeline() kb = ctx.knowledgemgr.get_knowledge_base_by_name_or_id(knowledge_name) + prev_indexer = kb.indexer if kb.comp_type == "experience": raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail="The experience type cannot perform file operations.", ) - if kb.comp_subtype == "kbadmin_kb" or active_pl.indexer.comp_subtype == "kbadmin_indexer": + if kb.comp_subtype == "kbadmin_kb" or kb.indexer.comp_subtype == "kbadmin_indexer": raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail="Please proceed to the kbadmin interface to perform the operation.", ) # Validate and normalize the user-provided path user_path = file_path.local_path - add_document = ctx.get_file_mgr().add_files(docs=user_path) + kb_file_list = kb.get_file_paths() normalized_path = os.path.normpath(os.path.join(UI_DIRECTORY, user_path)) if not normalized_path.startswith(UI_DIRECTORY): raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid file path") @@ -193,44 +205,30 @@ async def add_file_to_knowledge_base(knowledge_name, file_path: DataIn, only_add for root, _, files in os.walk(normalized_path): for file in files: file_full_path = os.path.join(root, file) - if file_full_path not in kb.get_file_paths(): - kb.add_file_path(file_full_path, add_document, active_pl.name, only_add_file) - active_pl.add_docs_to_list(knowledge_name, file_full_path) + if file_full_path not in kb_file_list: + await add_document_handler(file_full_path, kb) else: raise HTTPException( status_code=status.HTTP_409_CONFLICT, detail=f"File already exists {file_full_path}", ) - elif os.path.isfile(normalized_path) and normalized_path in kb.get_file_paths() and only_add_file: + elif os.path.isfile(normalized_path) and normalized_path in kb.get_file_paths(): raise HTTPException( status_code=status.HTTP_409_CONFLICT, detail=f"File already exists 
{normalized_path}", ) - elif os.path.isfile(normalized_path) and only_add_file: - kb.add_file_path(normalized_path, add_document, active_pl.name, only_add_file) - active_pl.add_docs_to_list(knowledge_name, user_path) elif os.path.isfile(normalized_path): - kb.add_file_path(normalized_path, add_document, active_pl.name, only_add_file) - active_pl.add_docs_to_list(knowledge_name, user_path) - return add_document + await add_document_handler(normalized_path, kb) else: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Error uploading file.") - active_kb = ctx.knowledgemgr.get_active_knowledge_base() - if active_pl.indexer.comp_subtype == "milvus_vector": - if knowledge_name == active_kb.name: - await add_document_handler(add_document) - else: - active_pl.indexer.reinitialize_indexer(knowledge_name) - await add_document_handler(add_document) - active_pl.indexer.reinitialize_indexer(active_kb.name) - active_pl.update_indexer_to_retriever() - else: - if active_kb: - if active_kb.name == knowledge_name or active_kb.idx == knowledge_name: - await add_document_handler(add_document) + # update retriever with indexer since indexer updated + if kb.active: + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + if active_pl: + active_pl.update_retriever(kb, prev_indexer) + await save_knowledge_configurations("update", kb) - await save_pipeline_configurations("update", active_pl) return "File upload successfully" except ValueError as e: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) @@ -240,28 +238,33 @@ async def add_file_to_knowledge_base(knowledge_name, file_path: DataIn, only_add @kb_app.delete(path="/v1/knowledge/{knowledge_name}/files") async def remove_file_from_knowledge_base(knowledge_name, file_path: DataIn): try: - active_pl = ctx.get_pipeline_mgr().get_active_pipeline() kb = ctx.knowledgemgr.get_knowledge_base_by_name_or_id(knowledge_name) if kb.comp_type == "experience": raise HTTPException( 
status_code=status.HTTP_404_NOT_FOUND, detail="The experience type cannot perform file operations.", ) - if kb.comp_subtype == "kbadmin_kb" or active_pl.indexer.comp_subtype == "kbadmin_indexer": + if kb.comp_subtype == "kbadmin_kb": raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail="Please proceed to the kbadmin interface to perform the operation.", ) - document_list = kb.remove_file_path(file_path.local_path, active_pl.name) - active_pl.del_docs_to_list(knowledge_name, file_path.local_path) + prev_indexer = kb.indexer + document_list = kb.remove_file_path(file_path.local_path) + ctx.get_file_mgr().del_file(file_path.local_path) if not document_list: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, - detail="Deleted file documents not found", + detail="Deleted file documents not found", ) - await remove_document_handler(document_list, knowledge_name) + await remove_document_handler(document_list, kb) + # update retriever with indexer since indexer updated + if kb.active: + active_pl = ctx.get_pipeline_mgr().get_active_pipeline() + if active_pl: + active_pl.update_retriever(kb, prev_indexer) + await save_knowledge_configurations("update", kb) - await save_pipeline_configurations("update", active_pl) return "File deleted successfully" except ValueError as e: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(e)) @@ -362,12 +365,10 @@ async def view_sub_questions(que: ExperienceIn): @kb_app.get("/v1/kbadmin/kbs_list") -def get_kbs_list(): - active_pl = ctx.get_pipeline_mgr().get_active_pipeline() +def get_kbs_list(vector_url: str = Query(default="http://localhost:29530")): + active_kb = ctx.knowledgemgr.get_active_knowledge_base() try: - if not active_pl or active_pl.indexer.comp_subtype != "kbadmin_indexer": - return [] - CONNECTION_ARGS = {"uri": active_pl.indexer.vector_url} + CONNECTION_ARGS = {"uri": vector_url} kbs_list = get_kbs_info(CONNECTION_ARGS) kb_names = [name for name in kbs_list.keys()] return kb_names @@ 
-375,50 +376,31 @@ def get_kbs_list(): raise HTTPException(status_code=400, detail=str(e)) -# Update knowledge base data -async def add_document_handler(all_document=None): - if ctx.get_pipeline_mgr().get_active_pipeline() is None: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="Please activate pipeline", - ) +# add knowledge file node +async def add_document_handler(file_path=None, kb=None): + if file_path and kb: + docs = ctx.get_file_mgr().add_files(docs=file_path) + kb.add_file_path(file_path, docs) + nodelist = await kb.run_node_parser(docs=docs) + ctx.get_node_mgr().add_nodes(kb.node_parser.idx, nodelist) + await kb.add_nodes_to_indexer(nodelist) - active_pl = ctx.get_pipeline_mgr().get_active_pipeline() - if all_document: - nodelist = await ctx.get_pipeline_mgr().run_data_prepare(docs=all_document) - if active_pl.indexer.comp_subtype != "kbadmin_indexer": - if nodelist is None or len(nodelist) == 0: - raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="File not found") - ctx.get_node_mgr().add_nodes(active_pl.node_parser.idx, nodelist) - return "success update file" +# remove knowledge file node +async def remove_document_handler(document_list=None, kb=None): -# Update knowledge base data -async def remove_document_handler(document_list=None, knowledge_name: str = "default_kb"): - if ctx.get_pipeline_mgr().get_active_pipeline() is None: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail="Please activate pipeline", - ) - - active_pl = ctx.get_pipeline_mgr().get_active_pipeline() - active_kb = ctx.get_knowledge_mgr().get_active_knowledge_base() - ctx.get_node_mgr().del_nodes_by_np_idx(active_pl.node_parser.idx) - if active_pl.indexer.comp_subtype == "milvus_vector": - active_pl.indexer.reinitialize_indexer(knowledge_name) - active_pl.indexer.delete(document_list) - if active_kb: - active_pl.indexer.reinitialize_indexer(active_kb.name) - 
active_pl.update_indexer_to_retriever() - elif active_kb.name == knowledge_name: - await handle_reload_data(active_kb, active_pl) + if kb.indexer.comp_subtype == "milvus_vector": + kb.indexer.reinitialize_indexer(kb.name) + kb.indexer.delete(document_list) + ctx.get_node_mgr().del_nodes_by_np_idx(kb.node_parser.idx) + else: + await handle_reload_data(kb, node_parser_changed=True) # Restore knowledge base configuration async def restore_knowledge_configurations(): knowledgebase_config_repo = MilvusConfigRepository.create_connection("knowledgebase_config", 1) all_datas = [] - active_pl = ctx.get_pipeline_mgr().get_active_pipeline() if knowledgebase_config_repo: all_Knowledgebases_repo = knowledgebase_config_repo.get_configs() for Knowledgebase_data in all_Knowledgebases_repo: @@ -434,135 +416,115 @@ async def restore_knowledge_configurations(): try: for Knowledgebase_data in all_datas: Knoweldge_req = KnowledgeBaseCreateIn(**Knowledgebase_data) - kb = ctx.knowledgemgr.create_knowledge_base(Knoweldge_req) - if not active_pl: - continue + knowledge_json = Knoweldge_req.model_dump_json() + kb = ctx.knowledgemgr.create_knowledge_base(Knoweldge_req, knowledge_json) + try: + await update_kb_handler(kb, Knoweldge_req) + except Exception as e: + ctx.knowledgemgr.delete_knowledge_base(Knoweldge_req.name) + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(e)) if kb.comp_type == "knowledge" and kb.comp_subtype == "origin_kb": if Knowledgebase_data["file_paths"]: - if active_pl.indexer.comp_subtype != "milvus_vector" and Knowledgebase_data["active"]: - await handle_reload_data(kb, active_pl) - elif Knowledgebase_data["active"]: - active_pl.indexer.reinitialize_indexer(Knowledgebase_data["name"]) - active_pl.update_indexer_to_retriever() + if kb.indexer.comp_subtype == "milvus_vector": + kb.indexer.reinitialize_indexer(Knowledgebase_data["name"]) else: - pass + ctx.get_file_mgr().add_files(docs=Knowledgebase_data["file_paths"]) + await 
handle_reload_data(kb, node_parser_changed=True) elif kb.comp_subtype == "kbadmin_kb": - if Knowledgebase_data["active"]: - active_pl.retriever.config_kbadmin_milvus(kb.name) - except Exception as e: - print(f"Error load Knowledge base: {e}") - - -async def Synchronizing_vector_data(old_active_pl, new_active_pl): - try: - active_kb = ctx.knowledgemgr.get_active_knowledge_base() + kb.indexer.config_kbadmin_milvus(kb.name) + # connect retriever with active kb's indexers active_pl = ctx.get_pipeline_mgr().get_active_pipeline() - # Determine whether it is kbadmin type - if old_active_pl: - if ( - old_active_pl.retriever.comp_subtype == "kbadmin_retriever" - and new_active_pl.retriever.comp_subtype == "kbadmin_retriever" - ): - if active_kb: - if active_kb.comp_subtype == "kbadmin_kb": - new_active_pl.retriever.config_kbadmin_milvus(active_kb.name) - return True - elif old_active_pl.retriever.comp_subtype == "kbadmin_retriever": - return True - if not active_kb: - return True - if new_active_pl.retriever.comp_subtype == "kbadmin_retriever": - if active_kb: - if active_kb.comp_subtype == "kbadmin_kb": - new_active_pl.retriever.config_kbadmin_milvus(active_kb.name) - return True - # Perform milvus data synchronization - if new_active_pl.indexer.comp_subtype == "milvus_vector": - # Pipeline component state not changed - current_paths = active_kb.file_paths - file_paths = active_pl.compare_file_lists(active_kb.name, current_paths) - if "del_docs" not in file_paths: - await handle_pipeline_change(active_kb, active_pl, file_paths) - else: - need_delete_document_path = file_paths["del_docs"] - need_add_document_path = file_paths["add_docs"] - active_pl.indexer.reinitialize_indexer(active_kb.name) - if need_delete_document_path: - for file_path in need_delete_document_path: - await remove_file_from_knowledge_base(active_kb.name, DataIn(local_path=file_path)) - if need_add_document_path: - for file_path in need_add_document_path: - add_document = await 
add_file_to_knowledge_base( - active_kb.name, DataIn(local_path=file_path), False - ) - await add_document_handler(add_document) - active_pl.indexer.reinitialize_indexer(active_kb.name) - active_pl.update_indexer_to_retriever() - else: - await handle_reload_data(active_kb, active_pl) - await save_knowledge_configurations("update", active_kb) + if active_pl: + active_pl.update_retriever_list(ctx.knowledgemgr.get_active_knowledge_base()) except Exception as e: - raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=e) - - -# When the pipeline is changed, the current kb and the data of the pipeline are reconstructed -async def handle_pipeline_change(kb, pl, file_paths): - exist_file = False - need_add_document_path = file_paths["add_docs"] - node_lists = await get_nodes_with_kb(kb.name) - pl.indexer.clear_milvus_collection(kb.name) - if need_add_document_path: - if os.path.isfile(need_add_document_path[0]): - kb.clear_documents(pl.name) - exist_file = True - pl.indexer.reinitialize_indexer(kb.name) - for file_path in need_add_document_path: - if exist_file: - add_document = await add_file_to_knowledge_base(kb.name, DataIn(local_path=file_path), False) - await add_document_handler(add_document) - else: - add_document = [] - document = {} - documents_list = kb.get_all_document(file_path, pl.name) - for document in documents_list: - need_add_node_list = {} - for node in node_lists.values(): - if document.get("doc_id") == node.get("doc_id"): - need_add_node_list[node["id_"]] = node - docuement_text = pl.nodes_to_document(need_add_node_list) - document["id_"] = document.get("doc_id") - document["text"] = docuement_text - document["excluded_embed_metadata_keys"] = [ - "file_name", - "file_type", - "file_size", - "creation_date", - "last_modified_date", - "last_accessed_date", - ] - document["excluded_llm_metadata_keys"] = [ - "file_name", - "file_type", - "file_size", - "creation_date", - "last_modified_date", - "last_accessed_date", - ] - 
document["metadata"] = document.get("metadata") - result_document = Document.from_dict(data=document) - add_document.append(result_document) - pl.add_docs_to_list(kb.name, file_path) - await add_document_handler(add_document) + print(f"Error load Knowledge base: {e}") # reloading data that is not a milvus indexer -async def handle_reload_data(kb, pl): - pl.indexer.reinitialize_indexer() - pl.update_indexer_to_retriever() - need_add_document_path = kb.get_file_paths() - ctx.get_node_mgr().del_nodes_by_np_idx(pl.node_parser.idx) - kb.clear_documents(pl.name) - if need_add_document_path: - for file_path in need_add_document_path: - add_document = await add_file_to_knowledge_base(kb.name, DataIn(local_path=file_path), False) - await add_document_handler(add_document) +async def handle_reload_data(kb, node_parser_changed: bool = False): + + if kb.indexer and kb.indexer.comp_subtype == "milvus_vector": + kb.indexer.clear_milvus_collection(kb.name) + kb.indexer.reinitialize_indexer(kb.name) + # update nodes + if node_parser_changed: + ctx.get_node_mgr().del_nodes_by_np_idx(kb.node_parser.idx) + kb.update_nodes([]) + kb_file_paths = kb.get_file_paths() + for file_path in kb_file_paths: + docs = ctx.get_file_mgr().get_docs_by_file(file_path) + nodelist = await kb.run_node_parser(docs=docs) + if nodelist is None or len(nodelist) == 0: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="File not found") + ctx.get_node_mgr().add_nodes(kb.node_parser.idx, nodelist) + # update indexer + await kb.update_nodes_to_indexer() + + +async def update_kb_handler(kb, knowledge): + if kb.enable_benchmark: + kb.benchmark = Benchmark(True, "") + if knowledge.node_parser is not None and knowledge.comp_subtype != "kbadmin_kb": + np = knowledge.node_parser + found_parser = ctx.get_node_parser_mgr().search_parser(np) + if found_parser is not None: + kb.node_parser = found_parser + else: + match np.parser_type: + case NodeParserType.SIMPLE: + kb.node_parser = 
SimpleNodeParser(chunk_size=np.chunk_size, chunk_overlap=np.chunk_overlap) + case NodeParserType.HIERARCHY: + """ + HierarchyNodeParser is for Auto Merging Retriever + (https://docs.llamaindex.ai/en/stable/examples/retrievers/auto_merging_retriever/) + By default, the hierarchy is: + 1st level: chunk size 2048 + 2nd level: chunk size 512 + 3rd level: chunk size 128 + Please set chunk size with List. e.g. chunk_size=[2048,512,128] + """ + kb.node_parser = HierarchyNodeParser.from_defaults( + chunk_sizes=np.chunk_sizes, chunk_overlap=np.chunk_overlap + ) + case NodeParserType.SENTENCEWINDOW: + kb.node_parser = SWindowNodeParser.from_defaults(window_size=np.window_size) + case NodeParserType.UNSTRUCTURED: + kb.node_parser = UnstructedNodeParser(chunk_size=np.chunk_size, chunk_overlap=np.chunk_overlap) + case NodeParserType.KBADMINPARSER: + kb.node_parser = KBADMINParser() + ctx.get_node_parser_mgr().add(kb.node_parser) + if knowledge.indexer is not None: + ind = knowledge.indexer + found_indexer = ( + ctx.get_indexer_mgr().search_indexer(ind) if ind.indexer_type != IndexerType.MILVUS_VECTOR else None + ) + if found_indexer is not None: + kb.indexer = found_indexer + else: + embed_model = None + match ind.indexer_type: + case IndexerType.DEFAULT_VECTOR | IndexerType.FAISS_VECTOR | IndexerType.MILVUS_VECTOR: + if ind.embedding_model: + embed_model = ctx.get_model_mgr().search_model(ind.embedding_model) + embed_type = ind.inference_type + if embed_model is None: + if embed_type == "local": + ind.embedding_model.model_type = ModelType.EMBEDDING + elif embed_type == "vllm": + ind.embedding_model.model_type = ModelType.VLLM_EMBEDDING + embed_model = ctx.get_model_mgr().load_model(ind.embedding_model) + ctx.get_model_mgr().add(embed_model) + new_indexer = VectorIndexer(embed_model, ind.indexer_type, ind.vector_url, kb.name) + case IndexerType.KBADMIN_INDEXER: + kbadmin_embedding_url = ind.embedding_url + KBADMIN_VECTOR_URL = ind.vector_url + embed_model = 
ind.embedding_model.model_id + new_indexer = KBADMINIndexer( + embed_model, ind.indexer_type, kbadmin_embedding_url, KBADMIN_VECTOR_URL + ) + case _: + pass + del kb.indexer + kb.indexer = new_indexer + ctx.get_indexer_mgr().add(kb.indexer) + return kb diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py b/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py index 289797d0ee..89238f48c2 100644 --- a/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/pipeline.py @@ -1,42 +1,22 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import asyncio import json import os import re import time import weakref -from concurrent.futures import ThreadPoolExecutor -from edgecraftrag.api.v1.knowledge_base import Synchronizing_vector_data from edgecraftrag.api_schema import MilvusConnectRequest, PipelineCreateIn from edgecraftrag.base import ( GeneratorType, - IndexerType, InferenceType, ModelType, - NodeParserType, PostProcessorType, - RetrieverType, ) from edgecraftrag.components.benchmark import Benchmark from edgecraftrag.components.generator import FreeChatGenerator, QnAGenerator -from edgecraftrag.components.indexer import KBADMINIndexer, VectorIndexer -from edgecraftrag.components.node_parser import ( - HierarchyNodeParser, - KBADMINParser, - SimpleNodeParser, - SWindowNodeParser, - UnstructedNodeParser, -) from edgecraftrag.components.postprocessor import MetadataReplaceProcessor, RerankProcessor -from edgecraftrag.components.retriever import ( - AutoMergeRetriever, - KBadminRetriever, - SimpleBM25Retriever, - VectorSimRetriever, -) from edgecraftrag.config_repository import MilvusConfigRepository, save_pipeline_configurations from edgecraftrag.context import ctx from edgecraftrag.env import PIPELINE_FILE @@ -172,106 +152,10 @@ async def load_pipeline(request): async def update_pipeline_handler(pl, req): - active_kb = ctx.knowledgemgr.get_active_knowledge_base() - active_pipeline = 
ctx.get_pipeline_mgr().get_active_pipeline() - kb_name = active_kb.name if active_kb else "default" - - if req.node_parser is not None: - np = req.node_parser - found_parser = ctx.get_node_parser_mgr().search_parser(np) - if found_parser is not None: - pl.node_parser = found_parser - else: - match np.parser_type: - case NodeParserType.SIMPLE: - pl.node_parser = SimpleNodeParser(chunk_size=np.chunk_size, chunk_overlap=np.chunk_overlap) - case NodeParserType.HIERARCHY: - """ - HierarchyNodeParser is for Auto Merging Retriever - (https://docs.llamaindex.ai/en/stable/examples/retrievers/auto_merging_retriever/) - By default, the hierarchy is: - 1st level: chunk size 2048 - 2nd level: chunk size 512 - 3rd level: chunk size 128 - Please set chunk size with List. e.g. chunk_size=[2048,512,128] - """ - pl.node_parser = HierarchyNodeParser.from_defaults( - chunk_sizes=np.chunk_sizes, chunk_overlap=np.chunk_overlap - ) - case NodeParserType.SENTENCEWINDOW: - pl.node_parser = SWindowNodeParser.from_defaults(window_size=np.window_size) - case NodeParserType.UNSTRUCTURED: - pl.node_parser = UnstructedNodeParser(chunk_size=np.chunk_size, chunk_overlap=np.chunk_overlap) - case NodeParserType.KBADMINPARSER: - pl.node_parser = KBADMINParser() - ctx.get_node_parser_mgr().add(pl.node_parser) - - pl._node_changed = True - - if req.indexer is not None: - ind = req.indexer - found_indexer = ctx.get_indexer_mgr().search_indexer(ind) - if found_indexer is not None: - pl.indexer = found_indexer - else: - embed_model = None - match ind.indexer_type: - case IndexerType.DEFAULT_VECTOR | IndexerType.FAISS_VECTOR | IndexerType.MILVUS_VECTOR: - if ind.embedding_model: - embed_model = ctx.get_model_mgr().search_model(ind.embedding_model) - embed_type = ind.inference_type - if embed_model is None: - if embed_type == "local": - ind.embedding_model.model_type = ModelType.EMBEDDING - elif embed_type == "vllm": - ind.embedding_model.model_type = ModelType.VLLM_EMBEDDING - embed_model = 
ctx.get_model_mgr().load_model(ind.embedding_model) - ctx.get_model_mgr().add(embed_model) - # TODO: **RISK** if considering 2 pipelines with different - # nodes, but same indexer, what will happen? - pl.indexer = VectorIndexer(embed_model, ind.indexer_type, ind.vector_url, kb_name) - case IndexerType.KBADMIN_INDEXER: - kbadmin_embedding_url = ind.embedding_url - KBADMIN_VECTOR_URL = ind.vector_url - embed_model = ind.embedding_model.model_id - pl.indexer = KBADMINIndexer( - embed_model, ind.indexer_type, kbadmin_embedding_url, KBADMIN_VECTOR_URL - ) - case _: - pass - ctx.get_indexer_mgr().add(pl.indexer) - pl._index_changed = True - pl._index_to_retriever_updated = False - # As indexer changed, nodes are cleared in indexer's db - pl._node_changed = True - if req.indexer.indexer_type == "milvus_vector": - pl.reset_node_status() if req.retriever is not None: retr = req.retriever - match retr.retriever_type: - case RetrieverType.VECTORSIMILARITY: - if pl.indexer is not None: - pl.retriever = VectorSimRetriever(pl.indexer, similarity_top_k=retr.retrieve_topk) - else: - raise Exception("No indexer") - case RetrieverType.AUTOMERGE: - # AutoMergeRetriever looks at a set of leaf nodes and recursively "merges" subsets of leaf nodes that reference a parent node - if pl.indexer is not None: - pl.retriever = AutoMergeRetriever(pl.indexer, similarity_top_k=retr.retrieve_topk) - else: - return Exception("No indexer") - case RetrieverType.BM25: - if pl.indexer is not None: - pl.retriever = SimpleBM25Retriever(pl.indexer, similarity_top_k=retr.retrieve_topk) - else: - return Exception("No indexer") - case RetrieverType.KBADMIN_RETRIEVER: - pl.retriever = KBadminRetriever(pl.indexer, similarity_top_k=retr.retrieve_topk) - case _: - pass - # Index is updated to retriever - pl._index_to_retriever_updated = True + pl.update_retriever_config(retr.retriever_type, retr.retrieve_topk) if req.postprocessor is not None: pp = req.postprocessor @@ -332,11 +216,9 @@ async def 
update_pipeline_handler(pl, req): raise Exception("Inference Type Not Supported") if pl.status.active != req.active: - ctx.get_pipeline_mgr().activate_pipeline(pl.name, req.active, ctx.get_node_mgr(), kb_name) - - # Create and set up a separate event loop to run asynchronous tasks in threads - if req.active: - await Synchronizing_vector_data(active_pipeline, pl) + ctx.get_pipeline_mgr().activate_pipeline( + pl.name, req.active, ctx.get_knowledge_mgr().get_active_knowledge_base() + ) return pl diff --git a/EdgeCraftRAG/edgecraftrag/api/v1/system.py b/EdgeCraftRAG/edgecraftrag/api/v1/system.py index 25def7f78a..7a0fd62751 100644 --- a/EdgeCraftRAG/edgecraftrag/api/v1/system.py +++ b/EdgeCraftRAG/edgecraftrag/api/v1/system.py @@ -6,7 +6,7 @@ import cpuinfo import distro -import openvino.runtime as ov +import openvino as ov import psutil from fastapi import FastAPI, HTTPException, status diff --git a/EdgeCraftRAG/edgecraftrag/api_schema.py b/EdgeCraftRAG/edgecraftrag/api_schema.py index 00a7631975..4b43b378f9 100644 --- a/EdgeCraftRAG/edgecraftrag/api_schema.py +++ b/EdgeCraftRAG/edgecraftrag/api_schema.py @@ -55,13 +55,10 @@ class GeneratorIn(BaseModel): class PipelineCreateIn(BaseModel): idx: Optional[str] = None name: Optional[str] = None - node_parser: Optional[NodeParserIn] = None - indexer: Optional[IndexerIn] = None retriever: Optional[RetrieverIn] = None postprocessor: Optional[list[PostProcessorIn]] = None generator: Optional[list[GeneratorIn]] = None active: Optional[bool] = False - documents_cache: Optional[Dict] = None class DataIn(BaseModel): @@ -87,6 +84,8 @@ class KnowledgeBaseCreateIn(BaseModel): idx: Optional[str] = None name: str description: Optional[str] = None + node_parser: Optional[NodeParserIn] = None + indexer: Optional[IndexerIn] = None active: Optional[bool] = None comp_type: Optional[str] = "knowledge" comp_subtype: Optional[str] = "origin_kb" diff --git a/EdgeCraftRAG/edgecraftrag/components/generator.py 
b/EdgeCraftRAG/edgecraftrag/components/generator.py index 8be6b4b9dc..50d1452457 100644 --- a/EdgeCraftRAG/edgecraftrag/components/generator.py +++ b/EdgeCraftRAG/edgecraftrag/components/generator.py @@ -393,6 +393,8 @@ def __init__(self, llm_model, inference_type, vllm_endpoint, **kwargs): self.model_path = llm_instance.model_path self.llm = llm_model + if self.inference_type == InferenceType.LOCAL: + self.lock = asyncio.Lock() if self.inference_type == InferenceType.VLLM: self.vllm_name = llm_model().model_id if vllm_endpoint == "": @@ -400,9 +402,42 @@ def __init__(self, llm_model, inference_type, vllm_endpoint, **kwargs): self.vllm_endpoint = vllm_endpoint async def run(self, chat_request, retrieved_nodes, node_parser_type, **kwargs): - response = await self.run_vllm(chat_request, retrieved_nodes, node_parser_type, **kwargs) + if self.inference_type == InferenceType.LOCAL: + response = await self.run_local(chat_request, retrieved_nodes, node_parser_type, **kwargs) + elif self.inference_type == InferenceType.VLLM: + response = await self.run_vllm(chat_request, retrieved_nodes, node_parser_type, **kwargs) + else: + raise ValueError("LLM inference_type not supported") return response + async def run_local(self, chat_request, retrieved_nodes, node_parser_type, **kwargs): + if self.llm() is None: + # This could happen when User delete all LLMs through RESTful API + raise ValueError("No LLM available, please load LLM") + generate_kwargs = dict( + temperature=chat_request.temperature, + do_sample=chat_request.temperature > 0.0, + top_p=chat_request.top_p, + top_k=chat_request.top_k, + typical_p=chat_request.typical_p, + repetition_penalty=chat_request.repetition_penalty, + ) + self.llm().generate_kwargs = generate_kwargs + self.llm().max_new_tokens = chat_request.max_tokens + prompt_str = chatcompletion_to_chatml(chat_request) + if chat_request.stream: + + # Asynchronous generator + async def generator(): + async for chunk in local_stream_generator(self.lock, 
self.llm(), prompt_str, ""): + yield chunk or "" + await asyncio.sleep(0) + + return generator() + else: + result = self.llm().complete(prompt_str) + return result + async def run_vllm(self, chat_request, retrieved_nodes, node_parser_type, **kwargs): llm = OpenAILike( api_key="fake", diff --git a/EdgeCraftRAG/edgecraftrag/components/indexer.py b/EdgeCraftRAG/edgecraftrag/components/indexer.py index e2a517101d..6248c87db3 100644 --- a/EdgeCraftRAG/edgecraftrag/components/indexer.py +++ b/EdgeCraftRAG/edgecraftrag/components/indexer.py @@ -6,11 +6,13 @@ import faiss from edgecraftrag.base import BaseComponent, CompType, IndexerType from edgecraftrag.context import ctx +from langchain_milvus import Milvus +from langchain_openai import OpenAIEmbeddings from llama_index.core import StorageContext, VectorStoreIndex from llama_index.vector_stores.faiss import FaissVectorStore from llama_index.vector_stores.milvus import MilvusVectorStore from pydantic import model_serializer -from pymilvus import Collection, connections +from pymilvus import Collection, MilvusException, connections, utility class VectorIndexer(BaseComponent, VectorStoreIndex): @@ -30,9 +32,7 @@ def __init__(self, embed_model, vector_type, vector_url="http://localhost:19530" self._initialize_indexer(embed_model, vector_type, vector_url, kb_name) def _initialize_indexer(self, embed_model, vector_type, vector_url, kb_name): - # get active name - pl = ctx.get_pipeline_mgr().get_active_pipeline() - collection_name = kb_name + pl.name if pl else "default" + collection_name = kb_name if embed_model: try: self.d = len(embed_model.get_text_embedding("test")) @@ -62,12 +62,10 @@ def reinitialize_indexer(self, kb_name="default_kb"): self._initialize_indexer(self.model, self.comp_subtype, self.vector_url, kb_name) def clear_milvus_collection(self, kb_name="default_kb"): - # get active name - pl = ctx.get_pipeline_mgr().get_active_pipeline() - plname = pl.name if pl else "" milvus_vector_store = MilvusVectorStore( 
uri=self.vector_url, - collection_name=kb_name + plname, + dim=self.d, + collection_name=kb_name, overwrite=False, ) milvus_vector_store.clear() @@ -90,8 +88,33 @@ def __init__(self, embed_model, vector_type, kbadmin_embedding_url, vector_url=" comp_subtype=IndexerType.KBADMIN_INDEXER, ) self.embed_model = embed_model - self.kbadmin_embedding_url = kbadmin_embedding_url + self.kbadmin_embedding_url = kbadmin_embedding_url + "/v3" self.vector_url = vector_url + self.CONNECTION_ARGS = {"uri": self.vector_url} + self.vector_field = "q_1024_vec" + self.text_field = "content_with_weight" + self.embedding = OpenAIEmbeddings( + model=embed_model, + api_key="unused", + base_url=self.kbadmin_embedding_url, + tiktoken_enabled=False, + embedding_ctx_length=510, + ) + + def config_kbadmin_milvus(self, knowledge_name): + collection_name = knowledge_name + if not kbs_rev_maps: + get_kbs_info(self.CONNECTION_ARGS) + collection_name = kbs_rev_maps[collection_name] + self.vector_db = Milvus( + self.embedding, + connection_args=self.CONNECTION_ARGS, + collection_name=collection_name, + vector_field=self.vector_field, + text_field=self.text_field, + enable_dynamic_field=True, + index_params={"index_type": "FLAT", "metric_type": "IP", "params": {}}, + ) def insert_nodes(self, nodes): return None @@ -118,3 +141,68 @@ def ser_model(self): "vector_url": self.vector_url, } return set + + +# global kbs maps. 
+global kbs_rev_maps +kbs_rev_maps = {} + + +def get_kbs_info(CONNECTION_ARGS): + alias = "default" + try: + connections.connect("default", **CONNECTION_ARGS) + collections = utility.list_collections() + all_kb_infos = {} + new_infos = {} + for kb in collections: + collection = Collection(kb) + collection.load() + try: + if any(field.name == "kb_id" for field in collection.schema.fields): + docs = collection.query( + expr="pk != 0", + output_fields=["kb_name", "kb_id", "docnm_kwd"], + timeout=10, + ) + else: + docs = collection.query( + expr="pk != 0", + output_fields=["filename"], + timeout=10, + ) + collection.release() + except MilvusException as e: + continue + this_kbinfo = {} + for doc in docs: + try: + if "kb_name" in doc: + if not this_kbinfo: + this_kbinfo["name"] = doc["kb_name"] + this_kbinfo["uuid"] = doc["kb_id"] + this_kbinfo["files"] = set([doc["docnm_kwd"]]) + else: + this_kbinfo["files"].add(doc["docnm_kwd"]) + else: + if not this_kbinfo: + this_kbinfo["name"] = kb + this_kbinfo["uuid"] = "" + this_kbinfo["files"] = set([doc["filename"]]) + else: + this_kbinfo["files"].add(doc["filename"]) + except KeyError: + this_kbinfo = None + break + if this_kbinfo: + unique_files = list(this_kbinfo["files"]) + this_kbinfo["files"] = unique_files + new_infos[kb] = this_kbinfo + all_kb_infos.update(new_infos) + kbs_rev_maps.clear() + for kb_id in all_kb_infos: + kbs_rev_maps[all_kb_infos[kb_id]["name"]] = kb_id + return kbs_rev_maps + finally: + if connections.has_connection(alias): + connections.disconnect(alias) diff --git a/EdgeCraftRAG/edgecraftrag/components/knowledge_base.py b/EdgeCraftRAG/edgecraftrag/components/knowledge_base.py index 93f578080c..8846161877 100644 --- a/EdgeCraftRAG/edgecraftrag/components/knowledge_base.py +++ b/EdgeCraftRAG/edgecraftrag/components/knowledge_base.py @@ -3,20 +3,26 @@ import json import os +import time import uuid from typing import Any, Dict, List, Optional, Union -from edgecraftrag.base import BaseComponent, CompType 
+from edgecraftrag.base import BaseComponent, BenchType, CompType from edgecraftrag.config_repository import ( MilvusConfigRepository, MilvusDocumentRecordRepository, ) from edgecraftrag.env import DOCUMENT_DATA_FILE, EXPERIENCE_FILE from llama_index.core.schema import Document -from pydantic import model_serializer +from pydantic import Field, model_serializer class Knowledge(BaseComponent): + + node_parser: Optional[BaseComponent] = Field(default=None) + indexer: Optional[BaseComponent] = Field(default=None) + benchmark: Optional[BaseComponent] = Field(default=None) + def __init__( self, name: str, @@ -28,6 +34,7 @@ def __init__( idx: Optional[str] = None, all_document_maps: Optional[Dict] = None, file_paths: Optional[list] = None, + origin_json: Optional[str] = None, **kwargs, ): super().__init__(name=name, comp_type=CompType.KNOWLEDGE, **kwargs) @@ -37,12 +44,13 @@ def __init__( self.active = active self.comp_type = comp_type self.comp_subtype = comp_subtype + self.enable_benchmark = os.getenv("ENABLE_BENCHMARK", "False").lower() == "true" if idx is not None: self.idx = str(idx) if all_document_maps is not None: self.all_document_maps = all_document_maps else: - self.all_document_maps: Dict[str, Dict[str, str]] = {} + self.all_document_maps: Dict[str, str] = {} self.document_records: List[Dict[str, str]] = [] @@ -55,6 +63,19 @@ def __init__( self.experience_repo = MilvusConfigRepository.create_connection("experience_data", 1) self.document_record_repo = MilvusDocumentRecordRepository.create_connection("document_records", 1) + self.nodes = [] + self._origin_json = origin_json + + @property + def get_knowledge_json(self) -> str: + return self._origin_json + + def update_knowledge_json(self, knowledge_dict): + origin_json = json.loads(self._origin_json) + for k, v in knowledge_dict.items(): + if v is not None: + origin_json[k] = v + self._origin_json = json.dumps(origin_json) def _update_file_names(self) -> None: self.file_map = {os.path.basename(path): path for 
path in self.file_paths if path is not None} @@ -63,17 +84,13 @@ def add_file_path( self, file_path: str, documents: List[Document], - pl_name: str, - only_add_file: bool = True, ) -> bool: - if pl_name not in self.all_document_maps: - self.all_document_maps[pl_name] = {} - if file_path not in self.all_document_maps[pl_name]: + + if file_path not in self.all_document_maps: file_id = str(uuid.uuid4()) - self.all_document_maps[pl_name][file_path] = file_id + self.all_document_maps[file_path] = file_id else: - file_id = self.all_document_maps[pl_name][file_path] - + file_id = self.all_document_maps[file_path] records = [ { "file_id": file_id, @@ -85,20 +102,19 @@ def add_file_path( ] self._add_document_records(records) - if only_add_file and file_path not in self.file_paths: + if file_path not in self.file_paths: self.file_paths.append(file_path) self._update_file_names() - def remove_file_path(self, file_path: str, pl_name: str) -> List[str]: + def remove_file_path(self, file_path: str) -> List[str]: removed_doc_ids = [] - if pl_name in self.all_document_maps and file_path in self.all_document_maps[pl_name]: - file_id = self.all_document_maps[pl_name][file_path] + if file_path in self.all_document_maps: + file_id = self.all_document_maps[file_path] removed_doc_ids = self._remove_document_records_by_file_id(file_id) - - del self.all_document_maps[pl_name][file_path] - if file_path in self.file_paths: - self.file_paths.remove(file_path) - self._update_file_names() + del self.all_document_maps[file_path] + if file_path in self.file_paths: + self.file_paths.remove(file_path) + self._update_file_names() return removed_doc_ids @@ -310,34 +326,9 @@ def _remove_document_records_by_file_id(self, file_id: str) -> List[Dict[str, st json.dump(result_documents, f, ensure_ascii=False, indent=4) return deleted_records - def get_all_document(self, file_path, pl_name) -> List[Dict[str, Any]]: - doc_info_list = [] - if pl_name not in self.all_document_maps: - return doc_info_list - 
file_id = self.all_document_maps[pl_name].get(file_path) - if not file_id: - return doc_info_list - - if self.document_record_repo: - records = self.document_record_repo.get_records_by_file_id(file_id) - doc_info_list = [{"doc_id": rec["doc_id"], "metadata": rec.get("metadata", {})} for rec in records] - else: - if os.path.exists(DOCUMENT_DATA_FILE): - with open(DOCUMENT_DATA_FILE, "r", encoding="utf-8") as f: - all_data = json.load(f) - doc_info_list = [ - {"doc_id": item["doc_id"], "metadata": item.get("metadata", {})} - for item in all_data - if item.get("file_id") == file_id - ] - return doc_info_list - - def clear_documents(self, pl_name): - if pl_name not in self.all_document_maps: - return - for file_id in self.all_document_maps[pl_name].values(): + def clear_documents(self): + for file_id in self.all_document_maps.values(): self._remove_document_records_by_file_id(file_id) - self.all_document_maps[pl_name] = {} return True # Make sure the folder and its files exist @@ -358,6 +349,37 @@ def calculate_totals(self): total = None return total + def update_nodes(self, nodes: List[Document]): + self.nodes = nodes + + def add_nodes(self, nodes: List[Document]): + self.nodes.extend(nodes) + + async def run_node_parser(self, docs: List[Document]) -> Any: + start = 0 + if self.enable_benchmark: + benchmark_index = self.benchmark.init_benchmark_data() + start = time.perf_counter() + nodes = self.node_parser.run(docs=docs) + if self.enable_benchmark: + benchmark_data = ( + self.benchmark.get_benchmark_data(benchmark_index, CompType.NODEPARSER) + time.perf_counter() - start + ) + self.benchmark.update_benchmark_data(benchmark_index, CompType.NODEPARSER, benchmark_data) + + benchmark_data = self.benchmark.get_benchmark_data(benchmark_index, BenchType.CHUNK_NUM) + len(nodes) + self.benchmark.update_benchmark_data(benchmark_index, BenchType.CHUNK_NUM, benchmark_data) + self.add_nodes(nodes) + return nodes + + async def update_nodes_to_indexer(self) -> Any: + if 
self.indexer is not None: + self.indexer.insert_nodes(self.nodes) + + async def add_nodes_to_indexer(self, nodes) -> Any: + if self.indexer is not None: + self.indexer.insert_nodes(nodes) + def run(self, **kwargs) -> Any: pass @@ -368,6 +390,8 @@ def ser_model(self): "name": self.name, "comp_type": self.comp_type, "comp_subtype": self.comp_subtype, + "node_parser": self.node_parser, + "indexer": self.indexer, "file_map": self.file_map, "description": self.description, "active": self.active, diff --git a/EdgeCraftRAG/edgecraftrag/components/pipeline.py b/EdgeCraftRAG/edgecraftrag/components/pipeline.py index 59bb8fffc1..abd9b8622b 100644 --- a/EdgeCraftRAG/edgecraftrag/components/pipeline.py +++ b/EdgeCraftRAG/edgecraftrag/components/pipeline.py @@ -6,12 +6,11 @@ import os import time from concurrent.futures import ThreadPoolExecutor -from typing import Any, Callable, Dict, List, Optional +from typing import Any, Callable, List, Optional from comps.cores.proto.api_protocol import ChatCompletionRequest from edgecraftrag.base import ( BaseComponent, - BenchType, CallbackType, CompType, GeneratorType, @@ -22,10 +21,15 @@ from edgecraftrag.components.generator import clone_generator from edgecraftrag.components.postprocessor import RerankProcessor from edgecraftrag.components.query_preprocess import query_search -from edgecraftrag.components.retriever import AutoMergeRetriever, SimpleBM25Retriever, VectorSimRetriever +from edgecraftrag.components.retriever import ( + AutoMergeRetriever, + KBadminRetriever, + SimpleBM25Retriever, + VectorSimRetriever, +) from edgecraftrag.env import SEARCH_CONFIG_PATH, SEARCH_DIR from fastapi.responses import StreamingResponse -from llama_index.core.schema import Document, QueryBundle +from llama_index.core.schema import QueryBundle from pydantic import BaseModel, Field, model_serializer @@ -35,9 +39,7 @@ class PipelineStatus(BaseModel): class Pipeline(BaseComponent): - node_parser: Optional[BaseComponent] = Field(default=None) - 
indexer: Optional[BaseComponent] = Field(default=None) - retriever: Optional[BaseComponent] = Field(default=None) + retrievers: Optional[List[BaseComponent]] = Field(default=None) postprocessor: Optional[List[BaseComponent]] = Field(default=None) generator: Optional[List[BaseComponent]] = Field(default=None) benchmark: Optional[BaseComponent] = Field(default=None) @@ -46,7 +48,6 @@ class Pipeline(BaseComponent): run_retriever_postprocessor_cb: Optional[Callable[..., Any]] = Field(default=None) run_retriever_cb: Optional[Callable[..., Any]] = Field(default=None) run_postprocessor_cb: Optional[Callable[..., Any]] = Field(default=None) - run_data_prepare_cb: Optional[Callable[..., Any]] = Field(default=None) run_query_search_cb: Optional[Callable[..., Any]] = Field(default=None) def __init__( @@ -54,31 +55,24 @@ def __init__( name, origin_json=None, idx=None, - documents_cache=None, ): super().__init__(name=name, comp_type=CompType.PIPELINE) if self.name == "" or self.name is None: self.name = self.idx if idx is not None: self.idx = str(idx) - if documents_cache is not None: - self.documents_cache = documents_cache - else: - self.documents_cache = {} - self.generator = [] self.enable_benchmark = os.getenv("ENABLE_BENCHMARK", "False").lower() == "true" self.run_pipeline_cb = run_pipeline self.run_retriever_postprocessor_cb = run_retrieve_postprocess self.run_retriever_cb = run_retrieve self.run_postprocessor_cb = run_postprocess self.run_generator_cb = run_generator - self.run_data_prepare_cb = run_simple_doc self.run_query_search_cb = run_query_search - self._node_changed = False - self._index_changed = False - self._index_to_retriever_updated = True - self._origin_json = origin_json + self._origin_json = origin_json if origin_json is not None else "{}" + self.retriever_type = "" + self.retrieve_topk = 0 + self.retrievers = [] # TODO: consider race condition @property @@ -92,65 +86,10 @@ def update_pipeline_json(self, pipeline_dict): origin_json[k] = v 
self._origin_json = json.dumps(origin_json) - @property - def node_changed(self) -> bool: - return self._node_changed - - def reset_node_status(self) -> bool: - self._node_changed = False - self._index_changed = False - self._index_to_retriever_updated = True - - def check_active(self, nodelist, kb_name): - if self._node_changed: - if not self._index_changed: - print("Reinitializing indexer ...") - self.indexer.reinitialize_indexer(kb_name) - self._index_changed = True - self._index_to_retriever_updated = False - - if nodelist is not None and len(nodelist) > 0: - self.update_nodes(nodelist) - self._node_changed = False - - # Due to limitation, need to update retriever's db after reinitialize_indexer() - if self._index_changed and not self._index_to_retriever_updated: - self.update_indexer_to_retriever() - self._index_changed = False - self._index_to_retriever_updated = True - - self.reset_node_status() - - # TODO: update doc changes - # TODO: more operations needed, add, del, modify - def update_nodes(self, nodes): - if self.indexer is not None: - self.indexer.insert_nodes(nodes) - - def update_indexer_to_retriever(self): - if self.indexer is not None and self.retriever is not None: - old_retriever = self.retriever - retriever_type = old_retriever.comp_subtype - similarity_top_k = old_retriever.topk - match retriever_type: - case RetrieverType.VECTORSIMILARITY: - new_retriever = VectorSimRetriever(self.indexer, similarity_top_k=similarity_top_k) - case RetrieverType.AUTOMERGE: - new_retriever = AutoMergeRetriever(self.indexer, similarity_top_k=similarity_top_k) - case RetrieverType.BM25: - new_retriever = SimpleBM25Retriever(self.indexer, similarity_top_k=similarity_top_k) - case _: - new_retriever = old_retriever - - self.retriever = new_retriever - # Implement abstract run function # callback dispatcher async def run(self, **kwargs) -> Any: if "cbtype" in kwargs: - if kwargs["cbtype"] == CallbackType.DATAPREP: - if "docs" in kwargs: - return await 
self.run_data_prepare_cb(self, docs=kwargs["docs"]) if kwargs["cbtype"] == CallbackType.RETRIEVE_POSTPROCESS: if "chat_request" in kwargs: return await self.run_retriever_postprocessor_cb(self, chat_request=kwargs["chat_request"]) @@ -178,107 +117,22 @@ async def run(self, **kwargs) -> Any: if "chat_request" in kwargs: return await self.run_query_search_cb(self, chat_request=kwargs["chat_request"]) - def update(self, node_parser=None, indexer=None, retriever=None, postprocessor=None, generator=None): - if node_parser is not None: - self.node_parser = node_parser - if indexer is not None: - self.indexer = indexer - if retriever is not None: - self.retriever = retriever + def update(self, retrievers=None, postprocessor=None, generator=None): + if retrievers is not None: + self.retrievers = retrievers if postprocessor is not None: self.postprocessor = postprocessor if generator is not None: self.generator = generator - def add_docs_to_list(self, kb_name, file_paths): - if self.indexer.comp_subtype != "milvus_vector": - return None - target_config = self.connect_target_config() - if kb_name not in self.documents_cache: - self.documents_cache[kb_name] = {"files": [], "config": target_config} - if isinstance(file_paths, str): - file_paths = [file_paths] - self.documents_cache[kb_name]["files"].extend(file_paths) - - def del_docs_to_list(self, kb_name, file_paths): - if kb_name not in self.documents_cache: - return None - if isinstance(file_paths, str): - file_paths = [file_paths] - for file_path in file_paths: - if file_path in self.documents_cache[kb_name]["files"]: - self.documents_cache[kb_name]["files"].remove(file_path) - - def clear_document_cache(self, kb_name): - if kb_name in self.documents_cache: - del self.documents_cache[kb_name] - - def compare_file_lists(self, kb_name, current_files): - self.add_docs_to_list(kb_name, []) - target_config = self.connect_target_config() - if self.documents_cache[kb_name]["config"] == target_config: - diff = 
self.compare_mappings(self.documents_cache[kb_name]["files"], current_files) - else: - self.documents_cache[kb_name] = {"files": [], "config": self.connect_target_config()} - diff = {"add_docs": current_files} - return diff - - def compare_mappings(self, stored_files, new_files): - stored = set(stored_files) - new = set(new_files) - return {"add_docs": list(new - stored), "del_docs": list(stored - new)} - - def connect_target_config(self): - target_config = "" - if self.node_parser.comp_subtype == NodeParserType.SIMPLE: - target_config = ( - "simple" - + str(self.node_parser.chunk_size) - + str(self.node_parser.chunk_overlap) - + self.indexer.model.model_id - ) - elif self.node_parser.comp_subtype == NodeParserType.SENTENCEWINDOW: - target_config = "sentencewindow" + str(self.node_parser.window_size) + self.indexer.model.model_id - elif self.node_parser.comp_subtype == NodeParserType.HIERARCHY: - target_config = "hierarchical" + self.indexer.model.model_id - elif self.node_parser.comp_subtype == NodeParserType.UNSTRUCTURED: - target_config = ( - "target_config" - + str(self.node_parser.chunk_size) - + str(self.node_parser.chunk_overlap) - + self.indexer.model.model_id - ) - return target_config - - def nodes_to_document(self, node_dict: dict): - nodes = [] - for node_info in node_dict.values(): - nodes.append({"start": int(node_info["start_char_idx"]), "text": node_info["text"]}) - nodes_sorted = sorted(nodes, key=lambda x: x["start"]) - if not nodes_sorted: - return "" - merged_text = nodes_sorted[0]["text"] - for i in range(1, len(nodes_sorted)): - prev_text = merged_text - curr_text = nodes_sorted[i]["text"] - max_possible_overlap = min(len(prev_text), len(curr_text)) - overlap_len = 0 - for j in range(max_possible_overlap, 0, -1): - if prev_text.endswith(curr_text[:j]): - overlap_len = j - break - merged_text += curr_text[overlap_len:] - return merged_text - @model_serializer def ser_model(self): + retriever_config = self.retrievers[0] if self.retrievers else 
None set = { "idx": self.idx, "name": self.name, "comp_type": self.comp_type, - "node_parser": self.node_parser, - "indexer": self.indexer, - "retriever": self.retriever, + "retriever": retriever_config, "postprocessor": self.postprocessor, "generator": self.generator, "status": self.status, @@ -287,11 +141,6 @@ def ser_model(self): def model_existed(self, model_id: str) -> bool: # judge if the given model is existed in a pipeline by model_id - if self.indexer: - if hasattr(self.indexer, "_embed_model") and self.indexer._embed_model.model_id == model_id: - return True - if hasattr(self.indexer, "_llm") and self.indexer._llm.model_id == model_id: - return True if self.postprocessor: for processor in self.postprocessor: if hasattr(processor, "model_id") and processor.model_id == model_id: @@ -312,6 +161,56 @@ def get_generator(self, generator_type: str) -> Optional[BaseComponent]: return gen return None + def update_retriever_config(self, retriever_type: str, retrieve_topk: int): + self.retriever_type = retriever_type + self.retrieve_topk = retrieve_topk + + def update_retriever_list(self, active_kbs): + self.clear_retrievers() + for active_kb in active_kbs: + indexer = active_kb.indexer + if indexer is not None: + similarity_top_k = self.retrieve_topk + retriever = None + if active_kb.comp_subtype == "kbadmin_kb": + # For kbadmin_kb, only KBadminRetriever is supported + retriever = KBadminRetriever(indexer, similarity_top_k=similarity_top_k) + else: + match self.retriever_type: + case RetrieverType.VECTORSIMILARITY: + retriever = VectorSimRetriever(indexer, similarity_top_k=similarity_top_k) + case RetrieverType.AUTOMERGE: + retriever = AutoMergeRetriever(indexer, similarity_top_k=similarity_top_k) + case RetrieverType.BM25: + retriever = SimpleBM25Retriever(indexer, similarity_top_k=similarity_top_k) + case _: + raise ValueError(f"Retriever type {self.retriever_type} not supported") + if retriever: + self.retrievers.append(retriever) + + def update_retriever(self, 
kb, prev_indexer): + indexer = kb.indexer + for i, retriever in enumerate(self.retrievers): + if prev_indexer == retriever._index: + similarity_top_k = self.retrieve_topk + if kb.comp_subtype == "kbadmin_kb": + # For kbadmin_kb, only KBadminRetriever is supported + retriever = KBadminRetriever(indexer, similarity_top_k=similarity_top_k) + else: + match self.retriever_type: + case RetrieverType.VECTORSIMILARITY: + retriever = VectorSimRetriever(indexer, similarity_top_k=similarity_top_k) + case RetrieverType.AUTOMERGE: + retriever = AutoMergeRetriever(indexer, similarity_top_k=similarity_top_k) + case RetrieverType.BM25: + retriever = SimpleBM25Retriever(indexer, similarity_top_k=similarity_top_k) + case _: + raise ValueError(f"Retriever type {self.retriever_type} not supported") + break + + def clear_retrievers(self): + self.retrievers = [] + def create_freechat_gen_from_chatqna_gen(self) -> bool: if len(self.generator) == 0 or self.generator[0].comp_subtype != GeneratorType.CHATQNA: return False @@ -340,7 +239,9 @@ async def run_retrieve(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any if pl.enable_benchmark: benchmark_index = pl.benchmark.init_benchmark_data() start = time.perf_counter() - retri_res = pl.retriever.run(query=query, top_k=top_k) + retri_res = [] + for retriever in pl.retrievers: + retri_res.extend(retriever.run(query=query, top_k=top_k)) if pl.enable_benchmark: pl.benchmark.update_benchmark_data(benchmark_index, CompType.RETRIEVER, time.perf_counter() - start) contexts[CompType.RETRIEVER] = retri_res @@ -374,7 +275,9 @@ async def run_retrieve_postprocess(pl: Pipeline, chat_request: ChatCompletionReq if pl.enable_benchmark: benchmark_index = pl.benchmark.init_benchmark_data() start = time.perf_counter() - retri_res = pl.retriever.run(query=query, top_k=top_k) + retri_res = [] + for retriever in pl.retrievers: + retri_res.extend(retriever.run(query=query, top_k=top_k)) if pl.enable_benchmark: 
pl.benchmark.update_benchmark_data(benchmark_index, CompType.RETRIEVER, time.perf_counter() - start) contexts[CompType.RETRIEVER] = retri_res @@ -391,25 +294,6 @@ async def run_retrieve_postprocess(pl: Pipeline, chat_request: ChatCompletionReq return contexts -async def run_simple_doc(pl: Pipeline, docs: List[Document]) -> Any: - start = 0 - if pl.enable_benchmark: - benchmark_index = pl.benchmark.init_benchmark_data() - start = time.perf_counter() - n = pl.node_parser.run(docs=docs) - if pl.indexer is not None: - pl.indexer.insert_nodes(n) - if pl.enable_benchmark: - benchmark_data = ( - pl.benchmark.get_benchmark_data(benchmark_index, CompType.NODEPARSER) + time.perf_counter() - start - ) - pl.benchmark.update_benchmark_data(benchmark_index, CompType.NODEPARSER, benchmark_data) - - benchmark_data = pl.benchmark.get_benchmark_data(benchmark_index, BenchType.CHUNK_NUM) + len(n) - pl.benchmark.update_benchmark_data(benchmark_index, BenchType.CHUNK_NUM, benchmark_data) - return n - - async def run_query_search(pl: Pipeline, chat_request: ChatCompletionRequest) -> Any: query = chat_request.messages @@ -437,18 +321,24 @@ async def run_pipeline( benchmark_index = pl.benchmark.init_benchmark_data() contexts = {} retri_res = [] - active_kb = chat_request.user if chat_request.user else None + active_kbs = chat_request.user if chat_request.user else [] enable_rag_retrieval = ( chat_request.chat_template_kwargs.get("enable_rag_retrieval", True) if chat_request.chat_template_kwargs else True ) - if not active_kb: - enable_rag_retrieval = False - elif pl.retriever.comp_subtype == "kbadmin_retriever" and active_kb.comp_subtype == "origin_kb": - enable_rag_retrieval = False - elif pl.retriever.comp_subtype != "kbadmin_retriever" and active_kb.comp_subtype == "kbadmin_kb": + if not active_kbs: enable_rag_retrieval = False + # If using multiple knowledge bases, unstructured node parser cannot work with other types of node parser + np_types = set() + for kb in active_kbs: + if 
kb.comp_subtype == "kbadmin_kb": + np_types.add("kbadmin_node_parser") + else: + np_types.add(kb.node_parser.comp_subtype) + if len(np_types) > 1 and NodeParserType.UNSTRUCTURED in np_types: + raise ValueError("unstructured node parser cannot work with other types of node parser") + np_type = next(iter(np_types), None) query = chat_request.messages sub_questionss_result = None experience_status = True if chat_request.tool_choice == "auto" else False @@ -466,10 +356,12 @@ async def run_pipeline( start = time.perf_counter() top_k = ( None - if chat_request.k == pl.retriever.topk or chat_request.k != 0 or chat_request.k is None + if chat_request.k == pl.retrievers[0].topk or chat_request.k != 0 or chat_request.k is None else chat_request.k ) - retri_res = pl.retriever.run(query=query, top_k=top_k) + retri_res = [] + for retriever in pl.retrievers: + retri_res.extend(retriever.run(query=query, top_k=top_k)) if pl.enable_benchmark: pl.benchmark.update_benchmark_data(benchmark_index, CompType.RETRIEVER, time.perf_counter() - start) start = time.perf_counter() @@ -494,7 +386,6 @@ async def run_pipeline( _, prompt_str = target_generator.query_transform(chat_request, retri_res) input_token_size = pl.benchmark.cal_input_token_size(prompt_str) - np_type = pl.node_parser.comp_subtype if pl.enable_benchmark: start = time.perf_counter() if target_generator.inference_type == InferenceType.LOCAL: @@ -519,7 +410,12 @@ async def run_pipeline( async def run_generator( pl: Pipeline, chat_request: ChatCompletionRequest, generator_type: str = GeneratorType.CHATQNA ) -> Any: - np_type = pl.node_parser.comp_subtype + active_kbs = chat_request.user if chat_request.user else [] + # If using multiple knowledge bases, unstructured node parser cannot work with other types of node parser + np_types = {kb.node_parser.comp_subtype for kb in active_kbs} + if len(np_types) > 1 and NodeParserType.UNSTRUCTURED in np_types: + raise ValueError("unstructured node parser cannot work with other types of 
node parser") + np_type = active_kbs[0].node_parser.comp_subtype if active_kbs else None target_generator = pl.get_generator(generator_type) if target_generator is None: raise ValueError(f"No Generator ({generator_type}) Specified") diff --git a/EdgeCraftRAG/edgecraftrag/components/retriever.py b/EdgeCraftRAG/edgecraftrag/components/retriever.py index cdd3fe0bc2..62ee1b5630 100644 --- a/EdgeCraftRAG/edgecraftrag/components/retriever.py +++ b/EdgeCraftRAG/edgecraftrag/components/retriever.py @@ -6,14 +6,11 @@ import requests from edgecraftrag.base import BaseComponent, CompType, RetrieverType -from langchain_milvus import Milvus -from langchain_openai import OpenAIEmbeddings from llama_index.core.indices.vector_store.retrievers import VectorIndexRetriever from llama_index.core.retrievers import AutoMergingRetriever from llama_index.core.schema import BaseNode, Document, NodeWithScore from llama_index.retrievers.bm25 import BM25Retriever from pydantic import model_serializer -from pymilvus import Collection, MilvusException, connections, utility class VectorSimRetriever(BaseComponent, VectorIndexRetriever): @@ -112,10 +109,13 @@ def __init__(self, indexer, **kwargs): ) self._docstore = indexer._docstore self.topk = kwargs["similarity_top_k"] + self._index = indexer def run(self, **kwargs) -> Any: for k, v in kwargs.items(): if k == "query": + if self._index.comp_subtype == "milvus_vector": + raise NotImplementedError("not support BM25 retriever for Milvus vector store") top_k = kwargs["top_k"] if kwargs["top_k"] else self.topk nodes = cast(List[BaseNode], list(self._docstore.docs.values())) similarity_top_k = min(len(nodes), top_k) @@ -141,37 +141,13 @@ def __init__(self, indexer, **kwargs): comp_type=CompType.RETRIEVER, comp_subtype=RetrieverType.KBADMIN_RETRIEVER, ) - self.vector_db = None + self.vector_db = indexer.vector_db self.collection_name = None self.topk = kwargs.get("similarity_top_k", 30) - self.KBADMIN_MILVUS_URL = indexer.vector_url - 
self.CONNECTION_ARGS = {"uri": indexer.vector_url} - self.vector_field = "q_1024_vec" - self.text_field = "content_with_weight" self.embedding_model_name = indexer.embed_model - self.embedding_url = indexer.kbadmin_embedding_url + "/v3" - self.embedding = OpenAIEmbeddings( - model=self.embedding_model_name, - api_key="unused", - base_url=self.embedding_url, - tiktoken_enabled=False, - embedding_ctx_length=510, - ) - - def config_kbadmin_milvus(self, knowledge_name): - collection_name = knowledge_name - if not kbs_rev_maps: - get_kbs_info(self.CONNECTION_ARGS) - collection_name = kbs_rev_maps[collection_name] - self.vector_db = Milvus( - self.embedding, - connection_args=self.CONNECTION_ARGS, - collection_name=collection_name, - vector_field=self.vector_field, - text_field=self.text_field, - enable_dynamic_field=True, - index_params={"index_type": "FLAT", "metric_type": "IP", "params": {}}, - ) + self.embedding_url = indexer.kbadmin_embedding_url + self.embedding = indexer.embedding + self._index = indexer def similarity_search_with_embedding(self, query: str, k) -> list[tuple[Document, float]]: url = self.embedding_url + "/embeddings" @@ -201,70 +177,5 @@ def run(self, **kwargs) -> Any: @model_serializer def ser_model(self): - set = {"idx": self.idx, "retriever_type": self.comp_subtype, "CONNECTION_ARGS": self.CONNECTION_ARGS} + set = {"idx": self.idx, "retriever_type": self.comp_subtype} return set - - -# global kbs maps. 
-global kbs_rev_maps -kbs_rev_maps = {} - - -def get_kbs_info(CONNECTION_ARGS): - alias = "default" - try: - connections.connect("default", **CONNECTION_ARGS) - collections = utility.list_collections() - all_kb_infos = {} - new_infos = {} - for kb in collections: - collection = Collection(kb) - collection.load() - try: - if any(field.name == "kb_id" for field in collection.schema.fields): - docs = collection.query( - expr="pk != 0", - output_fields=["kb_name", "kb_id", "docnm_kwd"], - timeout=10, - ) - else: - docs = collection.query( - expr="pk != 0", - output_fields=["filename"], - timeout=10, - ) - collection.release() - except MilvusException as e: - continue - this_kbinfo = {} - for doc in docs: - try: - if "kb_name" in doc: - if not this_kbinfo: - this_kbinfo["name"] = doc["kb_name"] - this_kbinfo["uuid"] = doc["kb_id"] - this_kbinfo["files"] = set([doc["docnm_kwd"]]) - else: - this_kbinfo["files"].add(doc["docnm_kwd"]) - else: - if not this_kbinfo: - this_kbinfo["name"] = kb - this_kbinfo["uuid"] = "" - this_kbinfo["files"] = set([doc["filename"]]) - else: - this_kbinfo["files"].add(doc["filename"]) - except KeyError: - this_kbinfo = None - break - if this_kbinfo: - unique_files = list(this_kbinfo["files"]) - this_kbinfo["files"] = unique_files - new_infos[kb] = this_kbinfo - all_kb_infos.update(new_infos) - kbs_rev_maps.clear() - for kb_id in all_kb_infos: - kbs_rev_maps[all_kb_infos[kb_id]["name"]] = kb_id - return kbs_rev_maps - finally: - if connections.has_connection(alias): - connections.disconnect(alias) diff --git a/EdgeCraftRAG/edgecraftrag/config_repository.py b/EdgeCraftRAG/edgecraftrag/config_repository.py index d00d01596d..6e7cb52a5d 100644 --- a/EdgeCraftRAG/edgecraftrag/config_repository.py +++ b/EdgeCraftRAG/edgecraftrag/config_repository.py @@ -292,7 +292,6 @@ async def save_pipeline_configurations(operation: str = None, pipeline=None): if chatqna_gen: if GeneratorType.CHATQNA in gens_data: gens_data[GeneratorType.CHATQNA]["prompt_content"] 
= chatqna_gen.prompt_content - target_data["documents_cache"] = pipeline.documents_cache target_data["active"] = pipeline.status.active if pipeline_milvus_repo: @@ -340,17 +339,14 @@ async def save_knowledge_configurations(operation: str = None, kb=None): try: if not kb: return {"message": "Missing knowledgebase data"} - target_kb = { - "idx": kb.idx, - "name": kb.name, - "description": kb.description, - "active": kb.active, - "file_paths": kb.file_paths, - "comp_type": kb.comp_type, - "comp_subtype": kb.comp_subtype, - "experience_active": kb.experience_active, - "all_document_maps": kb.all_document_maps, - } + json_str = kb.get_knowledge_json + # Node parers and indexer is loaded from json file + target_kb = json.loads(json_str) + target_kb["idx"] = kb.idx + target_kb["description"] = kb.description + target_kb["experience_active"] = kb.experience_active + target_kb["all_document_maps"] = kb.all_document_maps + target_kb["file_paths"] = kb.file_paths target_idx = target_kb.get("idx") if not target_idx: return {"message": "Missing 'idx' in knowledgebase data"} diff --git a/EdgeCraftRAG/edgecraftrag/controllers/filemgr.py b/EdgeCraftRAG/edgecraftrag/controllers/filemgr.py index 0278f1f6ac..839ac23ef7 100644 --- a/EdgeCraftRAG/edgecraftrag/controllers/filemgr.py +++ b/EdgeCraftRAG/edgecraftrag/controllers/filemgr.py @@ -3,6 +3,7 @@ import asyncio import os +from pathlib import Path from typing import Any, Callable, List, Optional from edgecraftrag.base import BaseMgr @@ -61,11 +62,13 @@ def get_all_docs(self) -> List[Document]: all_docs.extend(file.documents) return all_docs - def get_docs_by_file(self, name) -> List[Document]: + def get_docs_by_file(self, file_path) -> List[Document]: + name = Path(file_path).name file = self.get_file_by_name_or_id(name) return file.documents if file else [] - def del_file(self, name): + def del_file(self, file_path): + name = Path(file_path).name file = self.get_file_by_name_or_id(name) if file: self.remove(file.idx) @@ -73,7 
+76,8 @@ def del_file(self, name): else: return False - def update_file(self, name): + def update_file(self, file_path): + name = Path(file_path).name file = self.get_file_by_name_or_id(name) if file: self.remove(file.idx) diff --git a/EdgeCraftRAG/edgecraftrag/controllers/knowledge_basemgr.py b/EdgeCraftRAG/edgecraftrag/controllers/knowledge_basemgr.py index 233a2bd5bd..9dc91533cd 100644 --- a/EdgeCraftRAG/edgecraftrag/controllers/knowledge_basemgr.py +++ b/EdgeCraftRAG/edgecraftrag/controllers/knowledge_basemgr.py @@ -1,6 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import gc from typing import Any, Dict, List, Optional from edgecraftrag.api_schema import KnowledgeBaseCreateIn @@ -12,7 +13,7 @@ class KnowledgeManager(BaseMgr): def __init__(self): super().__init__() - self.active_knowledge_idx: Optional[str] = None + self.active_knowledge_idx: Optional[List[str]] = [] self.active_experience_idx: Optional[str] = None def get_knowledge_base_by_name_or_id(self, name: str): @@ -21,11 +22,11 @@ def get_knowledge_base_by_name_or_id(self, name: str): return kb raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="knowledge base does not exist") - def get_active_knowledge_base(self) -> Optional[Knowledge]: - if self.active_knowledge_idx: - return self.get_knowledge_base_by_name_or_id(self.active_knowledge_idx) - else: - return None + def get_active_knowledge_base(self) -> Optional[List[Knowledge]]: + active_kbs = [] + for idx in self.active_knowledge_idx: + active_kbs.append(self.get_knowledge_base_by_name_or_id(idx)) + return active_kbs def get_active_experience(self): if self.active_experience_idx: @@ -37,12 +38,15 @@ def active_knowledge(self, knowledge: KnowledgeBaseCreateIn): kb = self.get_knowledge_base_by_name_or_id(knowledge.name) if kb.comp_type != "knowledge": raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail="Experience type cannot be active") - kb = 
self.get_knowledge_base_by_name_or_id(knowledge.name) - self.active_knowledge_idx = kb.idx if knowledge.active else None + self.active_knowledge_idx.append(kb.idx) + kb.active = True + return kb - for idx, comp in self.components.items(): - if isinstance(comp, Knowledge): - comp.active = idx == self.active_knowledge_idx + def deactive_knowledge(self, knowledge: KnowledgeBaseCreateIn): + kb = self.get_knowledge_base_by_name_or_id(knowledge.name) + if kb.idx in self.active_knowledge_idx: + self.active_knowledge_idx.remove(kb.idx) + kb.active = False return kb def active_experience(self, knowledge: KnowledgeBaseCreateIn): @@ -56,12 +60,12 @@ def active_experience(self, knowledge: KnowledgeBaseCreateIn): comp.experience_active = idx == self.active_experience_idx return kb - def create_knowledge_base(self, knowledge: KnowledgeBaseCreateIn) -> Knowledge: + def create_knowledge_base(self, knowledge: KnowledgeBaseCreateIn, origin_json: str) -> Knowledge: for _, kb in self.components.items(): if kb.name == knowledge.name: raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail="The knowledge base already exists.") if knowledge.comp_type == "experience": - for idx, kb in self.components.items(): + for _, kb in self.components.items(): if kb.comp_type == "experience": raise HTTPException( status_code=status.HTTP_409_CONFLICT, detail="Only one experience class can be created." 
@@ -80,6 +84,7 @@ def create_knowledge_base(self, knowledge: KnowledgeBaseCreateIn) -> Knowledge: experience_active=knowledge.experience_active, all_document_maps=knowledge.all_document_maps, file_paths=knowledge.file_paths, + origin_json=origin_json, ) self.add(kb) if knowledge.active: @@ -90,16 +95,26 @@ def create_knowledge_base(self, knowledge: KnowledgeBaseCreateIn) -> Knowledge: def delete_knowledge_base(self, name: str): kb = self.get_knowledge_base_by_name_or_id(name) + kb.node_parser = None + kb.indexer = None self.remove(kb.idx) + del kb + gc.collect() return "Knowledge base removed successfully" - def update_knowledge_base(self, knowledge) -> Knowledge: + def update_knowledge_base(self, knowledge, active_pl=None) -> Knowledge: kb = self.get_knowledge_base_by_name_or_id(knowledge.name) + if knowledge.active is None: + knowledge.active = False + kb.update_knowledge_json({"active": knowledge.active}) if kb.comp_type == "knowledge": if knowledge.description is not None: kb.description = knowledge.description - if knowledge.active is not None and kb.active != knowledge.active: - kb = self.active_knowledge(knowledge) + if kb.active != knowledge.active: + kb = self.active_knowledge(knowledge) if knowledge.active else self.deactive_knowledge(knowledge) + if active_pl: + active_pl.update_retriever_list(self.get_active_knowledge_base()) + if kb.comp_type == "experience": if knowledge.description is not None: kb.description = knowledge.description diff --git a/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py b/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py index 4b296c92ab..7eb00e386c 100644 --- a/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py +++ b/EdgeCraftRAG/edgecraftrag/controllers/pipelinemgr.py @@ -3,13 +3,12 @@ import asyncio import gc -from typing import Any, List +from typing import Any from comps.cores.proto.api_protocol import ChatCompletionRequest from edgecraftrag.base import BaseMgr, CallbackType +from 
edgecraftrag.components.knowledge_base import Knowledge from edgecraftrag.components.pipeline import Pipeline -from edgecraftrag.controllers.nodemgr import NodeMgr -from llama_index.core.schema import Document class PipelineMgr(BaseMgr): @@ -23,12 +22,11 @@ def __init__(self): def create_pipeline(self, request, origin_json: str): if isinstance(request, str): name = request - idx, documents_cache = None, None + idx = None else: name = request.name idx = request.idx - documents_cache = request.documents_cache - pl = Pipeline(name, origin_json, idx, documents_cache) + pl = Pipeline(name, origin_json, idx) self.add(pl) return pl @@ -46,8 +44,6 @@ def remove_pipeline_by_name_or_id(self, name: str): raise Exception("Unable to remove an active pipeline...") if self._prev_active_pipeline_name and pl.name == self._prev_active_pipeline_name: raise Exception("Pipeline is currently cached, unable to remove...") - pl.node_parser = None - pl.indexer = None pl.retriever = None pl.postprocessor = None pl.generator = None @@ -55,8 +51,6 @@ def remove_pipeline_by_name_or_id(self, name: str): pl.status = None pl.run_pipeline_cb = None pl.run_retriever_cb = None - pl.run_data_prepare_cb = None - pl._node_changed = None self.remove(pl.idx) del pl gc.collect() @@ -67,7 +61,7 @@ def get_pipelines(self, gen_type: str = None): return [pl for _, pl in self.components.items() if (pl.get_generator(gen_type) is not None)] return [pl for _, pl in self.components.items()] - def activate_pipeline(self, name: str, active: bool, nm: NodeMgr, kb_name: None, cache_prev: bool = False): + def activate_pipeline(self, name: str, active: bool, active_kbs: list[Knowledge], cache_prev: bool = False): pl = self.get_pipeline_by_name_or_id(name) if pl is None: return @@ -77,10 +71,10 @@ def activate_pipeline(self, name: str, active: bool, nm: NodeMgr, kb_name: None, self._active_pipeline = None return - nodelist = None - # if pl.node_changed: - # nodelist = nm.get_nodes(pl.node_parser.idx) - 
pl.check_active(nodelist, kb_name) + # update activate indexers for pipeline retriever + pl.update_retriever_list(active_kbs) + + # set previous active pipeline to inactive prevactive = self._active_pipeline if prevactive: prevactive.status.active = False @@ -99,10 +93,6 @@ def get_prev_active_pipeline_name(self) -> str: def clear_prev_active_pipeline_name(self): self._prev_active_pipeline_name = None - def notify_node_change(self): - for _, pl in self.components.items(): - pl.set_node_change() - async def run_pipeline(self, chat_request: ChatCompletionRequest) -> Any: ap = self.get_active_pipeline() if ap is not None: @@ -132,9 +122,3 @@ async def run_postprocess(self, chat_request: ChatCompletionRequest, contexts) - out = await ap.run(cbtype=CallbackType.POSTPROCESS, chat_request=chat_request, contexts=contexts) return out return -1 - - async def run_data_prepare(self, docs: List[Document]) -> Any: - ap = self.get_active_pipeline() - if ap is not None: - return await ap.run(cbtype=CallbackType.DATAPREP, docs=docs) - return -1 diff --git a/EdgeCraftRAG/tests/common.sh b/EdgeCraftRAG/tests/common.sh index 448238f48d..67388822c5 100644 --- a/EdgeCraftRAG/tests/common.sh +++ b/EdgeCraftRAG/tests/common.sh @@ -2,6 +2,10 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +ip_address=$(hostname -I | awk '{print $1}') +HOST_IP=$ip_address +EC_RAG_SERVICE_PORT=16010 + function validate_services() { local URL="$1" local EXPECTED_RESULT="$2" @@ -44,3 +48,20 @@ function check_gpu_usage() { exit 1 fi } + +function validate_knowledge() { + # add data to knowledge base + validate_services \ + "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/knowledge" \ + "Done" \ + "data" \ + "edgecraftrag-server" \ + '@configs/test_kb.json' + + validate_services \ + "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/knowledge/default_kb/files" \ + "Done" \ + "data" \ + "edgecraftrag-server" \ + '{"local_path":"/home/user/ui_cache"}' +} diff --git 
a/EdgeCraftRAG/tests/configs/test_data.json b/EdgeCraftRAG/tests/configs/test_data.json deleted file mode 100644 index 648ae9624d..0000000000 --- a/EdgeCraftRAG/tests/configs/test_data.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "text": "A test case for the rag pipeline. The test id is 1234567890. There are several tests in this test case. The first test is for node parser. There are 3 types of node parsers. Their names are Aa, Bb and Cc. The second test is for indexer. The indexer will do the indexing for the given nodes. The last test is for retriever. Retrieving text is based on similarity search." -} diff --git a/EdgeCraftRAG/tests/configs/test_kb.json b/EdgeCraftRAG/tests/configs/test_kb.json new file mode 100644 index 0000000000..576cabbb1d --- /dev/null +++ b/EdgeCraftRAG/tests/configs/test_kb.json @@ -0,0 +1,20 @@ +{ + "name": "default_kb", + "comp_subtype": "origin_kb", + "comp_type": "knowledge", + "node_parser": { + "chunk_size": 250, + "chunk_overlap": 48, + "parser_type": "simple" + }, + "indexer": { + "indexer_type": "faiss_vector", + "embedding_model": { + "model_id": "BAAI/bge-small-en-v1.5", + "model_path": "./models/BAAI/bge-small-en-v1.5", + "device": "auto", + "weight": "INT4" + } + }, + "active": "True" +} diff --git a/EdgeCraftRAG/tests/configs/test_pipeline_ipex_vllm.json b/EdgeCraftRAG/tests/configs/test_pipeline_ipex_vllm.json index e76922c154..bafe62350f 100644 --- a/EdgeCraftRAG/tests/configs/test_pipeline_ipex_vllm.json +++ b/EdgeCraftRAG/tests/configs/test_pipeline_ipex_vllm.json @@ -1,19 +1,5 @@ { "name": "rag_test_local_llm", - "node_parser": { - "chunk_size": 400, - "chunk_overlap": 48, - "parser_type": "simple" - }, - "indexer": { - "indexer_type": "faiss_vector", - "embedding_model": { - "model_id": "BAAI/bge-small-en-v1.5", - "model_path": "./models/BAAI/bge-small-en-v1.5", - "device": "auto", - "weight": "INT4" - } - }, "retriever": { "retriever_type": "vectorsimilarity", "retrieve_topk": 30 diff --git 
a/EdgeCraftRAG/tests/configs/test_pipeline_local_llm.json b/EdgeCraftRAG/tests/configs/test_pipeline_local_llm.json index c07e498169..985152ce2c 100644 --- a/EdgeCraftRAG/tests/configs/test_pipeline_local_llm.json +++ b/EdgeCraftRAG/tests/configs/test_pipeline_local_llm.json @@ -1,19 +1,5 @@ { "name": "rag_test_local_llm", - "node_parser": { - "chunk_size": 400, - "chunk_overlap": 48, - "parser_type": "simple" - }, - "indexer": { - "indexer_type": "faiss_vector", - "embedding_model": { - "model_id": "BAAI/bge-small-en-v1.5", - "model_path": "./models/BAAI/bge-small-en-v1.5", - "device": "auto", - "weight": "INT4" - } - }, "retriever": { "retriever_type": "vectorsimilarity", "retrieve_topk": 30 diff --git a/EdgeCraftRAG/tests/test_compose_on_arc.sh b/EdgeCraftRAG/tests/test_compose_on_arc.sh index 0e2148dc28..cdb2cd7e31 100755 --- a/EdgeCraftRAG/tests/test_compose_on_arc.sh +++ b/EdgeCraftRAG/tests/test_compose_on_arc.sh @@ -67,13 +67,8 @@ function validate_rag() { "edgecraftrag-server" \ '@configs/test_pipeline_local_llm.json' - # add data - validate_services \ - "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/data" \ - "Done" \ - "data" \ - "edgecraftrag-server" \ - '@configs/test_data.json' + # add data to knowledge base + validate_knowledge # query validate_services \ diff --git a/EdgeCraftRAG/tests/test_compose_vllm_on_arc.sh b/EdgeCraftRAG/tests/test_compose_vllm_on_arc.sh index 50f458ae8a..ada4cdb7da 100755 --- a/EdgeCraftRAG/tests/test_compose_vllm_on_arc.sh +++ b/EdgeCraftRAG/tests/test_compose_vllm_on_arc.sh @@ -28,7 +28,7 @@ UI_UPLOAD_PATH="$WORKPATH/tests" HF_ENDPOINT=https://hf-mirror.com VLLM_SERVICE_PORT_A770=8086 -TENSOR_PARALLEL_SIZE=1 +TP=1 vLLM_ENDPOINT="http://${HOST_IP}:${VLLM_SERVICE_PORT_A770}" LLM_MODEL="Qwen/Qwen3-8B" LLM_MODEL_PATH="${MODEL_PATH}/${LLM_MODEL}" @@ -106,13 +106,8 @@ function validate_rag() { "edgecraftrag-server" \ '@configs/test_pipeline_ipex_vllm.json' - # add data - validate_services \ - "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/data" \ 
- "Done" \ - "data" \ - "edgecraftrag-server" \ - '@configs/test_data.json' + # add data to knowledge base + validate_knowledge # query validate_services \ diff --git a/EdgeCraftRAG/tests/test_compose_vllm_on_arc_b60.sh b/EdgeCraftRAG/tests/test_compose_vllm_on_arc_b60.sh index 0d443b391e..d7245a9ca9 100755 --- a/EdgeCraftRAG/tests/test_compose_vllm_on_arc_b60.sh +++ b/EdgeCraftRAG/tests/test_compose_vllm_on_arc_b60.sh @@ -107,13 +107,8 @@ function validate_rag() { "edgecraftrag-server" \ '@configs/test_pipeline_ipex_vllm.json' - # add data - validate_services \ - "${HOST_IP}:${EC_RAG_SERVICE_PORT}/v1/data" \ - "Done" \ - "data" \ - "edgecraftrag-server" \ - '@configs/test_data.json' + # add data to knowledge base + validate_knowledge # query validate_services \ diff --git a/EdgeCraftRAG/tests/test_pipeline_ipex_vllm.json b/EdgeCraftRAG/tests/test_pipeline_ipex_vllm.json index e76922c154..bafe62350f 100644 --- a/EdgeCraftRAG/tests/test_pipeline_ipex_vllm.json +++ b/EdgeCraftRAG/tests/test_pipeline_ipex_vllm.json @@ -1,19 +1,5 @@ { "name": "rag_test_local_llm", - "node_parser": { - "chunk_size": 400, - "chunk_overlap": 48, - "parser_type": "simple" - }, - "indexer": { - "indexer_type": "faiss_vector", - "embedding_model": { - "model_id": "BAAI/bge-small-en-v1.5", - "model_path": "./models/BAAI/bge-small-en-v1.5", - "device": "auto", - "weight": "INT4" - } - }, "retriever": { "retriever_type": "vectorsimilarity", "retrieve_topk": 30 diff --git a/EdgeCraftRAG/tests/test_pipeline_local_llm.json b/EdgeCraftRAG/tests/test_pipeline_local_llm.json index c07e498169..985152ce2c 100644 --- a/EdgeCraftRAG/tests/test_pipeline_local_llm.json +++ b/EdgeCraftRAG/tests/test_pipeline_local_llm.json @@ -1,19 +1,5 @@ { "name": "rag_test_local_llm", - "node_parser": { - "chunk_size": 400, - "chunk_overlap": 48, - "parser_type": "simple" - }, - "indexer": { - "indexer_type": "faiss_vector", - "embedding_model": { - "model_id": "BAAI/bge-small-en-v1.5", - "model_path": 
"./models/BAAI/bge-small-en-v1.5", - "device": "auto", - "weight": "INT4" - } - }, "retriever": { "retriever_type": "vectorsimilarity", "retrieve_topk": 30 diff --git a/EdgeCraftRAG/tools/README.md b/EdgeCraftRAG/tools/README.md index 4299580577..0b2c2bde6d 100755 --- a/EdgeCraftRAG/tools/README.md +++ b/EdgeCraftRAG/tools/README.md @@ -1,28 +1,93 @@ -# EdgeCraftRAG one-click deployment script +# EdgeCraftRAG tool scripts -## run quick_start.sh +[中文版](README_zh.md) -### Run with Non-Interactive Mode: +This directory contains helper scripts for building images and starting EC-RAG services. -You need to set environment before execute script, you can refer to [Prepare env variables and configurations](../docker_compose/intel/gpu/arc/README.md#4-prepare-env-variables-and-configurations) for env details. +## Scripts + +- `quick_start.sh`: one-click startup for OpenVINO or vLLM deployment +- `build_images.sh`: build EC-RAG Docker images + +--- + +## quick_start.sh + +Run from the `EdgeCraftRAG` root directory: + +```bash +./tools/quick_start.sh +``` + +### Default behavior + +If no environment variables are provided, the script uses these defaults: + +```bash +MODEL_PATH=${WORKSPACE}/workspace/models +DOC_PATH=${WORKSPACE}/workspace +TMPFILE_PATH=${WORKSPACE}/workspace +LLM_MODEL=Qwen/Qwen3-8B +``` + +The script will also: + +- create and activate a Python virtual environment automatically +- install `python3-venv` if needed +- check whether required models exist under `MODEL_PATH` +- automatically download missing embedding, reranker, and LLM models +- print the UI access URL after startup completes + +### Non-interactive mode + +By default, non-interactive mode starts local OpenVINO services. 
+ +```bash +./tools/quick_start.sh +``` + +You can override defaults with environment variables: + +```bash +export MODEL_PATH="${PWD}/workspace/models" +export DOC_PATH="${PWD}/workspace" +export TMPFILE_PATH="${PWD}/workspace" +export LLM_MODEL="Qwen/Qwen3-8B" +export HOST_IP="$(hostname -I | awk '{print $1}')" + +./tools/quick_start.sh +``` + +### Select deployment mode with `COMPOSE_PROFILES` + +#### OpenVINO on Core Ultra, B60 or A770 + +```bash +./tools/quick_start.sh +``` + +#### vLLM on Intel Arc A770 + +```bash +export COMPOSE_PROFILES=vLLM_A770 +./tools/quick_start.sh +``` + +#### vLLM on Intel Arc B60 + +```bash +export COMPOSE_PROFILES=vLLM_B60 +./tools/quick_start.sh +``` + +Optional B60/vLLM variables: ```bash -# set start up env, below is an example: -export MODEL_PATH="${PWD}/models" -export LLM_MODEL="Qwen/Qwen3-8B" # Your model id -export HOST_IP=$ip_address # Your host ip -export VIDEOGROUPID=$(getent group video | cut -d: -f3) -export RENDERGROUPID=$(getent group render | cut -d: -f3) -export no_proxy=${no_proxy},${HOST_IP},edgecraftrag,edgecraftrag-server -export NO_PROXY=${NO_PROXY},${HOST_IP},edgecraftrag,edgecraftrag-server -export DOC_PATH=${PWD}/tests # your DOC_PATH -export TMPFILE_PATH=${PWD}/tests # your TMPFILE_PATH -# set vllm launch env, below is an example: export VLLM_SERVICE_PORT_B60=8086 export DTYPE=float16 -export TP=1 # for multi GPU, you can change TP value +export TP=1 export DP=1 -export ZE_AFFINITY_MASK=0 # for multi GPU, you can export ZE_AFFINITY_MASK=0,1,2... 
+export ZE_AFFINITY_MASK=0 export ENFORCE_EAGER=1 export TRUST_REMOTE_CODE=1 export DISABLE_SLIDING_WINDOW=1 @@ -33,12 +98,84 @@ export DISABLE_LOG_REQUESTS=1 export MAX_MODEL_LEN=49152 export BLOCK_SIZE=64 export QUANTIZATION=fp8 -bash quick_start.sh ``` -### Run with Interactive Mode: +### Interactive mode + +```bash +bash -i ./tools/quick_start.sh +``` + +In interactive mode, the script prompts for: + +- deployment mode: `vLLM_A770`, `vLLM_B60`, or `ov` +- `HOST_IP` +- `DOC_PATH` +- `TMPFILE_PATH` +- `MODEL_PATH` +- `LLM_MODEL` +- optional vLLM runtime settings + +### Model check and auto-download + +The script checks these model locations automatically: + +#### Shared models + +```text +${MODEL_PATH}/BAAI/bge-small-en-v1.5 +${MODEL_PATH}/BAAI/bge-reranker-large +``` + +#### vLLM mode + +```text +${MODEL_PATH}/${LLM_MODEL} +``` + +#### OpenVINO mode + +```text +${MODEL_PATH}/${LLM_MODEL}/INT4_compressed_weights +``` + +If a required model is missing, the script downloads it automatically and prints a message. + +### UI access output + +After startup completes, the script prints: + +```text +Service launched successfully. +UI access URL: http://${HOST_IP}:8082 +If you are accessing from another machine, replace ${HOST_IP} with the server's reachable IP or hostname. +``` + +### Cleanup + +To stop and remove the deployed containers: + +```bash +./tools/quick_start.sh cleanup +``` + +--- + +## build_images.sh + +Build all images: ```bash -bash -i quick_start.sh -# In this mode, you can follow the Interactive guide to set env and finish start up +./tools/build_images.sh ``` + +Build selected images only: + +```bash +./tools/build_images.sh mega +./tools/build_images.sh server +./tools/build_images.sh ui +./tools/build_images.sh all +``` + +For full deployment details, refer to [../docs/Advanced_Setup.md](../docs/Advanced_Setup.md). 
diff --git a/EdgeCraftRAG/tools/README_zh.md b/EdgeCraftRAG/tools/README_zh.md new file mode 100644 index 0000000000..4a37187eb6 --- /dev/null +++ b/EdgeCraftRAG/tools/README_zh.md @@ -0,0 +1,181 @@ +# EdgeCraftRAG 工具脚本 + +[English](README.md) + +本目录包含用于构建镜像和启动 EC-RAG 服务的辅助脚本。 + +## 脚本 + +- `quick_start.sh`:一键启动 OpenVINO 或 vLLM 部署 +- `build_images.sh`:构建 EC-RAG Docker 镜像 + +--- + +## quick_start.sh + +请在 `EdgeCraftRAG` 根目录下运行: + +```bash +./tools/quick_start.sh +``` + +### 默认行为 + +如果未提供环境变量,脚本会使用以下默认值: + +```bash +MODEL_PATH=${WORKSPACE}/workspace/models +DOC_PATH=${WORKSPACE}/workspace +TMPFILE_PATH=${WORKSPACE}/workspace +LLM_MODEL=Qwen/Qwen3-8B +``` + +脚本还会自动执行以下操作: + +- 自动创建并激活 Python 虚拟环境 +- 在需要时安装 `python3-venv` +- 检查 `MODEL_PATH` 下必需模型是否存在 +- 自动下载缺失的 embedding、reranker 和 LLM 模型 +- 在启动完成后输出 UI 访问地址 + +### 非交互模式 + +默认情况下,非交互模式启动本地 OpenVINO 服务。 + +```bash +./tools/quick_start.sh +``` + +你也可以通过环境变量覆盖默认值: + +```bash +export MODEL_PATH="${PWD}/workspace/models" +export DOC_PATH="${PWD}/workspace" +export TMPFILE_PATH="${PWD}/workspace" +export LLM_MODEL="Qwen/Qwen3-8B" +export HOST_IP="$(hostname -I | awk '{print $1}')" + +./tools/quick_start.sh +``` + +### 使用 `COMPOSE_PROFILES` 选择部署模式 + +#### Core Ultra、B60 或 A770 上的 OpenVINO + +```bash +./tools/quick_start.sh +``` + +#### Intel Arc A770 上的 vLLM + +```bash +export COMPOSE_PROFILES=vLLM_A770 +./tools/quick_start.sh +``` + +#### Intel Arc B60 上的 vLLM + +```bash +export COMPOSE_PROFILES=vLLM_B60 +./tools/quick_start.sh +``` + +可选的 B60/vLLM 环境变量: + +```bash +export VLLM_SERVICE_PORT_B60=8086 +export DTYPE=float16 +export TP=1 +export DP=1 +export ZE_AFFINITY_MASK=0 +export ENFORCE_EAGER=1 +export TRUST_REMOTE_CODE=1 +export DISABLE_SLIDING_WINDOW=1 +export GPU_MEMORY_UTIL=0.8 +export NO_ENABLE_PREFIX_CACHING=1 +export MAX_NUM_BATCHED_TOKENS=8192 +export DISABLE_LOG_REQUESTS=1 +export MAX_MODEL_LEN=49152 +export BLOCK_SIZE=64 +export QUANTIZATION=fp8 +``` + +### 交互模式 + +```bash +bash -i ./tools/quick_start.sh +``` + 
+在交互模式下,脚本会提示你输入: + +- 部署模式:`vLLM_A770`、`vLLM_B60` 或 `ov` +- `HOST_IP` +- `DOC_PATH` +- `TMPFILE_PATH` +- `MODEL_PATH` +- `LLM_MODEL` +- 可选的 vLLM 运行参数 + +### 模型检查与自动下载 + +脚本会自动检查以下模型路径: + +#### 公共模型 + +```text +${MODEL_PATH}/BAAI/bge-small-en-v1.5 +${MODEL_PATH}/BAAI/bge-reranker-large +``` + +#### vLLM 模式 + +```text +${MODEL_PATH}/${LLM_MODEL} +``` + +#### OpenVINO 模式 + +```text +${MODEL_PATH}/${LLM_MODEL}/INT4_compressed_weights +``` + +如果缺少必需模型,脚本会自动下载并输出提示信息。 + +### UI 访问输出 + +启动完成后,脚本会输出: + +```text +Service launched successfully. +UI access URL: http://${HOST_IP}:8082 +If you are accessing from another machine, replace ${HOST_IP} with the server's reachable IP or hostname. +``` + +### 清理 + +停止并移除已部署容器: + +```bash +./tools/quick_start.sh cleanup +``` + +--- + +## build_images.sh + +构建全部镜像: + +```bash +./tools/build_images.sh +``` + +只构建指定镜像: + +```bash +./tools/build_images.sh mega +./tools/build_images.sh server +./tools/build_images.sh ui +./tools/build_images.sh all +``` + +完整部署说明请参考 [../docs/Advanced_Setup_zh.md](../docs/Advanced_Setup_zh.md)。 diff --git a/EdgeCraftRAG/tools/build_images.sh b/EdgeCraftRAG/tools/build_images.sh new file mode 100755 index 0000000000..abfb0a42dc --- /dev/null +++ b/EdgeCraftRAG/tools/build_images.sh @@ -0,0 +1,85 @@ +#!/usr/bin/env bash +# Copyright (C) 2026 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +PROJECT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" + +cd "${PROJECT_DIR}" + +build_mega() { + echo "Building opea/edgecraftrag:latest" + docker build --no-cache --pull \ + --build-arg http_proxy="${http_proxy-}" \ + --build-arg https_proxy="${https_proxy-}" \ + --build-arg no_proxy="${no_proxy-}" \ + -t opea/edgecraftrag:latest \ + -f Dockerfile . 
+} + +build_server() { + echo "Building opea/edgecraftrag-server:latest" + docker build --no-cache --pull \ + --build-arg http_proxy="${http_proxy-}" \ + --build-arg https_proxy="${https_proxy-}" \ + --build-arg no_proxy="${no_proxy-}" \ + -t opea/edgecraftrag-server:latest \ + -f Dockerfile.server . +} + +build_ui() { + echo "Building opea/edgecraftrag-ui:latest" + docker build --no-cache --pull \ + --build-arg http_proxy="${http_proxy-}" \ + --build-arg https_proxy="${https_proxy-}" \ + --build-arg no_proxy="${no_proxy-}" \ + -t opea/edgecraftrag-ui:latest \ + -f ui/docker/Dockerfile.ui . +} + +print_usage() { + echo "Usage: ./tools/build_images.sh [mega|server|ui|all] ..." + echo "Examples:" + echo " ./tools/build_images.sh" + echo " ./tools/build_images.sh ui" + echo " ./tools/build_images.sh mega server" +} + +if [ "$#" -eq 0 ]; then + build_mega + build_server + build_ui + echo "All images built successfully." + exit 0 +fi + +for target in "$@"; do + case "$target" in + mega) + build_mega + ;; + server) + build_server + ;; + ui) + build_ui + ;; + all) + build_mega + build_server + build_ui + ;; + -h|--help|help) + print_usage + exit 0 + ;; + *) + echo "Unknown target: $target" + print_usage + exit 1 + ;; + esac +done + +echo "Requested image build(s) completed successfully." diff --git a/EdgeCraftRAG/tools/quick_start.sh b/EdgeCraftRAG/tools/quick_start.sh index 0d82bd6ab8..76da043fd0 100755 --- a/EdgeCraftRAG/tools/quick_start.sh +++ b/EdgeCraftRAG/tools/quick_start.sh @@ -4,27 +4,53 @@ set -e -WORKPATH=$(dirname "$(pwd)") +SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +WORKPATH=$(cd "${SCRIPT_DIR}/.." 
&& pwd) ip_address=$(hostname -I | awk '{print $1}') HOST_IP=$ip_address +# global defaults to avoid docker compose warnings on unset variables +export HOST_IP=${HOST_IP:-"${ip_address}"} +export MODEL_PATH=${MODEL_PATH:-"${WORKPATH}/workspace/models"} +export LLM_MODEL=${LLM_MODEL:-"Qwen/Qwen3-8B"} +export DOC_PATH=${DOC_PATH:-"${WORKPATH}/workspace"} +export TMPFILE_PATH=${TMPFILE_PATH:-"${WORKPATH}/workspace"} +export MILVUS_ENABLED=${MILVUS_ENABLED:-"0"} +export CHAT_HISTORY_ROUND=${CHAT_HISTORY_ROUND:-"0"} + #use python venv -ENV_NAME="ecrag_venv" -python -m venv $ENV_NAME +ENV_NAME="${WORKPATH}/ecrag_venv" + +# check if python3-venv (ensurepip) is fully available; install if missing +if ! python3 -c "import ensurepip" &>/dev/null; then + echo "python3-venv (ensurepip) not found, installing..." + PY_VER=$(python3 -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')") + if command -v apt-get &>/dev/null; then + sudo apt-get install -y "python${PY_VER}-venv" + elif command -v dnf &>/dev/null; then + sudo dnf install -y python3-virtualenv + elif command -v yum &>/dev/null; then + sudo yum install -y python3-virtualenv + else + echo "ERROR: Cannot install python3-venv: unsupported package manager. Please install it manually." + exit 1 + fi +fi -# check venv -if [ ! -d "$ENV_NAME" ]; then - echo "Failed to create virtual environment" - exit 1 +# create venv if missing or broken (activate script absent) +if [ ! -f "${ENV_NAME}/bin/activate" ] && [ ! -f "${ENV_NAME}/Scripts/activate" ]; then + echo "Creating virtual environment at ${ENV_NAME}..." 
+ rm -rf "${ENV_NAME}" + python3 -m venv "${ENV_NAME}" fi # activate venv -if [ -f "$ENV_NAME/bin/activate" ]; then - source $ENV_NAME/bin/activate -elif [ -f "$ENV_NAME/Scripts/activate" ]; then - source $ENV_NAME/Scripts/activate +if [ -f "${ENV_NAME}/bin/activate" ]; then + source "${ENV_NAME}/bin/activate" +elif [ -f "${ENV_NAME}/Scripts/activate" ]; then + source "${ENV_NAME}/Scripts/activate" else - echo "Failed to activate virtual environment" + echo "ERROR: Failed to activate virtual environment at ${ENV_NAME}" exit 1 fi @@ -42,6 +68,13 @@ get_enable_function() { echo ${user_input:-$default_value} } +print_ui_access_info() { + echo "" + echo "Service launched successfully." + echo "UI access URL: http://${HOST_IP}:8082" + echo "If you are accessing from another machine, replace ${HOST_IP} with the server's reachable IP or hostname." +} + function start_vllm_services() { COMPOSE_FILE="compose.yaml" echo "stop former service..." @@ -49,12 +82,12 @@ function start_vllm_services() { ip_address=$(hostname -I | awk '{print $1}') HOST_IP=$(get_user_input "host ip" "${ip_address}") - DOC_PATH=$(get_user_input "DOC_PATH" "$WORKPATH/tests") - TMPFILE_PATH=$(get_user_input "TMPFILE_PATH" "$WORKPATH/tests") + DOC_PATH=$(get_user_input "DOC_PATH" "$WORKPATH/workspace") + TMPFILE_PATH=$(get_user_input "TMPFILE_PATH" "$WORKPATH/workspace") MILVUS_ENABLED=$(get_enable_function "MILVUS DB(Enter 1 for enable)" "0") CHAT_HISTORY_ROUND=$(get_user_input "chat history round" "0") LLM_MODEL=$(get_user_input "your LLM model" "Qwen/Qwen3-8B") - MODEL_PATH=$(get_user_input "your model path" "${PWD}/models") + MODEL_PATH=$(get_user_input "your model path" "${WORKPATH}/workspace/models") read -p "Have you prepare models in ${MODEL_PATH}:(yes/no) [yes]" user_input user_input=${user_input:-"yes"} @@ -64,6 +97,7 @@ function start_vllm_services() { # Reranker: ${MODEL_PATH}/BAAI/bge-reranker-large # llm :${MODEL_PATH}/${LLM_MODEL} (从huggingface或modelscope下载的原始模型,而不是经过OpenVINO转换的模型!) 
echo "you skipped model downloading, please make sure you have prepared all models under ${MODEL_PATH}" + ensure_required_models_for_vllm else echo "you have not prepare models, starting to download models into ${MODEL_PATH}..." mkdir -p $MODEL_PATH @@ -85,7 +119,7 @@ function start_vllm_services() { # vllm ENV export VLLM_SERVICE_PORT_A770=8086 - read -p "Tensor parallel size(your tp size [1]), press Enter to confirm, or type a new value:" TENSOR_PARALLEL_SIZE; TENSOR_PARALLEL_SIZE=${TENSOR_PARALLEL_SIZE:-1} + read -p "Tensor parallel size(your tp size [1]), press Enter to confirm, or type a new value:" TP; TP=${TP:-1} CCL_DG2_USM=$(get_user_input "Set USM (Core=1, Xeon=0, default=0)" 0) export HOST_IP=${HOST_IP} # export ENV @@ -97,7 +131,7 @@ function start_vllm_services() { export no_proxy="localhost, 127.0.0.1, 192.168.1.1, ${HOST_IP}" export MILVUS_ENABLED=${MILVUS_ENABLED} export CHAT_HISTORY_ROUND=${CHAT_HISTORY_ROUND} - export TENSOR_PARALLEL_SIZE=${TENSOR_PARALLEL_SIZE} + export TP=${TP} export CCL_DG2_USM=${CCL_DG2_USM} export VIDEOGROUPID=$(getent group video | cut -d: -f3) export RENDERGROUPID=$(getent group render | cut -d: -f3) @@ -116,7 +150,7 @@ function start_vllm_services() { n=$((n+1)) done rm -rf ipex-llm-serving-xpu-container.log - echo "service launched, please visit UI at ${HOST_IP}:8082" + print_ui_access_info } @@ -127,12 +161,12 @@ function start_services() { ip_address=$(hostname -I | awk '{print $1}') HOST_IP=$(get_user_input "host ip" "${ip_address}") - DOC_PATH=$(get_user_input "DOC_PATH" "$WORKPATH/tests") - TMPFILE_PATH=$(get_user_input "TMPFILE_PATH" "$WORKPATH/tests") + DOC_PATH=$(get_user_input "DOC_PATH" "$WORKPATH/workspace") + TMPFILE_PATH=$(get_user_input "TMPFILE_PATH" "$WORKPATH/workspace") MILVUS_ENABLED=$(get_enable_function "MILVUS DB(Enter 1 for enable)" "0") CHAT_HISTORY_ROUND=$(get_user_input "chat history round" "0") LLM_MODEL=$(get_user_input "your LLM model" "Qwen/Qwen3-8B") -
MODEL_PATH=$(get_user_input "your model path" "${PWD}/models") + MODEL_PATH=$(get_user_input "your model path" "${WORKPATH}/workspace/models") read -p "Have you prepare models in ${MODEL_PATH}:(yes/no) [yes]" user_input user_input=${user_input:-"yes"} @@ -142,6 +176,7 @@ function start_services() { # Reranker: ${MODEL_PATH}/BAAI/bge-reranker-large # llm :${MODEL_PATH}/${LLM_MODEL}/INT4_compressed_weights echo "you skipped model downloading, please make sure you have prepared all models under ${MODEL_PATH}" + ensure_required_models_for_ov else read -p "you have not prepare models, do you need one-click model downloading into ${MODEL_PATH}:(yes/no) [yes]" your_input your_input=${your_input:-"yes"} @@ -185,6 +220,7 @@ function start_services() { COMPOSE_FILE="compose.yaml" echo "starting service..." docker compose -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE up -d + print_ui_access_info } @@ -200,35 +236,109 @@ function check_baai_folder() { fi } +ensure_openvino_tooling() { + if ! command -v optimum-cli >/dev/null 2>&1; then + echo "[Model Check] 'optimum-cli' not found, installing optimum-intel[openvino]..." + python -m pip install --upgrade-strategy eager "optimum-intel[openvino]" + fi +} + +ensure_huggingface_tooling() { + if ! command -v huggingface-cli >/dev/null 2>&1; then + echo "[Model Check] 'huggingface-cli' not found, installing huggingface_hub..." + python -m pip install huggingface_hub + fi +} + +ensure_embedding_and_reranker_models() { + local embedding_dir="${MODEL_PATH}/BAAI/bge-small-en-v1.5" + local reranker_dir="${MODEL_PATH}/BAAI/bge-reranker-large" + + if [ ! -f "${embedding_dir}/openvino_model.xml" ]; then + echo "[Model Check] Embedding model missing: ${embedding_dir}" + echo "[Model Check] Downloading embedding model..." 
+ ensure_openvino_tooling + mkdir -p "${embedding_dir}" + optimum-cli export openvino -m BAAI/bge-small-en-v1.5 "${embedding_dir}" --task sentence-similarity + else + echo "[Model Check] Embedding model exists: ${embedding_dir}" + fi + + if [ ! -f "${reranker_dir}/openvino_model.xml" ]; then + echo "[Model Check] Reranker model missing: ${reranker_dir}" + echo "[Model Check] Downloading reranker model..." + ensure_openvino_tooling + mkdir -p "${reranker_dir}" + optimum-cli export openvino -m BAAI/bge-reranker-large "${reranker_dir}" --task text-classification + else + echo "[Model Check] Reranker model exists: ${reranker_dir}" + fi +} + +ensure_llm_model_for_vllm() { + local llm_dir="${MODEL_PATH}/${LLM_MODEL}" + if [ ! -f "${llm_dir}/config.json" ]; then + echo "[Model Check] vLLM LLM model missing: ${llm_dir}" + echo "[Model Check] Downloading LLM model '${LLM_MODEL}'..." + ensure_huggingface_tooling + mkdir -p "${llm_dir}" + huggingface-cli download "${LLM_MODEL}" --local-dir "${llm_dir}" + else + echo "[Model Check] vLLM LLM model exists: ${llm_dir}" + fi +} + +ensure_llm_model_for_ov() { + local ov_llm_dir="${MODEL_PATH}/${LLM_MODEL}/INT4_compressed_weights" + if [ ! -f "${ov_llm_dir}/openvino_model.xml" ]; then + echo "[Model Check] OpenVINO LLM model missing: ${ov_llm_dir}" + echo "[Model Check] Downloading and converting LLM model '${LLM_MODEL}' to INT4 OpenVINO..." 
+ ensure_openvino_tooling + mkdir -p "${ov_llm_dir}" + optimum-cli export openvino --model "${LLM_MODEL}" "${ov_llm_dir}" --task text-generation-with-past --weight-format int4 --group-size 128 --ratio 0.8 + else + echo "[Model Check] OpenVINO LLM model exists: ${ov_llm_dir}" + fi +} + +ensure_required_models_for_vllm() { + ensure_embedding_and_reranker_models + ensure_llm_model_for_vllm +} + +ensure_required_models_for_ov() { + ensure_embedding_and_reranker_models + ensure_llm_model_for_ov +} + function quick_start_vllm_services() { - WORKPATH=$(dirname "$PWD") COMPOSE_FILE="compose.yaml" EC_RAG_SERVICE_PORT=16010 docker compose -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE down ip_address=$(hostname -I | awk '{print $1}') export HOST_IP=${HOST_IP:-"${ip_address}"} - export MODEL_PATH=${MODEL_PATH:-"${PWD}/models"} - export DOC_PATH=${DOC_PATH:-"$WORKPATH/tests"} - export TMPFILE_PATH=${TMPFILE_PATH:-"$WORKPATH/tests"} + export MODEL_PATH=${MODEL_PATH:-"${WORKPATH}/workspace/models"} + export DOC_PATH=${DOC_PATH:-"$WORKPATH/workspace"} + export TMPFILE_PATH=${TMPFILE_PATH:-"$WORKPATH/workspace"} export DP_NUM=${DP_NUM:-1} export MILVUS_ENABLED=${MILVUS_ENABLED:-1} export CHAT_HISTORY_ROUND=${CHAT_HISTORY_ROUND:-2} export HF_ENDPOINT=${HF_ENDPOINT:-https://hf-mirror.com} - export TENSOR_PARALLEL_SIZE=${TENSOR_PARALLEL_SIZE:-1} + export TP=${TP:-1} export MAX_NUM_SEQS=${MAX_NUM_SEQS:-64} export MAX_MODEL_LEN=${MAX_MODEL_LEN:-10240} export MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-10240} - export LOAD_IN_LOW_BIT=${LOAD_IN_LOW_BIT:-fp8} + export QUANTIZATION=${QUANTIZATION:-fp8} export CCL_DG2_USM=${CCL_DG2_USM:-0} export LLM_MODEL=${LLM_MODEL:-Qwen/Qwen3-8B} - export LLM_MODEL_PATH=${LLM_MODEL_PATH:-"${MODEL_PATH}/Qwen/Qwen3-8B"} + export LLM_MODEL_PATH=${LLM_MODEL_PATH:-"${MODEL_PATH}/${LLM_MODEL}"} export VIDEOGROUPID=$(getent group video | cut -d: -f3) export RENDERGROUPID=$(getent group render | cut -d: -f3) export VLLM_SERVICE_PORT_A770=8086 - 
check_baai_folder + ensure_required_models_for_vllm export HF_CACHE=${HF_CACHE:-"${HOME}/.cache"} export no_proxy="localhost, 127.0.0.1, 192.168.1.1, ${HOST_IP}" if [ ! -d "${HF_CACHE}" ]; then @@ -251,7 +361,7 @@ function quick_start_vllm_services() { n=$((n+1)) done rm -rf ipex-llm-serving-xpu-container.log - echo "service launched, please visit UI at ${HOST_IP}:8082" + print_ui_access_info } @@ -262,17 +372,17 @@ function quick_start_ov_services() { ip_address=$(hostname -I | awk '{print $1}') export HOST_IP=${HOST_IP:-"${ip_address}"} - export DOC_PATH=${DOC_PATH:-"$WORKPATH/tests"} - export TMPFILE_PATH=${TMPFILE_PATH:-"$WORKPATH/tests"} + export DOC_PATH=${DOC_PATH:-"$WORKPATH/workspace"} + export TMPFILE_PATH=${TMPFILE_PATH:-"$WORKPATH/workspace"} export MILVUS_ENABLED=${MILVUS_ENABLED:-1} export CHAT_HISTORY_ROUND=${CHAT_HISTORY_ROUND:-"0"} export LLM_MODEL=${LLM_MODEL:-"Qwen/Qwen3-8B"} - export MODEL_PATH=${MODEL_PATH:-"${PWD}/models"} + export MODEL_PATH=${MODEL_PATH:-"${WORKPATH}/workspace/models"} export VIDEOGROUPID=$(getent group video | cut -d: -f3) export RENDERGROUPID=$(getent group render | cut -d: -f3) export MAX_MODEL_LEN=5000 - check_baai_folder + ensure_required_models_for_ov export HF_CACHE=${HF_CACHE:-"${HOME}/.cache"} if [ ! -d "${HF_CACHE}" ]; then mkdir -p "${HF_CACHE}" @@ -287,23 +397,24 @@ function quick_start_ov_services() { echo "Starting service..." docker compose -f "$WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE" up -d + print_ui_access_info } function start_vLLM_B60_services() { COMPOSE_FILE="compose.yaml" echo "stop former service..." 
- export MODEL_PATH=${MODEL_PATH:-"${PWD}/models"} + export MODEL_PATH=${MODEL_PATH:-"${WORKPATH}/models"} docker compose -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE down ip_address=$(hostname -I | awk '{print $1}') HOST_IP=$(get_user_input "host ip" "${ip_address}") - DOC_PATH=$(get_user_input "DOC_PATH" "$WORKPATH/tests") - TMPFILE_PATH=$(get_user_input "TMPFILE_PATH" "$WORKPATH/tests") + DOC_PATH=$(get_user_input "DOC_PATH" "$WORKPATH/workspace") + TMPFILE_PATH=$(get_user_input "TMPFILE_PATH" "$WORKPATH/workspace") MILVUS_ENABLED=$(get_enable_function "MILVUS DB(Enter 1 for enable)" "0") CHAT_HISTORY_ROUND=$(get_user_input "chat history round" "0") LLM_MODEL=$(get_user_input "your LLM model" "Qwen/Qwen3-8B") - MODEL_PATH=$(get_user_input "your model path" "${PWD}/models") + MODEL_PATH=$(get_user_input "your model path" "${WORKPATH}/workspace/models") read -p "Have you prepare models in ${MODEL_PATH}:(yes/no) [yes]" user_input user_input=${user_input:-"yes"} @@ -313,6 +424,7 @@ function start_vLLM_B60_services() { # Reranker: ${MODEL_PATH}/BAAI/bge-reranker-large # llm :${MODEL_PATH}/${LLM_MODEL} (从huggingface或modelscope下载的原始模型,而不是经过OpenVINO转换的模型!) echo "you skipped model downloading, please make sure you have prepared all models under ${MODEL_PATH}" + ensure_required_models_for_vllm else echo "you have not prepare models, starting to download models into ${MODEL_PATH}..." 
mkdir -p $MODEL_PATH @@ -350,7 +462,7 @@ function start_vLLM_B60_services() { export no_proxy="localhost, 127.0.0.1, 192.168.1.1, ${HOST_IP}" export MILVUS_ENABLED=${MILVUS_ENABLED} export CHAT_HISTORY_ROUND=${CHAT_HISTORY_ROUND} - export SELECTED_XPU_0=${SELECTED_XPU_0} + export ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK} export VIDEOGROUPID=$(getent group video | cut -d: -f3) export RENDERGROUPID=$(getent group render | cut -d: -f3) # export vllm ENV @@ -382,21 +494,20 @@ function start_vLLM_B60_services() { n=$((n+1)) done rm -rf ipex-llm-serving-xpu-container.log - echo "service launched, please visit UI at ${HOST_IP}:8082" + print_ui_access_info } function quick_start_vllm_B60_services() { - WORKPATH=$(dirname "$PWD") COMPOSE_FILE="compose.yaml" EC_RAG_SERVICE_PORT=16010 docker compose -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE down ip_address=$(hostname -I | awk '{print $1}') export HOST_IP=${HOST_IP:-"${ip_address}"} - export MODEL_PATH=${MODEL_PATH:-"${PWD}/models"} - export DOC_PATH=${DOC_PATH:-"$WORKPATH/tests"} - export TMPFILE_PATH=${TMPFILE_PATH:-"$WORKPATH/tests"} + export MODEL_PATH=${MODEL_PATH:-"${WORKPATH}/workspace/models"} + export DOC_PATH=${DOC_PATH:-"$WORKPATH/workspace"} + export TMPFILE_PATH=${TMPFILE_PATH:-"$WORKPATH/workspace"} export MILVUS_ENABLED=${MILVUS_ENABLED:-1} export CHAT_HISTORY_ROUND=${CHAT_HISTORY_ROUND:-2} export LLM_MODEL=${LLM_MODEL:-Qwen/Qwen3-8B} @@ -419,7 +530,7 @@ function quick_start_vllm_B60_services() { export QUANTIZATION=${QUANTIZATION:-fp8} - check_baai_folder + ensure_required_models_for_vllm export no_proxy="localhost, 127.0.0.1, 192.168.1.1, ${HOST_IP}" sudo chown -R 1000:1000 ${MODEL_PATH} ${DOC_PATH} ${TMPFILE_PATH} docker compose --profile b60 -f $WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE up -d @@ -434,14 +545,27 @@ function quick_start_vllm_B60_services() { n=$((n+1)) done rm -rf ipex-llm-serving-xpu-container.log - echo "service launched, please visit UI at ${HOST_IP}:8082" + 
print_ui_access_info +} + + +function quick_cleanup_services() { + COMPOSE_FILE="compose.yaml" + echo "Stopping EdgeCraftRAG services..." + docker compose -f "$WORKPATH/docker_compose/intel/gpu/arc/$COMPOSE_FILE" down + echo "Cleanup completed." } function main { + if [[ "${1:-}" == "cleanup" ]]; then + quick_cleanup_services + exit 0 + fi + if [[ $- == *i* ]]; then - read -p "Do you want to start vLLM or local OpenVINO services? (vLLM_A770/vLLM_B60/ov) [vLLM_A770]: " user_input - user_input=${user_input:-"vLLM_A770"} + read -p "Do you want to start vLLM or local OpenVINO services? (vLLM_A770/vLLM_B60/ov) [ov]: " user_input + user_input=${user_input:-"ov"} if [[ "$user_input" == "vLLM_A770" ]]; then start_vllm_services elif [[ "$user_input" == "vLLM_B60" ]]; then diff --git a/EdgeCraftRAG/ui/docker/Dockerfile.ui b/EdgeCraftRAG/ui/docker/Dockerfile.ui index 80cda697b3..1296f51c98 100644 --- a/EdgeCraftRAG/ui/docker/Dockerfile.ui +++ b/EdgeCraftRAG/ui/docker/Dockerfile.ui @@ -1,25 +1,23 @@ -FROM node:20.18.1 AS vue-app - +FROM node:22.14.0 AS vue-app + RUN apt-get update -y && apt-get install -y git - -RUN useradd -m -s /bin/bash user && \ - mkdir -p /home/user && \ - chown -R user /home/user/ - -COPY ./ui/vue /home/user/ui - -RUN mkdir -p /home/user/ui -ENV TMPFILE_PATH=/home/user/ui - + WORKDIR /home/user/ui - + +ENV TMPFILE_PATH=/home/user/ui +ENV NODE_OPTIONS=--max-old-space-size=4096 +ENV NPM_CONFIG_AUDIT=false +ENV NPM_CONFIG_FUND=false + +COPY ./ui/vue/package*.json ./ RUN npm install - + +COPY ./ui/vue ./ RUN npm run build FROM nginx:stable-alpine -COPY --from=vue-app home/user/ui/dist /usr/share/nginx/html +COPY --from=vue-app /home/user/ui/dist /usr/share/nginx/html COPY ./ui/vue/nginx.conf /etc/nginx/nginx.conf diff --git a/EdgeCraftRAG/ui/vue/components.d.ts b/EdgeCraftRAG/ui/vue/components.d.ts index fa4a8b942a..5c31b7cc21 100644 --- a/EdgeCraftRAG/ui/vue/components.d.ts +++ b/EdgeCraftRAG/ui/vue/components.d.ts @@ -12,7 +12,6 @@ declare module 'vue' { 
export interface GlobalComponents { AButton: typeof import('ant-design-vue/es')['Button'] ACheckbox: typeof import('ant-design-vue/es')['Checkbox'] - ACheckboxGroup: typeof import('ant-design-vue/es')['CheckboxGroup'] ACol: typeof import('ant-design-vue/es')['Col'] ACollapse: typeof import('ant-design-vue/es')['Collapse'] ACollapsePanel: typeof import('ant-design-vue/es')['CollapsePanel'] diff --git a/EdgeCraftRAG/ui/vue/src/api/agent/index.ts b/EdgeCraftRAG/ui/vue/src/api/agent/index.ts index ee6fc29b3e..9bf55f33ac 100644 --- a/EdgeCraftRAG/ui/vue/src/api/agent/index.ts +++ b/EdgeCraftRAG/ui/vue/src/api/agent/index.ts @@ -5,14 +5,14 @@ import request from "../request"; export const getAgentList = () => { return request({ - url: "/v1/settings/agents", + url: "/v1/agents", method: "get", }); }; export const getAgentDetailByName = (name: String) => { return request({ - url: `/v1/settings/agents/${name}`, + url: `/v1/agents/${name}`, method: "get", }); }; @@ -27,7 +27,7 @@ export const getFreechatList = (params: Object) => { export const requestAgentCreate = (data: Object) => { return request({ - url: "/v1/settings/agents", + url: "/v1/agents", method: "post", data, showLoading: true, @@ -37,7 +37,7 @@ export const requestAgentCreate = (data: Object) => { }; export const requestAgentUpdate = (name: String, data: Object) => { return request({ - url: `/v1/settings/agents/${name}`, + url: `/v1/agents/${name}`, method: "patch", data, showLoading: true, @@ -48,7 +48,7 @@ export const requestAgentUpdate = (name: String, data: Object) => { export const requestAgentDelete = (name: String) => { return request({ - url: `/v1/settings/agents/${name}`, + url: `/v1/agents/${name}`, method: "delete", showLoading: true, showSuccessMsg: true, @@ -58,7 +58,7 @@ export const requestAgentDelete = (name: String) => { export const getAgentConfigs = (type: String) => { return request({ - url: `/v1/settings/agents/configs/${type}`, + url: `/v1/agents/configs/${type}`, method: "get", }); }; 
diff --git a/EdgeCraftRAG/ui/vue/src/api/knowledgeBase/index.ts b/EdgeCraftRAG/ui/vue/src/api/knowledgeBase/index.ts index fe300d6b33..4b3d4ceaa2 100644 --- a/EdgeCraftRAG/ui/vue/src/api/knowledgeBase/index.ts +++ b/EdgeCraftRAG/ui/vue/src/api/knowledgeBase/index.ts @@ -17,6 +17,20 @@ export const getKnowledgeBaseDetailByName = (kbName: String) => { }); }; +export const getKnowledgeBaseFilesByName = (kbName: String, params: Object) => { + return request({ + url: `/v1/knowledge/${kbName}/filemap`, + method: "get", + params, + }); +}; + +export const getKnowledgeBaseJsonByName = (kbName: String) => { + return request({ + url: `/v1/knowledge/${kbName}/json`, + method: "get", + }); +}; export const requestKnowledgeBaseCreate = (data: Object) => { return request({ url: "/v1/knowledge", @@ -136,10 +150,12 @@ export const requestExperienceRelation = (data: Object) => { }); }; -export const getkbadminList = () => { +export const getkbadminList = (params: Object) => { return request({ url: "/v1/kbadmin/kbs_list", method: "get", + params, + showLoading: true, }); }; diff --git a/EdgeCraftRAG/ui/vue/src/i18n/en.ts b/EdgeCraftRAG/ui/vue/src/i18n/en.ts index a2be5f5894..3fad3ff70c 100644 --- a/EdgeCraftRAG/ui/vue/src/i18n/en.ts +++ b/EdgeCraftRAG/ui/vue/src/i18n/en.ts @@ -8,6 +8,7 @@ export default { edit: "Edit", add: "Add", delete: "Delete", + details: "Details", active: "Activate", deactivate: "Deactivate", cancel: "Cancel", @@ -283,6 +284,7 @@ export default { notFileTip: "The knowledge base is empty. 
Go upload your files.", name: "Name", des: "Description", + detail: "Knowledge Base Details", activated: "Activated", nameValid1: "Please input knowledge base name", nameValid2: "Name should be between 2 and 30 characters", @@ -299,7 +301,7 @@ export default { "Experience refers to the knowledge and skills acquired through practical involvement, trial, and reflection, serving as a key foundation for solving real-world problems.", kbDes: "A Knowledge Base is a centralized repository for storing organized information such as documents, FAQs, and guides, enabling teams or users to quickly access and share knowledge.", - type: "Type", + type: "Knowledge Base Type", original: "Original", kbadmin: "kbadmin", typeValid: "Please select knowledge base type", @@ -311,6 +313,13 @@ export default { totalTip: "files", failedFile: "Failed Files", retryFailed: "Upload failed !", + general: "General", + class: "Type", + desc: { + name: "The name identifier of the knowledge base.", + type: "The type identifier of the knowledge base.", + description: "Briefly describe the purpose, content scope, or intended use of this knowledge base.", + }, }, request: { pipeline: { diff --git a/EdgeCraftRAG/ui/vue/src/i18n/zh.ts b/EdgeCraftRAG/ui/vue/src/i18n/zh.ts index 7a3fca252e..d2ddbb7d14 100644 --- a/EdgeCraftRAG/ui/vue/src/i18n/zh.ts +++ b/EdgeCraftRAG/ui/vue/src/i18n/zh.ts @@ -7,6 +7,7 @@ export default { update: "更新", edit: "编辑", add: "新增", + details: "详情", delete: "删除", active: "启用", deactivate: "停用", @@ -99,7 +100,7 @@ export default { validErr: "表单验证失败!", urlValidTip: "URL 或模型测试通过后方可继续", config: { - basic: "基础", + basic: "基本信息", nodeParser: "节点解析器", nodeParserType: "节点解析器类型", chunkSize: "分块大小", @@ -278,6 +279,7 @@ export default { notFileTip: "您还没有上传任何文件,点击“上传”按钮开始添加内容吧~", name: "名称", des: "描述", + detail: "知识库详情", activated: "激活状态", nameValid1: "请输入知识库名称", nameValid2: "请输入 2 到 30 个字符的名称", @@ -294,7 +296,7 @@ export default { 
"Experience是指个人或团队在实践过程中积累的知识和技能,通常通过实际操作、试错和反思获得,是解决实际问题的重要依据", kbDes: "知识库是系统化存储信息的集合,用于集中管理文档、常见问题、操作指南等知识内容,便于团队或用户快速查找和共享信息。", - type: "类型", + type: "知识库类型", original: "原始的", kbadmin: "kbadmin", typeValid: "请选择知识库类型", @@ -306,6 +308,13 @@ export default { totalTip: "个文件", failedFile: "失败文件", retryFailed: "上传失败!", + general: "通用的", + class: "类型", + desc: { + name: "知识库的名称标识", + type: "知识库的类型标识", + description: "简要说明该知识库的用途、内容范围或适用场景", + }, }, request: { pipeline: { diff --git a/EdgeCraftRAG/ui/vue/src/theme/common.less b/EdgeCraftRAG/ui/vue/src/theme/common.less index bc46197cb2..e4d9a17650 100644 --- a/EdgeCraftRAG/ui/vue/src/theme/common.less +++ b/EdgeCraftRAG/ui/vue/src/theme/common.less @@ -1,5 +1,6 @@ .card-shadow { - box-shadow: 0px 1px 2px -1px var(--bg-box-shadow), + box-shadow: + 0px 1px 2px -1px var(--bg-box-shadow), 0px 1px 3px 0px var(--bg-box-shadow); } .flex-left { @@ -96,11 +97,7 @@ } } .special-button-primary { - background: linear-gradient( - to bottom, - var(--color-primary-hover), - var(--color-primary) - ); + background: linear-gradient(to bottom, var(--color-primary-hover), var(--color-primary)); color: var(--color-white); border: none; border-radius: 20px; @@ -241,6 +238,19 @@ color: var(--font-tip-color); .ml-12; } + +:deep(.horizontal-form-item) { + // .intel-form-item-row { + // display: flex; + // flex-direction: row; + // } + // .intel-form-item-label { + // width: 100px; + // } + // .intel-form-item-control { + // flex: 1; + // } +} .loopStyle(@counter) when (@counter > 0) { .p-@{counter} { padding: (1px * @counter); diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/DetailDrawer.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/DetailDrawer.vue new file mode 100644 index 0000000000..2500d375c5 --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/DetailDrawer.vue @@ -0,0 +1,214 @@ + + + diff --git 
a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/KnowledgeDetail.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/KnowledgeDetail.vue index ec8aab9ee6..d737b901b0 100644 --- a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/KnowledgeDetail.vue +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/KnowledgeDetail.vue @@ -115,32 +115,37 @@ :description="$t('knowledge.notFileTip')" />
- - -
-
- - -
{{ key }}
-
-
-
- - - -
+
+
+
+ + +
{{ file.name }}
+
- - +
+ + + +
+
+
+
+ +
@@ -148,7 +153,7 @@ diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateExperienceDialog.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateExperienceDialog.vue new file mode 100644 index 0000000000..1b11542c3c --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateExperienceDialog.vue @@ -0,0 +1,175 @@ + + + + + diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateKBDialog/Activated.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateKBDialog/Activated.vue new file mode 100644 index 0000000000..3816e72e99 --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateKBDialog/Activated.vue @@ -0,0 +1,66 @@ + + + + + diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateKBDialog/Basic.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateKBDialog/Basic.vue new file mode 100644 index 0000000000..0b20f99abe --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateKBDialog/Basic.vue @@ -0,0 +1,314 @@ + + + + + diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateKBDialog/CreateDialog.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateKBDialog/CreateDialog.vue new file mode 100644 index 0000000000..268a8d17c0 --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateKBDialog/CreateDialog.vue @@ -0,0 +1,263 @@ + + + + + diff --git a/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateKBDialog/EditDialog.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateKBDialog/EditDialog.vue new file mode 100644 index 0000000000..9d804e823a --- /dev/null +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateKBDialog/EditDialog.vue @@ -0,0 +1,241 @@ + + + + + diff --git 
a/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/UpdateDialog/Indexer.vue b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateKBDialog/Indexer.vue similarity index 89% rename from EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/UpdateDialog/Indexer.vue rename to EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateKBDialog/Indexer.vue index ba1e4776cd..a57a63b74d 100644 --- a/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/UpdateDialog/Indexer.vue +++ b/EdgeCraftRAG/ui/vue/src/views/chatbot/components/KnowledgeBase/UpdateKBDialog/Indexer.vue @@ -166,7 +166,7 @@ - + - + {{ $t("common.edit") }} + + + {{ $t("common.details") }} - + + + + + + + import { - getKnowledgeBaseDetailByName, - getKnowledgeBaseList, - requestKnowledgeBaseDelete, - requestKnowledgeBaseUpdate, - } from "@/api/knowledgeBase"; - import eventBus from "@/utils/mitt"; - import { - CheckCircleFilled, - CheckOutlined, - CloseCircleFilled, - DeleteFilled, - EditFilled, - PauseCircleFilled, - PlusOutlined, - } from "@ant-design/icons-vue"; - import { Modal } from "ant-design-vue"; - import { computed, createVNode, onMounted, reactive, ref } from "vue"; - import { useI18n } from "vue-i18n"; - import { SelectTypeDialog, UpdateDialog } from "./index"; + getKnowledgeBaseDetailByName, + getKnowledgeBaseJsonByName, + getKnowledgeBaseList, + requestKnowledgeBaseDelete, + requestKnowledgeBaseUpdate, +} from "@/api/knowledgeBase"; +import eventBus from "@/utils/mitt"; +import { + CheckCircleFilled, + CheckOutlined, + CloseCircleFilled, + DeleteFilled, + EditFilled, + EyeFilled, + PauseCircleFilled, + PlusOutlined, +} from "@ant-design/icons-vue"; +import { Modal } from "ant-design-vue"; +import { computed, createVNode, onMounted, reactive, ref } from "vue"; +import { useI18n } from "vue-i18n"; +import { + CreateDialog, + DetailDrawer, + EditDialog, + SelectTypeDialog, + UpdateExperienceDialog, +} from "./index"; 
const emit = defineEmits(["view"]); const { t } = useI18n(); @@ -200,10 +236,21 @@ flag: "knowledge", data: {}, }); + const createDialog = reactive({ + visible: false, + }); + const editDialog = reactive({ + visible: false, + id: "", + data: {}, + }); const selectTypeDialog = reactive({ visible: false, }); - + const detailDrawer = reactive({ + visible: false, + data: {}, + }); const kbList = ref([]); const isCreated = computed(() => kbList.value.some(item => item.comp_type === "experience")); @@ -217,19 +264,36 @@ }; //create const handleCreate = (flag = "create") => { - updateDialog.type = "create"; - updateDialog.flag = flag; - updateDialog.data = {}; - updateDialog.visible = true; + if (flag === "knowledge") { + createDialog.visible = true; + } else { + updateDialog.type = "create"; + updateDialog.flag = flag; + updateDialog.data = {}; + updateDialog.visible = true; + } }; //edit const handleUpdate = async (row: EmptyObjectType) => { - const data: any = await getKnowledgeBaseDetailByName(row.name); + if (row.comp_type === "knowledge") { + const data: any = await getKnowledgeBaseJsonByName(row.name); + editDialog.id = row.idx; + editDialog.data = JSON.parse(data); + editDialog.visible = true; + } else { + const data: any = await getKnowledgeBaseDetailByName(row.name); + updateDialog.data = data; + updateDialog.type = "edit"; + updateDialog.flag = row.comp_type; + updateDialog.visible = true; + } + }; + //configuration + const handleViewConfiguration = async (row: EmptyObjectType) => { + const data: any = await getKnowledgeBaseJsonByName(row.name); - updateDialog.data = data; - updateDialog.type = "edit"; - updateDialog.flag = row.comp_type; - updateDialog.visible = true; + detailDrawer.data = JSON.parse(data); + detailDrawer.visible = true; }; //detail const handleView = async (row: EmptyObjectType) => { diff --git a/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/columnsList.ts 
b/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/columnsList.ts index e8e1a6e1ad..56d3010ca8 100644 --- a/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/columnsList.ts +++ b/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/columnsList.ts @@ -19,40 +19,19 @@ const getTableColumns = (t: (key: string) => string): TableColumns[] => [ ellipsis: true, visible: true, }, - { - title: t("pipeline.config.nodeParser"), - key: "parser_type", - dataIndex: ["node_parser", "parser_type"], - minWidth: 100, - visible: false, - }, - { - title: t("pipeline.config.indexer"), - key: "indexer_type", - dataIndex: ["indexer", "indexer_type"], - minWidth: 100, - visible: false, - }, - { - title: t("pipeline.config.embedding"), - key: "embedding", - dataIndex: ["indexer", "model", "model_id"], - minWidth: 180, - visible: true, - }, { title: t("pipeline.config.retriever"), key: "retriever_type", dataIndex: ["retriever", "retriever_type"], minWidth: 100, - visible: false, + visible: true, }, { title: t("pipeline.config.postProcessor"), key: "postProcessor", dataIndex: "postProcessor", minWidth: 220, - visible: false, + visible: true, }, { title: t("pipeline.config.rerank"), diff --git a/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/DetailDrawer.vue b/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/DetailDrawer.vue index b15dafee37..b29596bbb0 100644 --- a/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/DetailDrawer.vue +++ b/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/DetailDrawer.vue @@ -19,84 +19,6 @@ }}

- -
- - -
    -
  • - {{ $t("pipeline.config.nodeParser") }} - {{ formData.node_parser.parser_type }} -
  • - - -
-
-
-
- -
- - -
    -
  • - {{ $t("pipeline.config.indexerType") }} - {{ formData.indexer.indexer_type }} -
  • -
  • - {{ $t("pipeline.config.llm") }} - {{ formData.indexer.inference_type }} -
  • -
  • - {{ $t("pipeline.config.embeddingUrl") }} - {{ formData.indexer?.embedding_url }} - {{ - formData.indexer.embedding_model.api_base - }} -
  • -
  • - {{ $t("pipeline.config.embedding") }} - {{ formData.indexer.embedding_model.model_id }} -
  • -
  • - {{ $t("pipeline.config.embeddingDevice") }} - {{ formData.indexer.embedding_model.device }} -
  • -
  • - {{ $t("pipeline.config.vector_url") }} - {{ formData.indexer?.vector_url }} -
  • -
-
-
-
@@ -189,7 +111,7 @@ diff --git a/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/UpdateDialog/NodeParser.vue b/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/UpdateDialog/NodeParser.vue deleted file mode 100644 index a1ae33c207..0000000000 --- a/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/UpdateDialog/NodeParser.vue +++ /dev/null @@ -1,319 +0,0 @@ - - - - - diff --git a/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/UpdateDialog/Retriever.vue b/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/UpdateDialog/Retriever.vue index 7f77011cd6..6587827c3b 100644 --- a/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/UpdateDialog/Retriever.vue +++ b/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/UpdateDialog/Retriever.vue @@ -9,22 +9,16 @@ class="form-wrap" >
-
- {{ item.name }} + {{ + item.name + }}
@@ -35,11 +29,7 @@
- + diff --git a/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/UpdateDialog/index.ts b/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/UpdateDialog/index.ts index 9540289fc5..80c7a63c6c 100644 --- a/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/UpdateDialog/index.ts +++ b/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/components/UpdateDialog/index.ts @@ -4,9 +4,7 @@ import Activated from "./Activated.vue"; import Basic from "./Basic.vue"; import Generator from "./Generator.vue"; -import Indexer from "./Indexer.vue"; -import NodeParser from "./NodeParser.vue"; import PostProcessor from "./PostProcessor.vue"; import Retriever from "./Retriever.vue"; -export { Activated, Basic, Generator, Indexer, NodeParser, PostProcessor, Retriever }; +export { Activated, Basic, Generator, PostProcessor, Retriever }; diff --git a/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/enum.ts b/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/enum.ts index cd076672cc..8252e4ecd9 100644 --- a/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/enum.ts +++ b/EdgeCraftRAG/ui/vue/src/views/settings/components/Pipeline/enum.ts @@ -22,11 +22,6 @@ export const NodeParser = [ value: "unstructured", describe: "pipeline.desc.unstructured", }, - { - name: "kbadmin", - value: "kbadmin_parser", - describe: "pipeline.desc.kbadmin", - }, ] as const; export const Indexer = [ @@ -45,11 +40,6 @@ export const Indexer = [ value: "milvus_vector", describe: "pipeline.desc.milvusVector", }, - { - name: "kbadmin", - value: "kbadmin_indexer", - describe: "pipeline.desc.kbadmin", - }, ] as const; export const Retriever = [ { @@ -67,11 +57,6 @@ export const Retriever = [ value: "bm25", describe: "pipeline.desc.bm25", }, - { - name: "kbadmin", - value: "kbadmin_retriever", - describe: "pipeline.desc.kbadmin", - }, ] as const; export const PostProcessor = [