diff --git a/Makefile b/Makefile
index 2a38d7496..3abbae657 100644
--- a/Makefile
+++ b/Makefile
@@ -1,10 +1,9 @@
# Configuration variables
-VERSION ?= v0.5.0-alpha.29
+VERSION ?= nightly
VERSION_FILE ?= aperag/version/__init__.py
BUILDX_PLATFORM ?= linux/amd64,linux/arm64
BUILDX_ARGS ?= --sbom=false --provenance=false
REGISTRY ?= registry.cn-hangzhou.aliyuncs.com
-DOCRAY_VERSION ?= v0.1.1
# Image names
APERAG_IMAGE = apecloud/aperag
@@ -23,11 +22,11 @@ else
endif
##################################################
-# Users - Local Development and Deployment
+# Environment & Dependencies
##################################################
-# Environment setup
-.PHONY: install-uv venv install
+# Python environment setup
+.PHONY: install-uv venv install clean
install-uv:
@if [ -z "$$(which uv)" ]; then \
echo "Installing uv..."; \
@@ -44,50 +43,66 @@ install: venv
@echo "Installing Python dependencies..."
uv sync --all-groups --all-extras
-# Database management
-.PHONY: makemigration migrate
-makemigration:
- @alembic -c aperag/alembic.ini revision --autogenerate
-
-migrate:
- @alembic -c aperag/alembic.ini upgrade head
-
-# Local services
-.PHONY: run-backend run-frontend run-db run-celery run-flower
-run-backend: migrate
- uvicorn aperag.app:app --host 0.0.0.0 --reload --log-config scripts/uvicorn-log-config.yaml
+# Development environment setup
+.PHONY: dev install-hooks
+dev: install-uv venv install-addlicense install-hooks
+ @echo "Installing development tools..."
+ @command -v redocly >/dev/null || npm install @redocly/cli -g
+ @command -v openapi-generator-cli >/dev/null || npm install @openapitools/openapi-generator-cli -g
+ @command -v datamodel-codegen >/dev/null || uv tool install datamodel-code-generator
+ @echo ""
+	@echo "✅ Development environment ready!"
+	@echo "Next steps:"
+ @echo " 1. Activate virtual environment: source .venv/bin/activate"
+ @echo " 2. Install dependencies: make install"
+ @echo " 3. Start databases: make compose-infra"
+ @echo " 4. Apply migrations: make migrate"
+ @echo " 5. Run services: make run-backend, make run-celery"
-run-celery:
- celery -A config.celery worker -B -l INFO --pool=threads --concurrency=16
+install-hooks:
+ @echo "Installing git hooks..."
+ @./scripts/install-hooks.sh
-run-beat:
- celery -A config.celery beat -l INFO
+# Environment cleanup
+clean:
+ @echo "Cleaning development environment..."
+ @rm -f db.sqlite3
+ @$(MAKE) compose-down REMOVE_VOLUMES=1
-run-flower:
- celery -A config.celery flower --conf/flowerconfig.py
+##################################################
+# Database & Infrastructure
+##################################################
-run-frontend:
- cp ./frontend/deploy/env.local.template ./frontend/.env
- cd ./frontend && yarn dev
+# Database schema management
+.PHONY: makemigration migrate
+makemigration:
+ @uv run alembic -c aperag/alembic.ini revision --autogenerate
-run-db:
- @echo "Starting all database services..."
- @$(MAKE) run-redis run-postgres run-qdrant run-es run-minio
+migrate:
+ @uv run alembic -c aperag/alembic.ini upgrade head
-# Docker Compose deployment
+# Docker Compose infrastructure
# Variables for compose command based on environment flags
# Usage examples:
-# make compose-up
-# make compose-up WITH_DOCRAY=1
-# make compose-up WITH_DOCRAY=1 WITH_GPU=1
-# make compose-down
-# make compose-down REMOVE_VOLUMES=1
+# make compose-up # Full application
+# make compose-up WITH_NEO4J=1 # Full application + Neo4j
+# make compose-up WITH_DOCRAY=1 # Full application + DocRay
+# make compose-up WITH_NEO4J=1 WITH_DOCRAY=1 # Full application + Neo4j + DocRay
+# make compose-up WITH_NEO4J=1 WITH_DOCRAY=1 WITH_GPU=1 # All features
+# make compose-infra # Infrastructure only (databases)
+# make compose-infra WITH_NEO4J=1 # Infrastructure + Neo4j
+# make compose-down # Stop all services
+# make compose-down REMOVE_VOLUMES=1 # Stop and remove volumes
_PROFILES_TO_ACTIVATE :=
_EXTRA_ENVS :=
_COMPOSE_DOWN_FLAGS :=
-# Determine which docray profile to use for 'compose-up'
+# Determine which additional profiles to activate
+ifeq ($(WITH_NEO4J),1)
+ _PROFILES_TO_ACTIVATE += --profile neo4j
+endif
+
ifeq ($(WITH_DOCRAY),1)
ifeq ($(WITH_GPU),1)
_PROFILES_TO_ACTIVATE += --profile docray-gpu
@@ -103,41 +118,49 @@ ifeq ($(REMOVE_VOLUMES),1)
_COMPOSE_DOWN_FLAGS += -v
endif
-.PHONY: compose-up compose-down compose-logs
+.PHONY: compose-up compose-down compose-logs compose-infra
+# Full application startup
compose-up:
- VERSION=$(VERSION) REGISTRY=$(REGISTRY) DOCRAY_VERSION=$(DOCRAY_VERSION) $(_EXTRA_ENVS) docker-compose $(_PROFILES_TO_ACTIVATE) -f docker-compose.yml up -d
+ VERSION=v0.5.0-alpha.30 DOCRAY_VERSION=v0.1.1 $(_EXTRA_ENVS) docker-compose --profile app $(_PROFILES_TO_ACTIVATE) -f docker-compose.yml up -d
+
+# Infrastructure only (databases + supporting services)
+compose-infra:
+ VERSION=v0.5.0-alpha.30 DOCRAY_VERSION=v0.1.1 docker-compose $(_PROFILES_TO_ACTIVATE) -f docker-compose.yml up -d
compose-down:
- VERSION=$(VERSION) REGISTRY=$(REGISTRY) DOCRAY_VERSION=$(DOCRAY_VERSION) docker-compose --profile docray --profile docray-gpu -f docker-compose.yml down $(_COMPOSE_DOWN_FLAGS)
+ VERSION=v0.5.0-alpha.30 DOCRAY_VERSION=v0.1.1 docker-compose --profile app --profile docray --profile docray-gpu --profile neo4j -f docker-compose.yml down $(_COMPOSE_DOWN_FLAGS)
compose-logs:
- VERSION=$(VERSION) REGISTRY=$(REGISTRY) DOCRAY_VERSION=$(DOCRAY_VERSION) docker-compose -f docker-compose.yml logs -f
-
-# Environment cleanup
-.PHONY: clean
-clean:
- @echo "Cleaning development environment..."
- @rm -f db.sqlite3
- @docker rm -fv aperag-postgres-dev aperag-redis-dev aperag-qdrant-dev aperag-es-dev aperag-minio-dev aperag-neo4j-dev 2>/dev/null || true
- @if [ -f "nebula-docker-compose.yml" ]; then \
- echo "Stopping NebulaGraph containers..."; \
- docker-compose -f nebula-docker-compose.yml down 2>/dev/null || true; \
- fi
+ VERSION=v0.5.0-alpha.30 DOCRAY_VERSION=v0.1.1 docker-compose -f docker-compose.yml logs -f
##################################################
-# Developers - Code Quality and Tools
+# Development Services
##################################################
-# Development tools installation
-.PHONY: dev install-hooks
-dev: install-uv install-addlicense install-hooks
- @echo "Installing development tools..."
- @command -v redocly >/dev/null || npm install @redocly/cli -g
- @command -v openapi-generator-cli >/dev/null || npm install @openapitools/openapi-generator-cli -g
- @command -v datamodel-codegen >/dev/null || uv tool install datamodel-code-generator
+# Local development services
+.PHONY: run-backend run-frontend run-celery run-flower run-beat
+run-backend: migrate
+ uvicorn aperag.app:app --host 0.0.0.0 --reload --log-config scripts/uvicorn-log-config.yaml
+
+run-celery:
+ celery -A config.celery worker -B -l INFO --pool=threads --concurrency=16
+
+run-beat:
+ celery -A config.celery beat -l INFO
+
+run-flower:
+ celery -A config.celery flower --conf/flowerconfig.py
+
+run-frontend:
+ cp ./frontend/deploy/env.local.template ./frontend/.env
+ cd ./frontend && yarn dev
+
+##################################################
+# Code Quality & Testing
+##################################################
# Code quality checks
-.PHONY: format lint static-check test unit-test e2e-test
+.PHONY: format lint static-check
format:
uvx ruff check --fix ./aperag ./tests
uvx ruff format ./aperag ./tests
@@ -149,6 +172,8 @@ lint:
static-check:
uvx mypy ./aperag
+# Testing suite
+.PHONY: test unit-test e2e-test e2e-performance-test
test:
uv run pytest tests/ -v
@@ -169,14 +194,18 @@ e2e-performance-test:
--benchmark-save=benchmark-result-$$(date +%Y%m%d%H%M%S) \
tests/e2e_test/
-# Evaluation
+# RAG evaluation
.PHONY: evaluate
evaluate:
@echo "Running RAG evaluation..."
@python -m aperag.evaluation.run
-# Code generation
-.PHONY: merge-openapi generate-models generate-frontend-sdk llm_provider
+##################################################
+# Code Generation & API
+##################################################
+
+# OpenAPI and model generation
+.PHONY: merge-openapi generate-models generate-frontend-sdk
merge-openapi:
@cd aperag && redocly bundle ./api/openapi.yaml > ./api/openapi.merged.yaml
@@ -195,10 +224,12 @@ generate-models: merge-openapi
generate-frontend-sdk:
cd ./frontend && yarn api:build
+# LLM configuration generation
+.PHONY: llm_provider
llm_provider:
python ./models/generate_model_configs.py
-# Version management and licensing
+# Version management
.PHONY: version
version:
@git rev-parse HEAD | cut -c1-7 > commit_id.txt
@@ -206,56 +237,8 @@ version:
@echo "GIT_COMMIT_ID = \"$$(cat commit_id.txt)\"" >> $(VERSION_FILE)
@rm commit_id.txt
-.PHONY: add-license
-add-license: install-addlicense
- ./downloads/addlicense -c "ApeCloud, Inc." -y 2025 -l apache \
- -ignore "aperag/readers/**" \
- -ignore "aperag/vectorstore/**" \
- aperag/**/*.py
-
-.PHONY: check-license
-check-license: install-addlicense
- ./downloads/addlicense -check \
- -c "ApeCloud, Inc." -y 2025 -l apache \
- -ignore "aperag/readers/**" \
- -ignore "aperag/vectorstore/**" \
- aperag/**/*.py
-
-.PHONY: install-addlicense
-install-addlicense:
- @mkdir -p ./downloads
- @if [ ! -f ./downloads/addlicense ]; then \
- echo "Installing addlicense..."; \
- OS=$$(uname -s); \
- ARCH=$$(uname -m); \
- case $$OS in \
- Darwin) OS=macOS ;; \
- Linux) OS=Linux ;; \
- MINGW*|CYGWIN*) OS=Windows ;; \
- esac; \
- case $$ARCH in \
- x86_64) ARCH=x86_64 ;; \
- aarch64) ARCH=arm64 ;; \
- arm64) ARCH=arm64 ;; \
- esac; \
- echo "Detected platform: $$OS/$$ARCH"; \
- if [ "$$OS" = "Windows" ]; then \
- curl -L https://github.com/google/addlicense/releases/download/v1.1.1/addlicense_1.1.1_$${OS}_$${ARCH}.zip -o /tmp/addlicense.zip; \
- unzip -j /tmp/addlicense.zip -d ./downloads; \
- rm /tmp/addlicense.zip; \
- else \
- curl -L https://github.com/google/addlicense/releases/download/v1.1.1/addlicense_1.1.1_$${OS}_$${ARCH}.tar.gz | tar -xz -C ./downloads; \
- fi; \
- chmod +x ./downloads/addlicense; \
- echo "addlicense installed to ./downloads/addlicense"; \
- fi
-
-install-hooks:
- @echo "Installing git hooks..."
- @./scripts/install-hooks.sh
-
##################################################
-# Build and CI/CD
+# Build & Deploy
##################################################
# Docker builder setup
@@ -272,7 +255,7 @@ clean-builder:
docker buildx rm multi-platform; \
fi
-# Image builds - multi-platform
+# Production builds (multi-platform with registry push)
.PHONY: build build-aperag build-aperag-frontend
build: build-aperag build-aperag-frontend
@@ -287,7 +270,7 @@ build-aperag-frontend: setup-builder
--platform=$(BUILDX_PLATFORM) -f Dockerfile.prod --push \
-t $(REGISTRY)/$(APERAG_FRONTEND_IMG):$(VERSION) .
-# Image builds - local platform
+# Local builds (single platform for testing)
.PHONY: build-local build-aperag-local build-aperag-frontend-local
build-local: build-aperag-local build-aperag-frontend-local
@@ -302,109 +285,8 @@ build-aperag-frontend-local: setup-builder
--platform=$(LOCAL_PLATFORM) -f Dockerfile.prod --load \
-t $(APERAG_FRONTEND_IMG):$(VERSION) .
-##################################################
-# Utilities and Information
-##################################################
-
-# Configuration info
-.PHONY: info
-info:
- @echo "VERSION: $(VERSION)"
- @echo "DOCRAY_VERSION: $(DOCRAY_VERSION)"
- @echo "BUILDX_PLATFORM: $(BUILDX_PLATFORM)"
- @echo "LOCAL_PLATFORM: $(LOCAL_PLATFORM)"
- @echo "REGISTRY: $(REGISTRY)"
- @echo "HOST ARCH: $(UNAME_M)"
-
-# Database connection tools
-.PHONY: connect-metadb
-connect-metadb:
- @docker exec -it aperag-postgres-dev psql -p 5432 -U postgres
-
-# Individual service startup (for advanced users)
-.PHONY: run-redis run-postgres run-qdrant run-es run-minio run-neo4j run-nebula stop-nebula
-run-redis:
- @docker inspect aperag-redis-dev >/dev/null 2>&1 || docker run -d --name aperag-redis-dev -p 6379:6379 redis:latest
- @docker start aperag-redis-dev
-
-run-postgres:
- @docker inspect aperag-postgres-dev >/dev/null 2>&1 || \
- docker run -d --name aperag-postgres-dev -p 5432:5432 -e POSTGRES_PASSWORD=postgres pgvector/pgvector:pg16
- @docker start aperag-postgres-dev
- @sleep 3
- @docker exec aperag-postgres-dev psql -U postgres -c "CREATE EXTENSION IF NOT EXISTS vector;" 2>/dev/null || true
-
-run-qdrant:
- @docker inspect aperag-qdrant-dev >/dev/null 2>&1 || docker run -d --name aperag-qdrant-dev -p 6333:6333 qdrant/qdrant
- @docker start aperag-qdrant-dev
-
-run-es:
- @echo "Starting Elasticsearch (development mode)"
- @docker inspect aperag-es-dev > /dev/null 2>&1 || \
- docker run -d \
- --name aperag-es-dev \
- -p 9200:9200 \
- -e discovery.type=single-node \
- -e ES_JAVA_OPTS="-Xms1g -Xmx1g" \
- -e xpack.security.enabled=false \
- -v esdata:/usr/share/elasticsearch/data \
- apecloud/elasticsearch:8.8.2
- @docker start aperag-es-dev || true
- @echo "Checking if IK Analyzer is installed..."
- @docker exec aperag-es-dev bash -c \
- "if [ ! -d plugins/analysis-ik ]; then \
- echo 'Installing IK Analyzer from get.infini.cloud...'; \
- curl -L --output /tmp/analysis-ik.zip https://get.infini.cloud/elasticsearch/analysis-ik/8.8.2; \
- echo 'y' | bin/elasticsearch-plugin install file:///tmp/analysis-ik.zip; \
- echo 'Restarting Elasticsearch to apply changes...'; \
- else \
- echo 'IK Analyzer is already installed.'; \
- fi"
- @docker restart aperag-es-dev > /dev/null
- @echo "Elasticsearch is ready with IK Analyzer!"
-
-run-minio:
- @docker inspect aperag-minio-dev >/dev/null 2>&1 || \
- docker run -d --name aperag-minio-dev -p 9000:9000 -p 9001:9001 \
- quay.io/minio/minio server /data --console-address ":9001"
- @docker start aperag-minio-dev
-
-run-neo4j:
- @docker inspect aperag-neo4j-dev >/dev/null 2>&1 || \
- docker run -d --name aperag-neo4j-dev -p 7474:7474 -p 7687:7687 \
- -e NEO4J_AUTH=neo4j/password \
- -e NEO4J_PLUGINS=\[\"apoc\"\] \
- -e NEO4J_ACCEPT_LICENSE_AGREEMENT=yes \
- -e NEO4J_apoc_export_file_enabled=true \
- neo4j:5.26.5-enterprise
- @docker start aperag-neo4j-dev
-
-run-nebula:
- @echo "Setting up NebulaGraph with docker-compose (no persistence)..."
- @TZ=UTC docker-compose -f nebula-docker-compose.yml up -d
- @echo "NebulaGraph is starting up..."
- @echo ""
-	@echo "✅ Graph service available at: localhost:9669"
- @echo ""
- @echo "π Studio Web UI: http://localhost:7001"
- @echo " π Connection Info:"
-	@echo " • Graphd IP address: graphd"
-	@echo " • Port: 9669"
-	@echo " • Username: root"
-	@echo " • Password: nebula (or any password)"
- @echo ""
-	@echo "💻 Console: docker run --rm -ti --network host vesoft/nebula-console:nightly -addr 127.0.0.1 -port 9669 -u root -p nebula"
- @echo ""
- @echo "π Check status: docker-compose -f nebula-docker-compose.yml ps"
- @echo "π Stop: make stop-nebula"
-
-stop-nebula:
- @echo "Stopping NebulaGraph..."
- @docker-compose -f nebula-docker-compose.yml down
- @echo "NebulaGraph stopped."
-
-
-.PHONY: load-images-to-minikube
+# Kubernetes deployment helpers
+.PHONY: load-images-to-minikube load-images-to-kind
load-images-to-minikube:
@echo "Start To Load Image To Minikube"
docker save $(APERAG_IMAGE):$(VERSION) -o aperag.tar
@@ -415,15 +297,64 @@ load-images-to-minikube:
rm aperag-frontend.tar
@echo "Already Load Image To Minikube"
-.PHONY: load-images-to-kind
load-images-to-kind:
@echo "Start To Load Image To KinD"
kind load docker-image $(APERAG_IMAGE):$(VERSION) --name $(KIND_CLUSTER_NAME)
kind load docker-image $(APERAG_FRONTEND_IMG):$(VERSION) --name $(KIND_CLUSTER_NAME)
@echo "Already Load Image To KinD"
-# Compatibility aliases
-.PHONY: image celery flower
-image: build
-celery: run-celery
-flower: run-flower
+##################################################
+# Utilities & Tools
+##################################################
+
+# System information
+.PHONY: info
+info:
+ @echo "VERSION: $(VERSION)"
+ @echo "BUILDX_PLATFORM: $(BUILDX_PLATFORM)"
+ @echo "LOCAL_PLATFORM: $(LOCAL_PLATFORM)"
+ @echo "REGISTRY: $(REGISTRY)"
+ @echo "HOST ARCH: $(UNAME_M)"
+
+# License management
+.PHONY: add-license check-license install-addlicense
+add-license: install-addlicense
+ ./downloads/addlicense -c "ApeCloud, Inc." -y 2025 -l apache \
+ -ignore "aperag/readers/**" \
+ -ignore "aperag/vectorstore/**" \
+ aperag/**/*.py
+
+check-license: install-addlicense
+ ./downloads/addlicense -check \
+ -c "ApeCloud, Inc." -y 2025 -l apache \
+ -ignore "aperag/readers/**" \
+ -ignore "aperag/vectorstore/**" \
+ aperag/**/*.py
+
+install-addlicense:
+ @mkdir -p ./downloads
+ @if [ ! -f ./downloads/addlicense ]; then \
+ echo "Installing addlicense..."; \
+ OS=$$(uname -s); \
+ ARCH=$$(uname -m); \
+ case $$OS in \
+ Darwin) OS=macOS ;; \
+ Linux) OS=Linux ;; \
+ MINGW*|CYGWIN*) OS=Windows ;; \
+ esac; \
+ case $$ARCH in \
+ x86_64) ARCH=x86_64 ;; \
+ aarch64) ARCH=arm64 ;; \
+ arm64) ARCH=arm64 ;; \
+ esac; \
+ echo "Detected platform: $$OS/$$ARCH"; \
+ if [ "$$OS" = "Windows" ]; then \
+ curl -L https://github.com/google/addlicense/releases/download/v1.1.1/addlicense_1.1.1_$${OS}_$${ARCH}.zip -o /tmp/addlicense.zip; \
+ unzip -j /tmp/addlicense.zip -d ./downloads; \
+ rm /tmp/addlicense.zip; \
+ else \
+ curl -L https://github.com/google/addlicense/releases/download/v1.1.1/addlicense_1.1.1_$${OS}_$${ARCH}.tar.gz | tar -xz -C ./downloads; \
+ fi; \
+ chmod +x ./downloads/addlicense; \
+ echo "addlicense installed to ./downloads/addlicense"; \
+ fi
diff --git a/README.md b/README.md
index f32a5fd05..272effff2 100644
--- a/README.md
+++ b/README.md
@@ -1,246 +1,174 @@
# ApeRAG
-[阅读中文文档](./README_zh.md)
-
-
-
-## Table of Contents
-
-- [Getting Started](#getting-started)
- - [Getting Started with Kubernetes (Recommend for Production)](#getting-started-with-kubernetes)
- - [Getting Started with Source Code](#getting-started-with-source-code)
- - [Getting Started with Docker Compose](#getting-started-with-docker-compose)
+- [Quick Start](#quick-start)
+- [Key Features](#key-features)
+- [Kubernetes Deployment (Recommended for Production)](#kubernetes-deployment-recommended-for-production)
- [Development](./docs/development-guide.md)
- [Build Docker Image](./docs/build-docker-image.md)
- [Acknowledgments](#acknowledgments)
- [License](#license)
-ApeRAG is a production-ready, comprehensive RAG (Retrieval-Augmented Generation) platform designed for building advanced, enterprise-grade AI applications. It empowers developers to create sophisticated **Agentic RAG** systems with a powerful, hybrid retrieval engine.
+ApeRAG is a production-ready RAG (Retrieval-Augmented Generation) platform that combines Graph RAG, vector search, and full-text search. Build sophisticated AI applications with hybrid retrieval, multimodal document processing, and enterprise-grade management features.
+
+## Quick Start
+
+> Before installing ApeRAG, make sure your machine meets the following minimum system requirements:
+>
+> - CPU >= 2 Core
+> - RAM >= 4 GiB
+> - Docker & Docker Compose
+
+The easiest way to start ApeRAG is through Docker Compose. Before running the following commands, make sure that [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/install/) are installed on your machine:
+
+```bash
+git clone https://github.com/apecloud/ApeRAG.git
+cd ApeRAG
+cp envs/env.template .env
+cp frontend/deploy/env.local.template frontend/.env
+make compose-up
+```
+
+After running, you can access ApeRAG in your browser at:
+- **Web Interface**: http://localhost:3000/web/
+- **API Documentation**: http://localhost:8000/docs
-Key features include:
+#### Enhanced Document Parsing
-* **Advanced Hybrid Retrieval**: Go beyond simple vector search. ApeRAG integrates three powerful indexing strategies:
- * **Vector Index**: For semantic similarity search.
- * **Full-Text Index**: For precise keyword-based retrieval.
- * **Graph Knowledge Index**: Powered by an integrated and enhanced version of **[LightRAG](https://github.com/HKUDS/LightRAG)**, enabling deep relational and contextual queries.
+For enhanced document parsing capabilities, ApeRAG supports an **advanced document parsing service** powered by MinerU, which provides superior parsing for complex documents, tables, and formulas.
-* **Multimodal Document Processing**: Ingest and understand a wide array of document formats, extracting not just text but also tables, images, and complex structures from files like PDFs and DOCX.
+
+Enhanced Document Parsing Commands
-* **Enterprise-Grade Management**: ApeRAG is built for production environments with a suite of essential features:
- * **Audit Logging**: Track all critical system and user activities.
- * **LLM Model Management**: Easily configure and switch between various Large Language Models.
- * **Graph Visualization**: Visually explore and understand the knowledge graph.
- * **Comprehensive Document Management**: A user-friendly interface to manage document collections, track processing status, and inspect content.
+```bash
+# Enable advanced document parsing service
+make compose-up WITH_DOCRAY=1
-## Getting Started
+# Enable advanced parsing with GPU acceleration (recommended)
+make compose-up WITH_DOCRAY=1 WITH_GPU=1
+```
-This section will guide you through setting up ApeRAG using different methods.
+
-### Getting Started with Kubernetes
+#### Development & Contributing
-This guide covers deploying ApeRAG to Kubernetes using the provided Helm chart. It involves two main phases: setting up databases (optional if you have them) and deploying the ApeRAG application.
+For developers interested in source code development, advanced configurations, or contributing to ApeRAG, please refer to our [Development Guide](./docs/development-guide.md) for detailed setup instructions.
-**Phase 1: Deploy Databases with KubeBlocks (Optional)**
+## Key Features
-ApeRAG needs PostgreSQL, Redis, Qdrant, and Elasticsearch. If you don't have these, use the KubeBlocks scripts in `deploy/databases/`.
+**1. Hybrid Retrieval Engine**:
+Combines Graph RAG, vector search, and full-text search for comprehensive document understanding and retrieval.
-*Skip this phase if your databases are already available in your Kubernetes cluster.*
+**2. Graph RAG with LightRAG**:
+Enhanced version of LightRAG for advanced graph-based knowledge extraction, enabling deep relational and contextual queries.
-1. **Prerequisites**:
- * Kubernetes cluster.
- * `kubectl` configured.
- * Helm v3+.
+**3. MinerU Integration**:
+Advanced document parsing service powered by MinerU technology, providing superior parsing for complex documents, tables, formulas, and scientific content with optional GPU acceleration.
-2. **Database Configuration (`deploy/databases/00-config.sh`)**:
- This script controls database deployment (defaults: PostgreSQL, Redis, Qdrant, Elasticsearch in the `default` namespace). **Defaults are usually fine; no changes needed for a standard setup.** Edit only for advanced cases (e.g., changing namespace, enabling optional databases like Neo4j).
+**4. Production-Grade Deployment**:
+Full Kubernetes support with Helm charts and KubeBlocks integration for simplified deployment of production-grade databases (PostgreSQL, Redis, Qdrant, Elasticsearch, Neo4j).
-3. **Run Database Deployment Scripts**:
- ```bash
- cd deploy/databases/
- bash ./01-prepare.sh # Prepares KubeBlocks environment.
- bash ./02-install-database.sh # Deploys database clusters.
- cd ../.. # Back to project root.
- ```
- Monitor pods in the `default` namespace (or your custom one) until ready:
- ```bash
- kubectl get pods -n default
- ```
+**5. Multimodal Document Processing**:
+Supports various document formats (PDF, DOCX, etc.) with intelligent content extraction and structure recognition.
-**Phase 2: Deploy ApeRAG Application**
+**6. Enterprise Management**:
+Built-in audit logging, LLM model management, graph visualization, and comprehensive document management interface.
-With databases running:
+**7. Developer Friendly**:
+FastAPI backend, React frontend, async task processing with Celery, extensive testing, and comprehensive development guides for easy contribution and customization.
-1. **Helm Chart Configuration (`deploy/aperag/values.yaml`)**:
- * **Using KubeBlocks (Phase 1 in `default` namespace)?** Database connections in `values.yaml` are likely pre-configured. **No changes usually needed.**
- * **Using your own databases?** You MUST update `values.yaml` with your database connection details.
- * By default, this Helm chart deploys the [`doc-ray`](https://github.com/apecloud/doc-ray) service for advanced document parsing, which requires at least 4 CPU cores and 8GB of memory. If your Kubernetes cluster has insufficient resources, you can disable the `doc-ray` deployment by setting `docray.enabled` to `false`. In this case, a basic document parser will be used.
- * Optionally, review other settings (images, resources, Ingress, etc.).
+## Kubernetes Deployment (Recommended for Production)
-2. **Deploy ApeRAG with Helm**:
- This installs ApeRAG to the `default` namespace:
- ```bash
- helm install aperag ./deploy/aperag --namespace default --create-namespace
- ```
- Monitor ApeRAG pods until `Running`:
- ```bash
- kubectl get pods -n default -l app.kubernetes.io/instance=aperag
- ```
+> **Enterprise-grade deployment with high availability and scalability**
-3. **Access ApeRAG UI**:
- Use `kubectl port-forward` for quick access:
- ```bash
- kubectl port-forward svc/aperag-frontend 3000:3000 -n default
- ```
- Open `http://localhost:3000` in your browser.
+Deploy ApeRAG to Kubernetes using our provided Helm chart. This approach offers high availability, scalability, and production-grade management capabilities.
-For KubeBlocks details (credentials, uninstall), see `deploy/databases/README.md`.
+### Prerequisites
-### Getting Started with Source Code
+* [Kubernetes cluster](https://kubernetes.io/docs/setup/) (v1.20+)
+* [`kubectl`](https://kubernetes.io/docs/tasks/tools/) configured and connected to your cluster
+* [Helm v3+](https://helm.sh/docs/intro/install/) installed
-This guide is for developers looking to contribute to ApeRAG or run it locally for development. Follow these steps to get ApeRAG running from the source code:
+### Clone the Repository
-**1. Clone the Repository**
+First, clone the ApeRAG repository to get the deployment files:
-First, get the source code:
```bash
git clone https://github.com/apecloud/ApeRAG.git
cd ApeRAG
```
-**2. System Prerequisites**
-
-Before you begin, ensure your system has:
+### Step 1: Deploy Database Services
-* **Python 3.11**: The project uses Python 3.11. If it's not your system default, `uv` (see below) will attempt to use it when creating the virtual environment if available.
-* **Node.js**: Version 20 or higher is recommended for frontend development.
-* **`uv`**: This is a fast Python package installer and virtual environment manager.
- * If you don't have `uv`, the `make install` command (Step 3) will try to install it via `pip`.
-* **Docker**: (Recommended for local databases) If you plan to run dependent services like PostgreSQL, Redis, etc., locally, Docker is the easiest way. The `make run-db` command uses Docker Compose.
+ApeRAG requires PostgreSQL, Redis, Qdrant, and Elasticsearch. You have two options:
-**3. Install Dependencies & Setup Virtual Environment**
+**Option A: Use existing databases** - If you already have these databases running in your cluster, edit `deploy/aperag/values.yaml` to configure your database connection details, then skip to Step 2.
-This crucial `make` command automates several setup tasks:
+**Option B: Deploy databases with KubeBlocks** - Use our automated database deployment (database connections are pre-configured):
```bash
-make install
+# Navigate to database deployment scripts
+cd deploy/databases/
+
+# (Optional) Review configuration - defaults work for most cases
+# edit 00-config.sh
+
+# Install KubeBlocks and deploy databases
+bash ./01-prepare.sh # Installs KubeBlocks
+bash ./02-install-database.sh # Deploys PostgreSQL, Redis, Qdrant, Elasticsearch
+
+# Monitor database deployment
+kubectl get pods -n default
+
+# Return to project root for Step 2
+cd ../../
```
-This command will:
-* Verify or install `uv`.
-* Create a Python 3.11 virtual environment (located in `.venv/`) using `uv`.
-* Install all Python backend dependencies (including development tools) from `pyproject.toml` into the virtual environment.
-* Install frontend Node.js dependencies using `yarn`.
+Wait for all database pods to be in `Running` status before proceeding.
+
+### Step 2: Deploy ApeRAG Application
-**4. Configure Environment Variables**
+```bash
+# If you deployed databases with KubeBlocks in Step 1, database connections are pre-configured
+# If you're using existing databases, edit deploy/aperag/values.yaml with your connection details
-ApeRAG uses `.env` files for configuration.
+# Deploy ApeRAG
+helm install aperag ./deploy/aperag --namespace default --create-namespace
-* **Backend (`.env`)**: Copy the template and customize it for your setup.
- ```bash
- cp envs/env.template .env
- ```
- Then, edit the newly created `.env` file.
+# Monitor ApeRAG deployment
+kubectl get pods -n default -l app.kubernetes.io/instance=aperag
+```
- **Note**: If you start the required database services using the `make run-db` command (see Step 5), the default connection settings in the `.env` file (copied from `envs/env.template`) are pre-configured to work with these services, and you typically won't need to change them. You would only need to modify these if you are connecting to externally managed databases or have custom configurations.
+### Configuration Options
-* **Frontend (`frontend/.env`)** (Optional - if you are developing the frontend):
- ```bash
- cp frontend/deploy/env.local.template frontend/.env
- ```
- Edit `frontend/.env` if you need to change frontend-specific settings, such as the backend API URL (though defaults usually work for local development).
+**Resource Requirements**: By default, the Helm chart deploys the [`doc-ray`](https://github.com/apecloud/doc-ray) service, which requires 4+ CPU cores and 8GB+ RAM. To disable it, set `docray.enabled: false` in `values.yaml`.
-**5. Start Databases & Apply Migrations**
+**Advanced Settings**: Review `values.yaml` for additional configuration options including images, resources, and Ingress settings.
-* **Start Database Services**:
- If you're using Docker for local databases, the `Makefile` provides a convenient command:
- ```bash
- make run-db
- ```
+### Access Your Deployment
-* **Apply Database Migrations**:
- Once your databases are running and configured in `.env`, set up the database schema:
- ```bash
- make migrate
- ```
+Once deployed, access ApeRAG using port forwarding:
-**6. Run ApeRAG Backend Services**
+```bash
+# Forward ports for quick access (each command blocks, so run them in separate terminals)
+kubectl port-forward svc/aperag-frontend 3000:3000 -n default
+kubectl port-forward svc/aperag-api 8000:8000 -n default
-These should typically be run in separate terminal windows/tabs. The `make` commands will automatically use the correct Python virtual environment.
+# Access in browser
+# Web Interface: http://localhost:3000
+# API Documentation: http://localhost:8000/docs
+```
-* **FastAPI Development Server**:
- ```bash
- make run-backend
- ```
- This starts the main backend application. It will typically be accessible at `http://localhost:8000` and features auto-reload on code changes.
+For production environments, configure Ingress in `values.yaml` for external access.
-* **Celery Worker & Beat**:
- ```bash
- make run-celery
- ```
- This starts the Celery worker for processing asynchronous background tasks.
+### Troubleshooting
-**7. Run Frontend Development Server (Optional)**
+**Database Issues**: See `deploy/databases/README.md` for KubeBlocks management, credentials, and uninstall procedures.
-If you need to work on or view the frontend:
+**Pod Status**: Check pod logs for any deployment issues:
```bash
-make run-frontend
+kubectl logs -f deployment/aperag-api -n default
+kubectl logs -f deployment/aperag-frontend -n default
```
-This will start the frontend development server, usually available at `http://localhost:3000`. It's configured to proxy API requests to the backend running on port 8000.
-
-**8. Access ApeRAG**
-
-With the backend (and optionally frontend) services running:
-* Access the **Frontend UI** at `http://localhost:3000` (if started).
-* The **Backend API** is available at `http://localhost:8000`.
-
-Now you have ApeRAG running locally from the source code, ready for development or testing!
-
-For detailed development workflows, see the [Development Guide](./docs/DEVELOPMENT.md).
-
-### Getting Started with Docker Compose
-
-To get started with ApeRAG using Docker Compose, follow these steps:
-
-1. **Prerequisites**:
- * Docker & Docker Compose
- * Git
-
-2. **Environment Setup**:
- Configure environment variables by copying the template files:
- ```bash
- cp envs/env.template .env
- cp frontend/deploy/env.local.template frontend/.env
- ```
- Then, **edit the `.env` file** to configure your AI service settings and other necessary configurations according to your needs.
-
-3. **Start Services**:
- You can start all ApeRAG services using the following `make` command:
- ```bash
- # Optional: Use Aliyun registry if in China
- # export REGISTRY=apecloud-registry.cn-zhangjiakou.cr.aliyuncs.com
-
- # Start ApeRAG services
- make compose-up
- ```
- If you need to use the `doc-ray` service for advanced document parsing (recommended for complex documents, tables, or formulas), you can start it along with other services:
- ```bash
- make compose-up WITH_DOCRAY=1
- ```
- If your environment has GPUs, you can enable GPU support for `doc-ray` for better performance:
- ```bash
- make compose-up WITH_DOCRAY=1 WITH_GPU=1
- ```
- > **About the doc-ray parsing service**
- >
- > ApeRAG includes a basic built-in parser for extracting text from documents like PDFs and DOCX files for RAG indexing. However, this parser may not optimally handle complex document structures, tables, or formulas.
- >
- > For enhanced document parsing capabilities and more accurate content extraction, we recommend deploying the [doc-ray](https://github.com/apecloud/doc-ray) service. `doc-ray` leverages **MinerU** for advanced document analysis.
- >
- > * When `WITH_GPU=1` is not specified, `doc-ray` will run using only the CPU. In this case, it is recommended to allocate at least 4 CPU cores and 8GB+ of RAM for it.
- > * When `WITH_GPU=1` is specified, `doc-ray` will run using the GPU. It requires approximately 6GB of VRAM, along with 2 CPU cores and 8GB of RAM.
-
-4. **Access ApeRAG**:
- Once the services are up and running, open your browser and navigate to: http://localhost:3000/web/
## Acknowledgments
diff --git a/aperag/migration/sql/extensions_init.sql b/aperag/migration/sql/extensions_init.sql
new file mode 100644
index 000000000..199fd123e
--- /dev/null
+++ b/aperag/migration/sql/extensions_init.sql
@@ -0,0 +1,15 @@
+-- PostgreSQL Extensions Initialization
+-- This script creates necessary extensions for ApeRAG
+-- Extensions must be created before schema tables that use them
+
+-- Create pgvector extension for vector operations (IF NOT EXISTS keeps the script re-runnable)
+-- Used by LightRAG tables: lightrag_doc_chunks, lightrag_vdb_entity, lightrag_vdb_relation
+CREATE EXTENSION IF NOT EXISTS vector;
+
+-- Optional: Create other useful extensions
+-- Uncomment as needed based on project requirements
+
+-- CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; -- For UUID generation
+-- CREATE EXTENSION IF NOT EXISTS pg_trgm; -- For trigram text search
+-- CREATE EXTENSION IF NOT EXISTS btree_gin; -- GIN operator classes with B-tree-equivalent behavior
+-- CREATE EXTENSION IF NOT EXISTS btree_gist; -- GiST operator classes with B-tree-equivalent behavior
diff --git a/aperag/migration/versions/20250630123213-495840dd6ff9.py b/aperag/migration/versions/20250630123213-495840dd6ff9.py
deleted file mode 100644
index 9110c266b..000000000
--- a/aperag/migration/versions/20250630123213-495840dd6ff9.py
+++ /dev/null
@@ -1,107 +0,0 @@
-"""empty message
-
-Revision ID: 495840dd6ff9
-Revises: 66b96592c84a
-Create Date: 2025-06-30 12:32:13.869581
-
-"""
-from typing import Sequence, Union
-
-from alembic import op
-import sqlalchemy as sa
-from sqlalchemy.dialects import postgresql
-
-# revision identifiers, used by Alembic.
-revision: str = '495840dd6ff9'
-down_revision: Union[str, None] = '66b96592c84a'
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
-
-
-def upgrade() -> None:
- """Upgrade schema."""
- # ### commands auto generated by Alembic - please adjust! ###
- op.create_table('lightrag_graph_edges',
- sa.Column('id', sa.BigInteger(), autoincrement=True, nullable=False),
- sa.Column('source_entity_id', sa.String(length=255), nullable=False),
- sa.Column('target_entity_id', sa.String(length=255), nullable=False),
- sa.Column('weight', sa.Numeric(precision=10, scale=6), nullable=False),
- sa.Column('keywords', sa.Text(), nullable=True),
- sa.Column('description', sa.Text(), nullable=True),
- sa.Column('source_id', sa.Text(), nullable=True),
- sa.Column('file_path', sa.Text(), nullable=True),
- sa.Column('workspace', sa.String(length=255), nullable=False),
- sa.Column('createtime', sa.DateTime(timezone=True), nullable=False),
- sa.Column('updatetime', sa.DateTime(timezone=True), nullable=False),
- sa.PrimaryKeyConstraint('id'),
- sa.UniqueConstraint('workspace', 'source_entity_id', 'target_entity_id', name='uq_lightrag_graph_edges_workspace_source_target')
- )
- op.create_index('idx_lightrag_edges_weight', 'lightrag_graph_edges', ['workspace', 'weight'], unique=False)
- op.create_index('idx_lightrag_edges_workspace_source', 'lightrag_graph_edges', ['workspace', 'source_entity_id'], unique=False)
- op.create_index('idx_lightrag_edges_workspace_target', 'lightrag_graph_edges', ['workspace', 'target_entity_id'], unique=False)
- op.create_table('lightrag_graph_nodes',
- sa.Column('id', sa.BigInteger(), autoincrement=True, nullable=False),
- sa.Column('entity_id', sa.String(length=256), nullable=False),
- sa.Column('entity_name', sa.String(length=255), nullable=True),
- sa.Column('entity_type', sa.String(length=255), nullable=True),
- sa.Column('description', sa.Text(), nullable=True),
- sa.Column('source_id', sa.Text(), nullable=True),
- sa.Column('file_path', sa.Text(), nullable=True),
- sa.Column('workspace', sa.String(length=255), nullable=False),
- sa.Column('createtime', sa.DateTime(timezone=True), nullable=False),
- sa.Column('updatetime', sa.DateTime(timezone=True), nullable=False),
- sa.PrimaryKeyConstraint('id'),
- sa.UniqueConstraint('workspace', 'entity_id', name='uq_lightrag_graph_nodes_workspace_entity')
- )
- op.create_index('idx_lightrag_nodes_entity_name', 'lightrag_graph_nodes', ['workspace', 'entity_name'], unique=False)
- op.create_index('idx_lightrag_nodes_entity_type', 'lightrag_graph_nodes', ['workspace', 'entity_type'], unique=False)
- op.drop_table('lightrag_doc_status')
- op.drop_table('lightrag_doc_full')
- op.drop_table('lightrag_llm_cache')
- # ### end Alembic commands ###
-
-
-def downgrade() -> None:
- """Downgrade schema."""
- # ### commands auto generated by Alembic - please adjust! ###
- op.create_table('lightrag_llm_cache',
- sa.Column('workspace', sa.VARCHAR(length=255), autoincrement=False, nullable=False),
- sa.Column('id', sa.VARCHAR(length=255), autoincrement=False, nullable=False),
- sa.Column('mode', sa.VARCHAR(length=32), autoincrement=False, nullable=False),
- sa.Column('original_prompt', sa.TEXT(), autoincrement=False, nullable=True),
- sa.Column('return_value', sa.TEXT(), autoincrement=False, nullable=True),
- sa.Column('create_time', postgresql.TIMESTAMP(timezone=True), autoincrement=False, nullable=False),
- sa.Column('update_time', postgresql.TIMESTAMP(timezone=True), autoincrement=False, nullable=True),
- sa.PrimaryKeyConstraint('workspace', 'id', 'mode', name=op.f('lightrag_llm_cache_pkey'))
- )
- op.create_table('lightrag_doc_full',
- sa.Column('id', sa.VARCHAR(length=255), autoincrement=False, nullable=False),
- sa.Column('workspace', sa.VARCHAR(length=255), autoincrement=False, nullable=False),
- sa.Column('doc_name', sa.VARCHAR(length=1024), autoincrement=False, nullable=True),
- sa.Column('content', sa.TEXT(), autoincrement=False, nullable=True),
- sa.Column('meta', postgresql.JSON(astext_type=sa.Text()), autoincrement=False, nullable=True),
- sa.Column('create_time', postgresql.TIMESTAMP(timezone=True), autoincrement=False, nullable=False),
- sa.Column('update_time', postgresql.TIMESTAMP(timezone=True), autoincrement=False, nullable=False),
- sa.PrimaryKeyConstraint('id', 'workspace', name=op.f('lightrag_doc_full_pkey'))
- )
- op.create_table('lightrag_doc_status',
- sa.Column('workspace', sa.VARCHAR(length=255), autoincrement=False, nullable=False),
- sa.Column('id', sa.VARCHAR(length=255), autoincrement=False, nullable=False),
- sa.Column('content', sa.TEXT(), autoincrement=False, nullable=True),
- sa.Column('content_summary', sa.VARCHAR(length=255), autoincrement=False, nullable=True),
- sa.Column('content_length', sa.INTEGER(), autoincrement=False, nullable=True),
- sa.Column('chunks_count', sa.INTEGER(), autoincrement=False, nullable=True),
- sa.Column('status', postgresql.ENUM('pending', 'processing', 'processed', 'failed', name='lightragdocstatus'), autoincrement=False, nullable=True),
- sa.Column('file_path', sa.VARCHAR(length=512), autoincrement=False, nullable=True),
- sa.Column('created_at', postgresql.TIMESTAMP(timezone=True), autoincrement=False, nullable=False),
- sa.Column('updated_at', postgresql.TIMESTAMP(timezone=True), autoincrement=False, nullable=False),
- sa.PrimaryKeyConstraint('workspace', 'id', name=op.f('uq_lightrag_doc_status_workspace_id'))
- )
- op.drop_index('idx_lightrag_nodes_entity_type', table_name='lightrag_graph_nodes')
- op.drop_index('idx_lightrag_nodes_entity_name', table_name='lightrag_graph_nodes')
- op.drop_table('lightrag_graph_nodes')
- op.drop_index('idx_lightrag_edges_workspace_target', table_name='lightrag_graph_edges')
- op.drop_index('idx_lightrag_edges_workspace_source', table_name='lightrag_graph_edges')
- op.drop_index('idx_lightrag_edges_weight', table_name='lightrag_graph_edges')
- op.drop_table('lightrag_graph_edges')
- # ### end Alembic commands ###
diff --git a/aperag/migration/versions/20250630151946-97d64d3fe985.py b/aperag/migration/versions/20250630151946-97d64d3fe985.py
deleted file mode 100644
index 18589926c..000000000
--- a/aperag/migration/versions/20250630151946-97d64d3fe985.py
+++ /dev/null
@@ -1,46 +0,0 @@
-"""empty message
-
-Revision ID: 97d64d3fe985
-Revises: 495840dd6ff9
-Create Date: 2025-06-30 15:19:46.211896
-
-"""
-from typing import Sequence, Union
-
-from alembic import op
-import sqlalchemy as sa
-
-
-# revision identifiers, used by Alembic.
-revision: str = '97d64d3fe985'
-down_revision: Union[str, None] = '495840dd6ff9'
-branch_labels: Union[str, Sequence[str], None] = None
-depends_on: Union[str, Sequence[str], None] = None
-
-
-def upgrade() -> None:
- """Upgrade schema."""
- # ### commands auto generated by Alembic - please adjust! ###
- op.create_index('idx_lightrag_edges_degree_calc', 'lightrag_graph_edges', ['workspace', 'source_entity_id', 'target_entity_id', 'weight'], unique=False)
- op.create_index('idx_lightrag_edges_metadata', 'lightrag_graph_edges', ['workspace', 'source_entity_id', 'target_entity_id', 'weight', 'keywords'], unique=False)
- op.create_index('idx_lightrag_edges_workspace_createtime', 'lightrag_graph_edges', ['workspace', 'createtime'], unique=False)
- op.create_index('idx_lightrag_edges_workspace_source_target', 'lightrag_graph_edges', ['workspace', 'source_entity_id', 'target_entity_id'], unique=False)
- op.create_index('idx_lightrag_edges_workspace_target_source', 'lightrag_graph_edges', ['workspace', 'target_entity_id', 'source_entity_id'], unique=False)
- op.create_index('idx_lightrag_nodes_entity_type_createtime', 'lightrag_graph_nodes', ['workspace', 'entity_type', 'createtime'], unique=False)
- op.create_index('idx_lightrag_nodes_workspace_createtime', 'lightrag_graph_nodes', ['workspace', 'createtime'], unique=False)
- op.create_index('idx_lightrag_nodes_workspace_type_id', 'lightrag_graph_nodes', ['workspace', 'entity_type', 'entity_id'], unique=False)
- # ### end Alembic commands ###
-
-
-def downgrade() -> None:
- """Downgrade schema."""
- # ### commands auto generated by Alembic - please adjust! ###
- op.drop_index('idx_lightrag_nodes_workspace_type_id', table_name='lightrag_graph_nodes')
- op.drop_index('idx_lightrag_nodes_workspace_createtime', table_name='lightrag_graph_nodes')
- op.drop_index('idx_lightrag_nodes_entity_type_createtime', table_name='lightrag_graph_nodes')
- op.drop_index('idx_lightrag_edges_workspace_target_source', table_name='lightrag_graph_edges')
- op.drop_index('idx_lightrag_edges_workspace_source_target', table_name='lightrag_graph_edges')
- op.drop_index('idx_lightrag_edges_workspace_createtime', table_name='lightrag_graph_edges')
- op.drop_index('idx_lightrag_edges_metadata', table_name='lightrag_graph_edges')
- op.drop_index('idx_lightrag_edges_degree_calc', table_name='lightrag_graph_edges')
- # ### end Alembic commands ###
diff --git a/aperag/migration/versions/20250703133046-db9c88848f52.py b/aperag/migration/versions/20250703133046-db9c88848f52.py
new file mode 100644
index 000000000..613ce95fa
--- /dev/null
+++ b/aperag/migration/versions/20250703133046-db9c88848f52.py
@@ -0,0 +1,33 @@
+"""Create PostgreSQL extensions (pgvector)
+
+Revision ID: db9c88848f52
+Revises:
+Create Date: 2025-07-03 13:30:46.635272
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+from aperag.migration.utils import execute_sql_file
+
+
+# Revision identifiers, used by Alembic; down_revision is None, so this is the base revision.
+revision: str = 'db9c88848f52'
+down_revision: Union[str, None] = None
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+ """Create the PostgreSQL extensions by running the extensions_init.sql script."""
+ # execute_sql_file comes from aperag.migration.utils; presumably it resolves the name under migration/sql/ — confirm
+ execute_sql_file("extensions_init.sql")
+
+
+def downgrade() -> None:
+ """Drop the pgvector ("vector") extension if it is installed."""
+ # Note: CASCADE also removes every object that depends on the extension (PostgreSQL
+ # DROP EXTENSION semantics), so run this downgrade with care on databases that hold data.
+ op.execute(sa.text("DROP EXTENSION IF EXISTS vector CASCADE"))
diff --git a/aperag/migration/versions/20250624132425-850b2c5dc08f.py b/aperag/migration/versions/20250703133208-0b274fcc91e2.py
similarity index 83%
rename from aperag/migration/versions/20250624132425-850b2c5dc08f.py
rename to aperag/migration/versions/20250703133208-0b274fcc91e2.py
index 14861723c..16686c9d0 100644
--- a/aperag/migration/versions/20250624132425-850b2c5dc08f.py
+++ b/aperag/migration/versions/20250703133208-0b274fcc91e2.py
@@ -1,8 +1,8 @@
"""empty message
-Revision ID: 850b2c5dc08f
-Revises:
-Create Date: 2025-06-24 13:24:25.714734
+Revision ID: 0b274fcc91e2
+Revises: db9c88848f52
+Create Date: 2025-07-03 13:32:08.830672
"""
from typing import Sequence, Union
@@ -12,8 +12,8 @@
from pgvector.sqlalchemy import Vector
# revision identifiers, used by Alembic.
-revision: str = '850b2c5dc08f'
-down_revision: Union[str, None] = None
+revision: str = '0b274fcc91e2'
+down_revision: Union[str, None] = 'db9c88848f52'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
@@ -197,40 +197,48 @@ def upgrade() -> None:
sa.Column('update_time', sa.DateTime(timezone=True), nullable=False),
sa.PrimaryKeyConstraint('id', 'workspace')
)
- op.create_table('lightrag_doc_full',
- sa.Column('id', sa.String(length=255), nullable=False),
- sa.Column('workspace', sa.String(length=255), nullable=False),
- sa.Column('doc_name', sa.String(length=1024), nullable=True),
- sa.Column('content', sa.Text(), nullable=True),
- sa.Column('meta', sa.JSON(), nullable=True),
- sa.Column('create_time', sa.DateTime(timezone=True), nullable=False),
- sa.Column('update_time', sa.DateTime(timezone=True), nullable=False),
- sa.PrimaryKeyConstraint('id', 'workspace')
- )
- op.create_table('lightrag_doc_status',
+ op.create_table('lightrag_graph_edges',
+ sa.Column('id', sa.BigInteger(), autoincrement=True, nullable=False),
+ sa.Column('source_entity_id', sa.String(length=255), nullable=False),
+ sa.Column('target_entity_id', sa.String(length=255), nullable=False),
+ sa.Column('weight', sa.Numeric(precision=10, scale=6), nullable=False),
+ sa.Column('keywords', sa.Text(), nullable=True),
+ sa.Column('description', sa.Text(), nullable=True),
+ sa.Column('source_id', sa.Text(), nullable=True),
+ sa.Column('file_path', sa.Text(), nullable=True),
sa.Column('workspace', sa.String(length=255), nullable=False),
- sa.Column('id', sa.String(length=255), nullable=False),
- sa.Column('content', sa.Text(), nullable=True),
- sa.Column('content_summary', sa.String(length=255), nullable=True),
- sa.Column('content_length', sa.Integer(), nullable=True),
- sa.Column('chunks_count', sa.Integer(), nullable=True),
- sa.Column('status', sa.Enum('pending', 'processing', 'processed', 'failed', name='lightragdocstatus'), nullable=True),
- sa.Column('file_path', sa.String(length=512), nullable=True),
- sa.Column('created_at', sa.DateTime(timezone=True), nullable=False),
- sa.Column('updated_at', sa.DateTime(timezone=True), nullable=False),
- sa.PrimaryKeyConstraint('workspace', 'id'),
- sa.UniqueConstraint('workspace', 'id', name='uq_lightrag_doc_status_workspace_id')
+ sa.Column('createtime', sa.DateTime(timezone=True), nullable=False),
+ sa.Column('updatetime', sa.DateTime(timezone=True), nullable=False),
+ sa.PrimaryKeyConstraint('id'),
+ sa.UniqueConstraint('workspace', 'source_entity_id', 'target_entity_id', name='uq_lightrag_graph_edges_workspace_source_target')
)
- op.create_table('lightrag_llm_cache',
+ op.create_index('idx_lightrag_edges_degree_calc', 'lightrag_graph_edges', ['workspace', 'source_entity_id', 'target_entity_id', 'weight'], unique=False)
+ op.create_index('idx_lightrag_edges_metadata', 'lightrag_graph_edges', ['workspace', 'source_entity_id', 'target_entity_id', 'weight', 'keywords'], unique=False)
+ op.create_index('idx_lightrag_edges_weight', 'lightrag_graph_edges', ['workspace', 'weight'], unique=False)
+ op.create_index('idx_lightrag_edges_workspace_createtime', 'lightrag_graph_edges', ['workspace', 'createtime'], unique=False)
+ op.create_index('idx_lightrag_edges_workspace_source', 'lightrag_graph_edges', ['workspace', 'source_entity_id'], unique=False)
+ op.create_index('idx_lightrag_edges_workspace_source_target', 'lightrag_graph_edges', ['workspace', 'source_entity_id', 'target_entity_id'], unique=False)
+ op.create_index('idx_lightrag_edges_workspace_target', 'lightrag_graph_edges', ['workspace', 'target_entity_id'], unique=False)
+ op.create_index('idx_lightrag_edges_workspace_target_source', 'lightrag_graph_edges', ['workspace', 'target_entity_id', 'source_entity_id'], unique=False)
+ op.create_table('lightrag_graph_nodes',
+ sa.Column('id', sa.BigInteger(), autoincrement=True, nullable=False),
+ sa.Column('entity_id', sa.String(length=256), nullable=False),
+ sa.Column('entity_name', sa.String(length=255), nullable=True),
+ sa.Column('entity_type', sa.String(length=255), nullable=True),
+ sa.Column('description', sa.Text(), nullable=True),
+ sa.Column('source_id', sa.Text(), nullable=True),
+ sa.Column('file_path', sa.Text(), nullable=True),
sa.Column('workspace', sa.String(length=255), nullable=False),
- sa.Column('id', sa.String(length=255), nullable=False),
- sa.Column('mode', sa.String(length=32), nullable=False),
- sa.Column('original_prompt', sa.Text(), nullable=True),
- sa.Column('return_value', sa.Text(), nullable=True),
- sa.Column('create_time', sa.DateTime(timezone=True), nullable=False),
- sa.Column('update_time', sa.DateTime(timezone=True), nullable=True),
- sa.PrimaryKeyConstraint('workspace', 'id', 'mode')
+ sa.Column('createtime', sa.DateTime(timezone=True), nullable=False),
+ sa.Column('updatetime', sa.DateTime(timezone=True), nullable=False),
+ sa.PrimaryKeyConstraint('id'),
+ sa.UniqueConstraint('workspace', 'entity_id', name='uq_lightrag_graph_nodes_workspace_entity')
)
+ op.create_index('idx_lightrag_nodes_entity_name', 'lightrag_graph_nodes', ['workspace', 'entity_name'], unique=False)
+ op.create_index('idx_lightrag_nodes_entity_type', 'lightrag_graph_nodes', ['workspace', 'entity_type'], unique=False)
+ op.create_index('idx_lightrag_nodes_entity_type_createtime', 'lightrag_graph_nodes', ['workspace', 'entity_type', 'createtime'], unique=False)
+ op.create_index('idx_lightrag_nodes_workspace_createtime', 'lightrag_graph_nodes', ['workspace', 'createtime'], unique=False)
+ op.create_index('idx_lightrag_nodes_workspace_type_id', 'lightrag_graph_nodes', ['workspace', 'entity_type', 'entity_id'], unique=False)
op.create_table('lightrag_vdb_entity',
sa.Column('id', sa.String(length=255), nullable=False),
sa.Column('workspace', sa.String(length=255), nullable=False),
@@ -391,9 +399,21 @@ def downgrade() -> None:
op.drop_table('llm_provider')
op.drop_table('lightrag_vdb_relation')
op.drop_table('lightrag_vdb_entity')
- op.drop_table('lightrag_llm_cache')
- op.drop_table('lightrag_doc_status')
- op.drop_table('lightrag_doc_full')
+ op.drop_index('idx_lightrag_nodes_workspace_type_id', table_name='lightrag_graph_nodes')
+ op.drop_index('idx_lightrag_nodes_workspace_createtime', table_name='lightrag_graph_nodes')
+ op.drop_index('idx_lightrag_nodes_entity_type_createtime', table_name='lightrag_graph_nodes')
+ op.drop_index('idx_lightrag_nodes_entity_type', table_name='lightrag_graph_nodes')
+ op.drop_index('idx_lightrag_nodes_entity_name', table_name='lightrag_graph_nodes')
+ op.drop_table('lightrag_graph_nodes')
+ op.drop_index('idx_lightrag_edges_workspace_target_source', table_name='lightrag_graph_edges')
+ op.drop_index('idx_lightrag_edges_workspace_target', table_name='lightrag_graph_edges')
+ op.drop_index('idx_lightrag_edges_workspace_source_target', table_name='lightrag_graph_edges')
+ op.drop_index('idx_lightrag_edges_workspace_source', table_name='lightrag_graph_edges')
+ op.drop_index('idx_lightrag_edges_workspace_createtime', table_name='lightrag_graph_edges')
+ op.drop_index('idx_lightrag_edges_weight', table_name='lightrag_graph_edges')
+ op.drop_index('idx_lightrag_edges_metadata', table_name='lightrag_graph_edges')
+ op.drop_index('idx_lightrag_edges_degree_calc', table_name='lightrag_graph_edges')
+ op.drop_table('lightrag_graph_edges')
op.drop_table('lightrag_doc_chunks')
op.drop_table('invitation')
op.drop_index(op.f('ix_document_index_status'), table_name='document_index')
diff --git a/aperag/migration/versions/20250624132601-66b96592c84a.py b/aperag/migration/versions/20250703133304-b598e645b2ba.py
similarity index 65%
rename from aperag/migration/versions/20250624132601-66b96592c84a.py
rename to aperag/migration/versions/20250703133304-b598e645b2ba.py
index 0c6c1c551..ad004be7b 100644
--- a/aperag/migration/versions/20250624132601-66b96592c84a.py
+++ b/aperag/migration/versions/20250703133304-b598e645b2ba.py
@@ -1,8 +1,8 @@
-"""empty message
+"""Initialize LLM model configurations
-Revision ID: 66b96592c84a
-Revises: 850b2c5dc08f
-Create Date: 2025-06-24 13:26:01.031627
+Revision ID: b598e645b2ba
+Revises: 0b274fcc91e2
+Create Date: 2025-07-03 13:33:04.315167
"""
from typing import Sequence, Union
@@ -14,19 +14,20 @@
# revision identifiers, used by Alembic.
-revision: str = '66b96592c84a'
-down_revision: Union[str, None] = '850b2c5dc08f'
+revision: str = 'b598e645b2ba'
+down_revision: Union[str, None] = '0b274fcc91e2'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
+
def upgrade() -> None:
- """Initialize model configurations data."""
+ """Initialize LLM model configurations data."""
# Execute model configurations initialization SQL
execute_sql_file("model_configs_init.sql")
def downgrade() -> None:
- """Remove model configurations data."""
+ """Remove LLM model configurations data."""
# Clean up model configurations data
op.execute(sa.text("DELETE FROM llm_provider_models"))
op.execute(sa.text("DELETE FROM llm_provider"))
diff --git a/deploy/aperag/values.yaml b/deploy/aperag/values.yaml
index 8bbc4167a..f7afd9459 100644
--- a/deploy/aperag/values.yaml
+++ b/deploy/aperag/values.yaml
@@ -1,6 +1,6 @@
image:
repository: "docker.io/apecloud/aperag" # Full image name including registry
- tag: "v0.5.0-alpha.28"
+ tag: "v0.5.0-alpha.30"
pullPolicy: IfNotPresent
nameOverride: ""
@@ -255,7 +255,7 @@ frontend:
replicaCount: 1
image:
repository: "docker.io/apecloud/aperag-frontend" # Full image name including registry
- tag: "v0.5.0-alpha.28"
+ tag: "v0.5.0-alpha.30"
pullPolicy: IfNotPresent
resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
diff --git a/docker-compose.yml b/docker-compose.yml
index 664be6a84..bad297891 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -3,14 +3,18 @@ volumes:
aperag-qdrant-data: {}
aperag-redis-data: {}
aperag-es-data: {}
+ aperag-neo4j-data: {}
aperag-shared-data: {}
services:
+ # ==============================================
+ # Application Services (profile "app": started only when that profile is enabled; skipped for infra-only runs)
+ # ==============================================
api: &api
build:
context: .
dockerfile: ./Dockerfile
- image: ${REGISTRY:-docker.io}/apecloud/aperag:${VERSION:-latest}
+ image: ${REGISTRY:-docker.io}/apecloud/aperag:${VERSION:-v0.5.0-alpha.30}
container_name: aperag-api
depends_on:
redis:
@@ -33,12 +37,13 @@ services:
ports:
- "8000:8000"
command: ["/app/scripts/entrypoint.sh", "/app/scripts/start-api.sh"]
+ profiles: ["app"]
frontend:
build:
context: ./frontend
dockerfile: ./Dockerfile
- image: ${REGISTRY:-docker.io}/apecloud/aperag-frontend:${VERSION:-latest}
+ image: ${REGISTRY:-docker.io}/apecloud/aperag-frontend:${VERSION:-v0.5.0-alpha.30}
container_name: aperag-frontend
depends_on:
- api
@@ -49,9 +54,10 @@ services:
- APERAG_CONSOLE_SERVICE_PORT=8000
ports:
- "3000:3000"
+ profiles: ["app"]
celeryworker:
- image: ${REGISTRY:-docker.io}/apecloud/aperag:${VERSION:-latest}
+ image: ${REGISTRY:-docker.io}/apecloud/aperag:${VERSION:-v0.5.0-alpha.30}
build:
context: .
dockerfile: ./Dockerfile
@@ -73,9 +79,10 @@ services:
- NODE_IP=aperag-celeryworker
- DOCRAY_HOST=${DOCRAY_HOST}
command: ["/app/scripts/entrypoint.sh", "/app/scripts/start-celery-worker.sh"]
+ profiles: ["app"]
celerybeat:
- image: ${REGISTRY:-docker.io}/apecloud/aperag:${VERSION:-latest}
+ image: ${REGISTRY:-docker.io}/apecloud/aperag:${VERSION:-v0.5.0-alpha.30}
build:
context: .
dockerfile: ./Dockerfile
@@ -89,10 +96,11 @@ services:
environment:
- NODE_IP=aperag-celerybeat
command: ["/app/scripts/entrypoint.sh", "/app/scripts/start-celery-beat.sh"]
+ profiles: ["app"]
flower:
<<: *api
- image: ${REGISTRY:-docker.io}/apecloud/aperag:${VERSION:-latest}
+ image: ${REGISTRY:-docker.io}/apecloud/aperag:${VERSION:-v0.5.0-alpha.30}
build:
context: .
dockerfile: ./Dockerfile
@@ -105,7 +113,11 @@ services:
environment:
- NODE_IP=aperag-flower
command: ["/app/scripts/entrypoint.sh", "/app/scripts/start-celery-flower.sh"]
+ profiles: ["app"]
+ # ==============================================
+ # Infrastructure Services (always available)
+ # ==============================================
postgres:
image: pgvector/pgvector:pg16
container_name: aperag-postgres
@@ -155,6 +167,27 @@ services:
command: bash /usr/share/elasticsearch/bin/init-es.sh
restart: on-failure
+ # ==============================================
+ # Optional Services
+ # ==============================================
+ neo4j:
+ image: neo4j:5.26.5-enterprise
+ container_name: aperag-neo4j
+ ports:
+ - "7474:7474" # HTTP
+ - "7687:7687" # Bolt
+ environment:
+ - NEO4J_AUTH=${NEO4J_AUTH:-neo4j/password} # dev-only default; set NEO4J_AUTH to override
+ - NEO4J_PLUGINS=["apoc"]
+ - NEO4J_ACCEPT_LICENSE_AGREEMENT=yes
+ - NEO4J_apoc_export_file_enabled=true
+ volumes:
+ - aperag-neo4j-data:/data
+ profiles: ["neo4j"] # opt-in: only started when the neo4j profile is enabled
+
+ # ==============================================
+ # DocRay Services (existing profiles)
+ # ==============================================
docray:
image: ${REGISTRY:-docker.io}/apecloud/doc-ray:${DOCRAY_VERSION:-v0.1.1}
container_name: aperag-docray
diff --git a/docs/development-guide.md b/docs/development-guide.md
index 2649720ee..85184ad48 100644
--- a/docs/development-guide.md
+++ b/docs/development-guide.md
@@ -1,78 +1,394 @@
-# Development Guide
+# 🛠️ Development Guide
-This section focuses on the development workflow and tools provided for ApeRAG.
+This guide covers setting up a development environment and the day-to-day development workflow for ApeRAG. It is intended for developers who want to contribute to ApeRAG or run it locally for development purposes.
-## Development Environment
+## π Development Environment Setup
-It's recommended to use the "Getting Started with Source Code" approach for setting up a development environment. Ensure all prerequisites are met and dependencies are installed using `make install`.
+Follow these steps to set up ApeRAG from source code for development:
-## Key `make` Commands for Development
+### 1. π Clone the Repository and Setup Environment
-The `Makefile` at the root of the project provides several helpful commands to streamline development:
+First, get the source code and configure environment variables:
-* **Environment & Dependencies**:
- * `make install`: Installs all necessary backend (Python) and frontend (Node.js) dependencies. It sets up a Python 3.11 virtual environment using `uv`.
- * `make dev`: Installs development tools like pre-commit hooks to ensure code quality before commits.
+```bash
+git clone https://github.com/apecloud/ApeRAG.git
+cd ApeRAG
+cp envs/env.template .env
+cp frontend/deploy/env.local.template frontend/.env
+```
-* **Running Services**:
- * `make run-db`: (Uses Docker Compose) Starts all required database services (PostgreSQL, Redis, Qdrant, etc.) as defined in `docker-compose.yml`. Useful if you don't have these services running elsewhere.
- * `make run-backend`: Starts the FastAPI development server.
- * `make run-frontend`: Starts the UmiJS frontend development server.
- * `make run-celery`: Starts a Celery worker for processing background tasks (includes Celery Beat).
- * `make run-celery-beat`: (Note: `make run-celery` usually includes Beat due to the `-B` flag. This target might be redundant or for specific scenarios. Check Makefile if explicitly needed separate from worker).
+Edit the `.env` file to configure your AI service settings if needed. The default settings work with the local database services started in the next step.
-* **Code Quality & Testing**:
- * `make format`: Formats Python code using Ruff and frontend code using Prettier.
- * `make lint`: Lints Python code with Ruff and frontend code.
- * `make static-check`: Performs static type checking for Python code using Mypy (if configured).
- * `make test`: Runs all automated tests (Python unit tests, integration tests).
+### 2. π System Prerequisites
-* **Database Management**:
- * `make makemigration`: Creates new database migration files based on changes to SQLAlchemy models.
- * `make migrate`: Applies pending database migrations to your connected database.
- * `make connect-metadb`: Provides a command to connect to the primary PostgreSQL database (usually for inspection, if run via `make run-db`).
+Before you begin, ensure your system has:
-* **Generators**:
- * `make generate-models`: Generates Pydantic models from the OpenAPI schema.
- * `make generate-frontend-sdk`: Generates the frontend API client/SDK from the OpenAPI specification. **Run this command whenever backend API definitions change.**
+* **Node.js**: Version 20 or higher is recommended for frontend development. [Download Node.js](https://nodejs.org/)
+* **Docker & Docker Compose**: Required for running database services locally. [Download Docker](https://docs.docker.com/get-docker/)
-* **Docker Compose (for local full-stack testing)**:
- * `make compose-up`: Starts all services (backend, frontend, databases, Celery) using Docker Compose.
- * `make compose-down`: Stops all services started with `make compose-up`.
- * `make compose-logs`: Tails the logs from all services running under Docker Compose.
+**Note**: Python 3.11 is required but will be automatically managed by `uv` in the next steps.
-* **Cleanup**:
- * `make clean`: Removes temporary files, build artifacts, and caches from the development environment.
+### 3. 🗄️ Start Database Services
-## Typical Development Workflow
+Use Docker Compose to start the essential database services:
-Contributing to ApeRAG involves the following typical workflow. Before starting significant work, it's a good idea to open an issue to discuss your proposed changes with the maintainers.
+```bash
+# Start core databases: PostgreSQL, Redis, Qdrant, Elasticsearch
+make compose-infra
+```
-1. **Fork and Branch**:
- * Fork the official ApeRAG repository to your GitHub account.
- * Create a new branch from `main` for your feature or bug fix. Use a descriptive branch name (e.g., `feat/add-new-parser` or `fix/login-bug`).
+This will start all required database services in the background. The default connection settings in your `.env` file are pre-configured to work with these services.
-2. **Environment Setup**: Ensure your development environment is set up as described in [Development Environment](#development-environment) and [Getting Started with Source Code](../README.md#getting-started-with-source-code) (dependencies installed, databases running/accessible).
+
+**Advanced Database Options:**
-3. **Code Implementation**:
- * Make your code changes in the backend (`aperag/`) or frontend (`frontend/src/`) directories.
- * **Follow Code Style**: Adhere to PEP 8 for Python and standard practices for TypeScript/React. Use English for all code, comments, and documentation.
- * Regularly use `make format` and `make lint` to ensure code consistency and quality.
+```bash
+# Use Neo4j instead of PostgreSQL for graph storage
+make compose-infra WITH_NEO4J=1
-4. **Handle API and Model Changes**:
- * If you change backend API endpoints (add, remove, modify parameters/responses): Update the OpenAPI specification (usually in `aperag/api/openapi.yaml`) and then run `make generate-frontend-sdk` to update the frontend client. Also, run `make generate-models` if schema components are affected.
- * If you change SQLAlchemy models: Run `make makemigration` to create migration files, and then `make migrate` to apply changes to your development database.
+# Add advanced document parsing service (DocRay)
+make compose-infra WITH_DOCRAY=1
-5. **Testing**: Add unit tests for new backend logic and integration tests for API changes. Ensure all existing tests pass by running `make test`.
+# Combine multiple options
+make compose-infra WITH_NEO4J=1 WITH_DOCRAY=1
-6. **Documentation**: If your changes affect API specifications, user guides, or deployment processes, update the relevant documentation (e.g., OpenAPI specs, this README, files in `docs/`).
+# GPU-accelerated document parsing (requires ~6GB VRAM)
+make compose-infra WITH_DOCRAY=1 WITH_GPU=1
+```
-7. **Commit and Push**:
- * Make clear and concise commit messages.
- * Push your branch to your fork on GitHub.
+**Note**: DocRay provides enhanced document parsing for complex PDFs, tables, and formulas. CPU mode requires 4+ cores and 8GB+ RAM.
-8. **Submit a Pull Request (PR)**:
- * Submit a PR from your branch to the `main` branch of the official ApeRAG repository.
- * Provide a clear description of your changes in the PR and link any relevant issues.
+
-9. **Code Review**: Your PR will be reviewed by maintainers. Be prepared to address feedback and make further changes if necessary.
\ No newline at end of file
+### 4. ⚙️ Setup Development Environment
+
+Create a Python virtual environment and set up the development tools:
+
+```bash
+make dev
+```
+
+This command will:
+* Install `uv` if not already available
+* Create a Python 3.11 virtual environment (located in `.venv/`)
+* Install development tools (redocly, openapi-generator-cli, etc.)
+* Install pre-commit hooks for code quality
+* Install addlicense tool for license management
+
+**Activate the virtual environment:**
+```bash
+source .venv/bin/activate
+```
+
+You'll know it's active when you see `(.venv)` in your terminal prompt.
+
+### 5. 📦 Install Dependencies
+
+Install all backend and frontend dependencies:
+
+```bash
+make install
+```
+
+This command will:
+* Install all Python backend dependencies from `pyproject.toml` into the virtual environment
+* Install frontend Node.js dependencies using `yarn`
+
+### 6. π Apply Database Migrations
+
+Set up the database schema:
+
+```bash
+make migrate
+```
+
+### 7. ▶️ Start Development Services
+
+Now you can start the development services. Open separate terminal windows/tabs for each service:
+
+**Terminal 1 - Backend API Server:**
+```bash
+make run-backend
+```
+This starts the FastAPI development server at `http://localhost:8000` with auto-reload on code changes.
+
+**Terminal 2 - Celery Worker:**
+```bash
+make run-celery
+```
+This starts the Celery worker for processing asynchronous background tasks.
+
+**Terminal 3 - Frontend (Optional):**
+```bash
+make run-frontend
+```
+This starts the frontend development server at `http://localhost:3000` with hot reload.
+
+### 8. π Access ApeRAG
+
+With the services running, you can access:
+* **Frontend UI**: http://localhost:3000 (if started)
+* **Backend API**: http://localhost:8000
+* **API Documentation**: http://localhost:8000/docs
+
+### 9. ⏹️ Stopping Services
+
+To stop the development environment:
+
+**Stop Database Services:**
+```bash
+# Stop database services (data preserved)
+make compose-down
+
+# Stop services and remove all data volumes
+make compose-down REMOVE_VOLUMES=1
+```
+
+**Stop Development Services:**
+- Backend API Server: Press `Ctrl+C` in the terminal running `make run-backend`
+- Celery Worker: Press `Ctrl+C` in the terminal running `make run-celery`
+- Frontend Server: Press `Ctrl+C` in the terminal running `make run-frontend`
+
+**Data Management:**
+- `make compose-down` - Stops services but preserves all data (PostgreSQL, Redis, Qdrant, etc.)
+- `make compose-down REMOVE_VOLUMES=1` - Stops services and **⚠️ permanently deletes all data**
+- If you have already run `make compose-down`, you can still run `make compose-down REMOVE_VOLUMES=1` afterwards to delete the data volumes
+
+**Verify Data Removal:**
+```bash
+# Check if volumes still exist
+docker volume ls | grep aperag
+
+# Should return no results after REMOVE_VOLUMES=1
+```
+
+Now you have ApeRAG running locally from source code, ready for development! π
+
+## β Common Development Tasks
+
+### Q: 🔧 How do I add or modify a REST API endpoint?
+
+**Complete workflow:**
+1. Edit OpenAPI specification: `aperag/api/paths/[endpoint-name].yaml`
+2. Regenerate backend models:
+ ```bash
+ make generate-models # This runs merge-openapi internally
+ ```
+3. Implement backend view: `aperag/views/[module].py`
+4. Generate frontend TypeScript client:
+ ```bash
+ make generate-frontend-sdk # Updates frontend/src/api/
+ ```
+5. Test the API:
+ ```bash
+ make test
+ # ✅ Check live docs: http://localhost:8000/docs
+ ```
+
+### Q: 🗄️ How do I modify database models/schema?
+
+**Database migration workflow:**
+1. Edit SQLModel classes in `aperag/db/models.py`
+2. Generate migration file:
+ ```bash
+ make makemigration # Creates new migration in migration/versions/
+ ```
+3. Apply migration to database:
+ ```bash
+ make migrate # Updates database schema
+ ```
+4. Update related code (repositories in `aperag/db/repositories/`, services in `aperag/service/`)
+5. Verify changes:
+ ```bash
+ make test # ✅ Ensure everything works
+ ```
+
+### Q: ⚡ How do I add a new feature with background processing?
+
+**Feature implementation workflow:**
+1. Implement feature components:
+ - Backend logic: `aperag/[module]/`
+ - Async tasks: `aperag/tasks/`
+ - Database models: `aperag/db/models.py`
+2. Update API and generate code:
+ ```bash
+ make makemigration # Generate migration files
+ make migrate # Apply database changes
+ make generate-models # Update Pydantic models
+ make generate-frontend-sdk # Update TypeScript client
+ ```
+3. Quality assurance:
+ ```bash
+ make format && make lint && make test
+ ```
+
+### Q: 🧪 How do I run unit tests and e2e tests?
+
+**Unit Tests (Fast, No External Dependencies):**
+```bash
+# Run all unit tests
+make unit-test
+
+# Run specific test file
+uv run pytest tests/unit_test/test_model_service.py -v
+
+# Run specific test class or function
+uv run pytest tests/unit_test/test_model_service.py::TestModelService::test_get_models -v
+
+# Run tests with coverage
+uv run pytest tests/unit_test/ --cov=aperag --cov-report=html
+```
+
+**E2E Tests (Require Running Services):**
+```bash
+# Setup: Start required services first
+make compose-infra # 🗄️ Start databases
+make run-backend # π Start API server (separate terminal)
+
+# Run all e2e tests
+make e2e-test
+
+# Run specific e2e test modules
+uv run pytest tests/e2e_test/test_chat/ -v
+uv run pytest tests/e2e_test/graphstorage/ -v
+
+# Run with detailed output and no capture
+uv run pytest tests/e2e_test/test_specific.py -v -s
+
+# Performance benchmarks (with timing)
+make e2e-performance-test
+```
+
+**Complete Test Suite:**
+```bash
+# Run everything (unit + e2e)
+make test
+
+# Test with different configurations
+make compose-infra WITH_NEO4J=1 # Test with Neo4j instead of PostgreSQL
+make test
+```
+
+### Q: π How do I debug failing tests?
+
+**Debugging workflow:**
+1. Run failing test in isolation:
+ ```bash
+ # Single test with full output
+ uv run pytest tests/unit_test/test_failing.py::test_specific_function -v -s
+
+ # Stop on first failure
+ uv run pytest tests/unit_test/ -x --tb=short
+ ```
+2. For e2e test failures, ensure services are running:
+ ```bash
+ make compose-infra # Database services
+ make run-backend # API server
+ make run-celery # Background workers (if testing async tasks)
+ ```
+3. Use debugging tools:
+ ```bash
+ # Run with pdb debugger
+ uv run pytest tests/unit_test/test_failing.py --pdb
+
+ # Capture logs during test
+ uv run pytest tests/e2e_test/test_failing.py --log-cli-level=DEBUG
+ ```
+4. Fix and retest:
+ ```bash
+ make format # Auto-fix style issues
+ make lint # Check remaining issues
+ uv run pytest tests/path/to/fixed_test.py -v # Verify fix
+ ```
+
+### Q: π How do I run RAG evaluation and analysis?
+
+**Evaluation workflow:**
+```bash
+# Ensure environment is ready
+make compose-infra WITH_NEO4J=1 # Use Neo4j for better graph performance
+make run-backend # Start API server (separate terminal)
+make run-celery # Start Celery worker (separate terminal)
+
+# Run comprehensive RAG evaluation
+make evaluate # π Runs aperag.evaluation.run module
+
+# π Check evaluation reports in tests/report/
+```
+
+### Q: 📦 How do I update dependencies safely?
+
+**Python dependencies:**
+1. Edit `pyproject.toml` (add/update packages)
+2. Update virtual environment:
+ ```bash
+ make install # Syncs all groups and extras with uv
+ make test # Verify compatibility
+ ```
+
+**Frontend dependencies:**
+1. Edit `frontend/package.json`
+2. Update and test:
+ ```bash
+ cd frontend && yarn install
+ make run-frontend # Test frontend compilation
+ make generate-frontend-sdk # Ensure API client still works
+ ```
+
+### Q: π How do I prepare code for production deployment?
+
+**Pre-deployment checklist:**
+1. Code quality validation:
+ ```bash
+ make format # Auto-fix all style issues
+ make lint # Verify no style violations
+ make static-check # mypy type checking
+ ```
+2. Comprehensive testing:
+ ```bash
+ make test # All unit + e2e tests
+ make e2e-performance-test # Performance benchmarks
+ ```
+3. API consistency:
+ ```bash
+ make generate-models # Ensure models match OpenAPI spec
+ make generate-frontend-sdk # Update frontend client
+ ```
+4. Database migrations:
+ ```bash
+ make makemigration # Generate any pending migrations
+ ```
+5. Full-stack integration test:
+ ```bash
+ make compose-up WITH_NEO4J=1 WITH_DOCRAY=1 # Production-like setup
+ # Manual testing at http://localhost:3000/web/
+ make compose-down
+ ```
+
+### Q: π How do I completely reset my development environment?
+
+**Nuclear reset (destroys all data):**
+```bash
+make compose-down REMOVE_VOLUMES=1 # ⚠️ Stop services + delete ALL data
+make clean # 🧹 Clean temporary files
+
+# Restart fresh
+make compose-infra # 🗄️ Fresh databases
+make migrate # π Apply all migrations
+make run-backend # π Start API server
+make run-celery # ⚡ Start background workers
+```
+
+**Soft reset (preserve data):**
+```bash
+make compose-down # ⏹️ Stop services, keep data
+make compose-infra # 🗄️ Restart databases
+make migrate # π Apply any new migrations
+```
+
+**Reset just Python environment:**
+```bash
+rm -rf .venv/ # 🗑️ Remove virtual environment
+make dev # ⚙️ Recreate everything
+source .venv/bin/activate # ✅ Reactivate
+```
\ No newline at end of file