From 9cf19afab307d0c9caef48030a96471da03beca5 Mon Sep 17 00:00:00 2001 From: Guo Ziang Date: Sun, 13 Jul 2025 11:11:12 +0800 Subject: [PATCH 1/2] feat: enhance configuration management for database and services - Added PostgreSQL, Redis, and Elasticsearch configuration fields to `Config` class in `config.py`. - Updated `env.template` to reflect new environment variables for Redis and Elasticsearch. - Removed redundant URL construction from `entrypoint.sh`, as URLs are now built within the `Config` class. - Adjusted `start-celery-flower.sh` to simplify Celery command execution. --- aperag/config.py | 63 ++++++++++++++++++++++++++++------ envs/env.template | 15 ++++---- scripts/entrypoint.sh | 24 ------------- scripts/start-celery-flower.sh | 2 +- 4 files changed, 63 insertions(+), 41 deletions(-) diff --git a/aperag/config.py b/aperag/config.py index 6e9df88af..e5e3236fe 100644 --- a/aperag/config.py +++ b/aperag/config.py @@ -48,8 +48,28 @@ class Config(BaseSettings): # Debug mode debug: bool = Field(False, alias="DEBUG") + # Postgres atomic fields + postgres_host: str = Field("127.0.0.1", alias="POSTGRES_HOST") + postgres_port: int = Field(5432, alias="POSTGRES_PORT") + postgres_db: str = Field("postgres", alias="POSTGRES_DB") + postgres_user: str = Field("postgres", alias="POSTGRES_USER") + postgres_password: str = Field("postgres", alias="POSTGRES_PASSWORD") + + # Redis atomic fields + redis_host: str = Field("127.0.0.1", alias="REDIS_HOST") + redis_port: int = Field(6379, alias="REDIS_PORT") + redis_user: str = Field("default", alias="REDIS_USER") + redis_password: str = Field("password", alias="REDIS_PASSWORD") + + # Elasticsearch atomic fields + es_host_name: str = Field("127.0.0.1", alias="ES_HOST_NAME") + es_port: int = Field(9200, alias="ES_PORT") + es_user: str = Field("", alias="ES_USER") + es_password: str = Field("", alias="ES_PASSWORD") + es_protocol: str = Field("http", alias="ES_PROTOCOL") + # Database - database_url: str = Field(f"sqlite:///{BASE_DIR}/db.sqlite3", alias="DATABASE_URL") + database_url: Optional[str] = Field(None, alias="DATABASE_URL") # Database connection pool settings db_pool_size: int = Field(20, alias="DB_POOL_SIZE") @@ -68,7 +88,7 @@ class Config(BaseSettings): logto_app_id: str = Field("", alias="LOGTO_APP_ID") # Celery - celery_broker_url: str = Field("redis://localhost:6379/0", alias="CELERY_BROKER_URL") + celery_broker_url: Optional[str] = Field(None, alias="CELERY_BROKER_URL") celery_result_backend: Optional[str] = None # Will be set in __post_init__ celery_beat_scheduler: str = "django_celery_beat.schedulers:DatabaseScheduler" celery_worker_send_task_events: bool = True @@ -84,7 +104,7 @@ class Config(BaseSettings): embedding_max_chunks_in_batch: int = Field(10, alias="EMBEDDING_MAX_CHUNKS_IN_BATCH") # Memory backend - memory_redis_url: str = Field("redis://127.0.0.1:6379/1", alias="MEMORY_REDIS_URL") + memory_redis_url: Optional[str] = Field(None, alias="MEMORY_REDIS_URL") # Vector DB vector_db_type: str = Field("qdrant", alias="VECTOR_DB_TYPE") @@ -108,14 +128,8 @@ class Config(BaseSettings): chunk_size: int = Field(400, alias="CHUNK_SIZE") chunk_overlap_size: int = Field(20, alias="CHUNK_OVERLAP_SIZE") - # Redis - redis_host: str = Field("localhost", alias="REDIS_HOST") - redis_port: str = Field("6379", alias="REDIS_PORT") - redis_username: str = Field("", alias="REDIS_USERNAME") - redis_password: str = Field("", alias="REDIS_PASSWORD") - # Fulltext search - es_host: str = Field("http://localhost:9200", alias="ES_HOST") + es_host: Optional[str] = Field(None, alias="ES_HOST") es_timeout: int = Field(30, alias="ES_TIMEOUT") # ES request timeout in seconds es_max_retries: int = Field(3, alias="ES_MAX_RETRIES") # Max retries for ES requests @@ -154,6 +168,35 @@ def __init__(self, **kwargs): with open(json_path, "r", encoding="utf-8") as f: self.model_configs = json.load(f) + # DATABASE_URL + if not self.database_url: + self.database_url = ( + f"postgresql://{self.postgres_user}:{self.postgres_password}" + f"@{self.postgres_host}:{self.postgres_port}/{self.postgres_db}" + ) + # CELERY_BROKER_URL + if not self.celery_broker_url: + self.celery_broker_url = ( + f"redis://{self.redis_user}:{self.redis_password}" + f"@{self.redis_host}:{self.redis_port}/0" + ) + # MEMORY_REDIS_URL + if not self.memory_redis_url: + self.memory_redis_url = ( + f"redis://{self.redis_user}:{self.redis_password}" + f"@{self.redis_host}:{self.redis_port}/1" + ) + # ES_HOST + if not self.es_host: + if self.es_user and self.es_password: + self.es_host = ( + f"{self.es_protocol}://{self.es_user}:{self.es_password}" + f"@{self.es_host_name}:{self.es_port}" + ) + else: + self.es_host = ( + f"{self.es_protocol}://{self.es_host_name}:{self.es_port}" + ) # Object store config if self.object_store_type == "local": self.object_store_local_config = LocalObjectStoreConfig() diff --git a/envs/env.template b/envs/env.template index 0726d29c8..8751dc6e3 100644 --- a/envs/env.template +++ b/envs/env.template @@ -4,7 +4,6 @@ POSTGRES_PORT=5432 POSTGRES_DB=postgres POSTGRES_USER=postgres POSTGRES_PASSWORD=postgres -DATABASE_URL=postgresql://postgres:postgres@127.0.0.1:5432/postgres # Database Connection Pool Settings # Adjust these values based on your server resources and expected load @@ -15,17 +14,21 @@ DB_POOL_RECYCLE=3600 # Recycle connections after 1 hour (in seconds) DB_POOL_PRE_PING=True # Validate connections before use # Redis -MEMORY_REDIS_URL=redis://default:password@127.0.0.1:6379 - -# Celery -CELERY_BROKER_URL=redis://default:password@127.0.0.1:6379/0 +REDIS_HOST=127.0.0.1 +REDIS_PORT=6379 +REDIS_USER=default +REDIS_PASSWORD=password # Vector DB VECTOR_DB_TYPE=qdrant VECTOR_DB_CONTEXT={"url":"http://127.0.0.1","port":6333,"distance":"Cosine","timeout":1000} # Elasticsearch -ES_HOST=http://127.0.0.1:9200 +ES_HOST_NAME=127.0.0.1 +ES_PORT=9200 +ES_USER= +ES_PASSWORD= +ES_PROTOCOL=http # Neo4J NEO4J_HOST=127.0.0.1 diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh index c6fae6b7b..6d7341902 100755 --- a/scripts/entrypoint.sh +++ b/scripts/entrypoint.sh @@ -58,28 +58,4 @@ except Exception as error: sys.stderr.write("Failed to create pgvector extension (this is non-critical): {}\n".format(error)) END -# Build DATABASE_URL from components -if [[ -n "${POSTGRES_HOST:-}" && -n "${POSTGRES_USER:-}" && -n "${POSTGRES_PASSWORD:-}" ]]; then - export DATABASE_URL="postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT:-5432}/${POSTGRES_DB:-postgres}" -fi - -# Build CELERY_BROKER_URL from Redis components -if [[ -n "${REDIS_HOST:-}" && -n "${REDIS_USER:-}" && -n "${REDIS_PASSWORD:-}" ]]; then - export CELERY_BROKER_URL="redis://${REDIS_USER}:${REDIS_PASSWORD}@${REDIS_HOST}:${REDIS_PORT:-6379}/0" -fi - -# Build MEMORY_REDIS_URL from Redis components -if [[ -n "${REDIS_HOST:-}" && -n "${REDIS_USER:-}" && -n "${REDIS_PASSWORD:-}" ]]; then - export MEMORY_REDIS_URL="redis://${REDIS_USER}:${REDIS_PASSWORD}@${REDIS_HOST}:${REDIS_PORT:-6379}/1" -fi - -# Build ES_HOST from Elasticsearch components -if [[ -n "${ES_HOST_NAME:-}" ]]; then - if [[ -n "${ES_USER:-}" && -n "${ES_PASSWORD:-}" ]]; then - export ES_HOST="${ES_PROTOCOL:-http}://${ES_USER}:${ES_PASSWORD}@${ES_HOST_NAME}:${ES_PORT:-9200}" - else - export ES_HOST="${ES_PROTOCOL:-http}://${ES_HOST_NAME}:${ES_PORT:-9200}" - fi -fi - exec "$@" diff --git a/scripts/start-celery-flower.sh b/scripts/start-celery-flower.sh index 7353dd300..938f6e3f7 100755 --- a/scripts/start-celery-flower.sh +++ b/scripts/start-celery-flower.sh @@ -5,4 +5,4 @@ set -o nounset exec watchfiles celery.__main__.main \ --args \ - "-A config.celery -b \"${CELERY_BROKER_URL}\" flower --basic_auth=\"${CELERY_FLOWER_USER}:${CELERY_FLOWER_PASSWORD}\"" + "-A config.celery flower --basic_auth=\"${CELERY_FLOWER_USER}:${CELERY_FLOWER_PASSWORD}\"" From 860985ce3d23f3b765c62e4029b27033b0279580 Mon Sep 17 00:00:00 2001 From: Guo Ziang Date: Sun, 13 Jul 2025 15:59:38 +0800 Subject: [PATCH 2/2] chore: tidy up --- aperag/config.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/aperag/config.py b/aperag/config.py index e5e3236fe..af5fd5cfc 100644 --- a/aperag/config.py +++ b/aperag/config.py @@ -155,10 +155,6 @@ class Config(BaseSettings): def __init__(self, **kwargs): super().__init__(**kwargs) - # Set celery_result_backend if not set - if not self.celery_result_backend: - self.celery_result_backend = self.celery_broker_url - # Load model configs from file import json import os @@ -180,6 +176,11 @@ def __init__(self, **kwargs): f"redis://{self.redis_user}:{self.redis_password}" f"@{self.redis_host}:{self.redis_port}/0" ) + + # CELERY_RESULT_BACKEND + if not self.celery_result_backend: + self.celery_result_backend = self.celery_broker_url + # MEMORY_REDIS_URL if not self.memory_redis_url: self.memory_redis_url = (