# Example environment configuration for vectoria (.env.example).

# LLM (OpenAI-compatible)
OPENAI_BASE_URL=https://api.openai.com/v1
OPENAI_API_KEY=sk-your-api-key-here
LLM_MODEL=gpt-4o

# Embedding — leave blank to fall back to LLM settings above
EMBEDDING_BASE_URL=
EMBEDDING_API_KEY=
EMBEDDING_MODEL=text-embedding-3-small
EMBEDDING_DIMENSIONS=1536

# Redis (for distributed rate limiting across workers). Format:
#   redis://[user:pass@]host:port/db
# In compose the `redis` service is reachable as `redis://redis:6379/0`.
REDIS_URL=redis://localhost:6379/0

# Vector store
VECTOR_STORE=pgvector
# Local dev (app runs on host, PG in Docker):
DATABASE_URL=postgresql+asyncpg://postgres:postgres@localhost/vectoria
# Docker Compose production (app + PG both in compose network):
# DATABASE_URL=postgresql+asyncpg://postgres:postgres@db/vectoria

# Postgres container credentials (consumed by compose.yaml). For any
# deploy that exposes the DB port or runs on a shared host, override
# these with strong values. Must match the user/password/db in DATABASE_URL.
# POSTGRES_USER=postgres
# POSTGRES_PASSWORD=postgres
# POSTGRES_DB=vectoria

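# Host interface (PG_BIND) and host port (PG_PORT) the DB port binds to.
# The default bind of 127.0.0.1 keeps the port unreachable from outside
# the host. Set PG_BIND to 0.0.0.0 only behind a network-level firewall.
# NOTE(review): the local-dev DATABASE_URL above has no explicit port, so
# it presumably connects to 5432 — if compose publishes the DB on PG_PORT
# (5433 shown below), the URL likely needs @localhost:5433. Confirm
# against compose.yaml.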
# PG_BIND=127.0.0.1
# PG_PORT=5433

# Object storage (S3-compatible: MinIO, TOS, AWS S3, etc.)
STORAGE_TYPE=s3
S3_ENDPOINT=http://localhost:9000
S3_REGION=
S3_ACCESS_KEY=minioadmin
S3_SECRET_KEY=minioadmin
S3_BUCKET=vectoria
S3_ADDRESSING_STYLE=auto
S3_PRESIGN_EXPIRES=3600
# TOS example:
# S3_ENDPOINT=https://tos-s3-cn-beijing.volces.com
# S3_REGION=cn-beijing
# S3_ADDRESSING_STYLE=virtual

# Parse engine default
DEFAULT_PARSE_ENGINE=auto

# ─── External parsers — region-aware tuning ──────────────────────────
# MinerU and Vision LLM are external HTTP dependencies. Co-locate them
# with the worker for best results: cross-region routing (e.g. an
# overseas worker calling a CN-hosted MinerU) hits link-quality limits
# that no timeout can fix gracefully (we observed a WriteTimeout while
# uploading a 12 MB PDF body to a CN endpoint from an overseas node).
# The parser registry already falls back automatically to in-process
# pdfium / rapidocr when these are unreachable, but the *quality*
# downgrades.
#
# Recommended per-region picks:
#   ── CN deploy (worker hosted in mainland China):
#      MINERU_API_URL=http://<cn-mineru-host>:8000
#      VISION_BASE_URL=https://api.modelverse.cn/v1   # or volcengine
#      VISION_MODEL=Qwen/Qwen2.5-VL-72B-Instruct      # (or similar)
#   ── Overseas deploy (NA / EU / SEA worker):
#      MINERU_API_URL=http://<overseas-mineru-or-self-host>:8000
#                                  (or leave blank → fall back to pdfium)
#      VISION_BASE_URL=https://api.openai.com/v1
#      VISION_MODEL=gpt-4o-mini
#
# Both models are accessed via OpenAI-compatible APIs, so swapping is
# just three env vars — no code change.

# PaddleOCR-VL gateway (GPU machine, optional). Primary PDF parser
# when configured; the registry chain falls through to MinerU when
# unavailable. Both URL and KEY must be set — either blank disables.
PADDLE_API_URL=https://your-paddle-gateway/vl
PADDLE_API_KEY=
# 600 s matches the gateway's own ceiling; long PDFs sit at 60-90 s
# routinely, a 120 s client cut would prematurely fail them.
PADDLE_TIMEOUT=600
# Per-process cap on in-flight VL calls. A single-card GPU serializes
# calls; >3 concurrent has been observed to drop connections on
# image-dense PDFs. The cap is per process, so a host with N workers
# gets an effective ceiling of N × this value; tune it with the worker
# count in mind.
PADDLE_CONCURRENCY=3

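# MinerU remote API (GPU machine, optional). Stays as the PDF fallback
# under PaddleOCR-VL in the default chain. Replace the placeholder URL
# with your MinerU host, or leave MINERU_API_URL blank to fall back to
# in-process pdfium.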
MINERU_API_URL=http://your-gpu-machine:8000
MINERU_BACKEND=pipeline
MINERU_LANGUAGE=ch

# Vision LLM — for image description AND vision-native parser
# (whole-image markdown extraction). Leave blank to disable both —
# image uploads then route to ocr-native (rapidocr) and inline image
# enrichment is skipped.
VISION_BASE_URL=
VISION_API_KEY=
VISION_MODEL=gpt-4o
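# Per-call cost estimate in USD — used by the spend counter and
# daily-budget guardrail. Tune per vendor and keep it in sync with
# VISION_MODEL:
#   gpt-4o-mini ≈ 0.005,  gpt-4o ≈ 0.02,  qwen-vl ≈ 0.002
# NOTE: the value below matches gpt-4o-mini, while VISION_MODEL above is
# set to gpt-4o — raise it to ≈ 0.02 if you keep gpt-4o.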
VISION_COST_PER_CALL_USD=0.005
# Soft daily spend cap. When today's accumulated estimated cost
# crosses this, vision-native parser advertises is_available()=False
# and image uploads automatically fall back to ocr-native (free,
# in-process) until the UTC date rolls over. 0 = no cap (default).
# State is per-process — on multi-worker hosts the effective ceiling
# is N × budget; size accordingly or rely on Prometheus alerting on the
# vectoria_vision_cost_usd_total counter for hard guarantees.
VISION_DAILY_BUDGET_USD=0.0

# Security
# API_KEY — leave blank to allow unauthenticated access (local dev).
# Set a strong random key in production; clients must send X-API-Key header.
API_KEY=
# CORS_ORIGINS — JSON list of allowed origins. Default is [] (no CORS).
# Use ["*"] for local development. Set explicit origins in production.
# Example: CORS_ORIGINS=["https://app.example.com","https://admin.example.com"]
CORS_ORIGINS=["*"]

# JWT auth (optional) — enables token-based auth alongside X-API-Key.
# Leave JWT_SECRET blank to disable. When set, clients may authenticate by
# sending a token via either:
#   - Authorization: Bearer <token>   (OAuth2/OIDC standard)
#   - X-Authorization-Token: <token>  (alternative custom header)
# The secret and algorithm must match whatever service issues the tokens.
JWT_SECRET=
JWT_ALGORITHM=HS256
# Optional: require tokens to carry a matching `iss` claim.
JWT_ISSUER=

# RAG pipeline
ENABLE_QUERY_REWRITE=true
ENABLE_RERANKER=false
# RERANKER_BASE_URL=http://your-reranker:8000