11# ============================================
22# Papr Memory - Open Source Configuration
33# ============================================
4- # Copy this file to .env and fill in your values
5- # Required variables are marked with [REQUIRED]
6- # Optional variables have sensible defaults
4+ #
5+ # SETUP INSTRUCTIONS:
6+ #
7+ # 1. Copy this file:
8+ # cp .env.example .env.opensource
9+ #
10+ # 2. Edit .env.opensource with your REAL API keys:
11+ # - OPENAI_API_KEY (required for LLM operations)
12+ # - GROQ_API_KEY (optional, for fast LLM)
13+ # - DEEPINFRA_TOKEN (only if USE_LOCAL_EMBEDDINGS=false)
14+ #
15+ # 3. Start services:
16+ # docker compose up -d
17+ #
18+ # 4. On first startup, the system auto-creates:
19+ # - Parse schemas
20+ # - Default user + workspace
21+ # - API key → saved to .env.generated
22+ # - Test credentials (TEST_* vars) → appended to .env.opensource
23+ #
24+ # 5. Your API key is in .env.generated (or docker logs papr-memory)
25+ #
26+ # FILES:
27+ # .env.example → Template (committed to git). DO NOT put secrets here.
28+ # .env.opensource → Your real config (gitignored). Used by docker-compose.
29+ # .env → Alternative config name (gitignored). Also works.
30+ # .env.generated → Auto-created on first run with your API key (gitignored).
31+ #
32+ # ============================================
733
834# ============================================
935# Edition Configuration
@@ -15,10 +41,9 @@ PAPR_EDITION=opensource
1541# ============================================
1642# API Key (For Testing)
1743# ============================================
18- # Generate your API key after starting services:
44+ # Auto-generated on first startup. Check .env.generated after docker compose up.
45+ # Or generate manually:
1946# python scripts/generate_api_key.py --email your@email.com --name "My Project"
20- #
21- # For quick testing, you can use this default key (change in production!)
2247PAPR_API_KEY = pmem_oss_default_testing_key_CHANGE_ME
2348
2449# ============================================
@@ -42,9 +67,9 @@ OPENAI_API_KEY=sk-your-openai-api-key
4267OPENAI_ORGANIZATION = org-your-org-id
4368
4469# LLM Model Configuration
45- LLM_MODEL = gpt-4o-mini
46- LLM_MODEL_MINI = gpt-4o-mini
47- LLM_MODEL_NANO = gpt-4o-mini
70+ LLM_MODEL = gpt-5-nano
71+ LLM_MODEL_MINI = gpt-5-mini
72+ LLM_MODEL_NANO = gpt-5-nano
4873
4974# ============================================
5075# MongoDB Configuration [REQUIRED]
@@ -85,17 +110,21 @@ QDRANT_URL=http://qdrant:6333
85110# Leave empty for self-hosted Qdrant without auth
86111QDRANT_API_KEY =
87112
88- # Qdrant collection names
89- QDRANT_COLLECTION_QWEN0pt6B4B = Qwen0pt6B
113+ # Qdrant collection names (auto-selected based on embedding dimensions)
114+ # - Qwen0pt6B: 1024 dimensions (used with Qwen3-Embedding-0.6B)
115+ # - Qwen4B: 2560 dimensions (used with Qwen3-Embedding-4B)
116+ # The system automatically uses the correct collection based on LOCAL_EMBEDDING_DIMENSIONS
117+ QDRANT_COLLECTION_QWEN0pt6B = Qwen0pt6B
90118QDRANT_COLLECTION_QWEN4B = Qwen4B
91119
92120# ============================================
93121# Parse Server Configuration [REQUIRED]
94122# ============================================
95123# Parse Server is used for user management and ACL
96- # For Docker: http://parse-server:1337/parse
97- # For local: http://localhost:1337/parse
98- PARSE_SERVER_URL = http://parse-server:1337/parse
124+ # For Docker: http://parse-server:1337
125+ # For local: http://localhost:1337
126+ # NOTE: Do NOT include /parse in the URL - the code appends /parse to all API paths
127+ PARSE_SERVER_URL = http://parse-server:1337
99128
100129# Parse credentials (generate random UUIDs for security in production)
101130PARSE_APPLICATION_ID = papr-oss-app-id
@@ -107,6 +136,7 @@ PARSE_SERVER_APPLICATION_ID=papr-oss-app-id
107136PARSE_SERVER_MASTER_KEY = papr-oss-master-key
108137
109138# Parse Dashboard credentials (optional, for development only)
139+ # Dashboard URL: http://localhost:4040
110140PARSE_DASHBOARD_USER = admin
111141PARSE_DASHBOARD_PASSWORD = password
112142PARSE_DASHBOARD_SESSION_SECRET = your-dashboard-secret
@@ -139,12 +169,44 @@ POSTHOG_HOST=https://app.posthog.com
139169AMPLITUDE_API_KEY =
140170
141171# ============================================
142- # Embedding Configuration (Optional)
172+ # Embedding Configuration
143173# ============================================
174+ # Local Embeddings (RECOMMENDED for Open Source)
175+ # By default, we use the local Qwen3-Embedding-0.6B model for embeddings
176+ # This runs entirely on your device without external API calls
177+ #
178+ # Set to "true" to use local embedding models (default for open source)
179+ # Set to "false" to use cloud embedding APIs (requires API tokens below)
180+ USE_LOCAL_EMBEDDINGS = true
181+
182+ # Local embedding model configuration
183+ # ⚠️ IMPORTANT: Choose ONE of the following models:
184+ #
185+ # Option 1: Qwen3-Embedding-0.6B (RECOMMENDED for most users)
186+ # - Smaller, faster model (~1.2GB download)
187+ # - 1024 dimensions, 32k context
188+ # - Good balance of speed and quality
189+ LOCAL_EMBEDDING_MODEL = Qwen/Qwen3-Embedding-0.6B
190+ LOCAL_EMBEDDING_DIMENSIONS = 1024
191+ #
192+ # Option 2: Qwen3-Embedding-4B (for higher quality)
193+ # - Larger, more accurate model (~8GB download)
194+ # - 2560 dimensions, 32k context
195+ # - Better embedding quality but slower
196+ # LOCAL_EMBEDDING_MODEL=Qwen/Qwen3-Embedding-4B
197+ # LOCAL_EMBEDDING_DIMENSIONS=2560
198+ #
199+ # ⚠️ NOTE: Qdrant collections are created per dimension size.
200+ # If you change models, you may need to:
201+ # 1. Delete the old collection: curl -X DELETE http://localhost:6333/collections/Qwen0pt6B
202+ # 2. Then restart services: docker compose restart
203+ # 3. Or, instead of steps 1-2, reset everything: docker compose down -v && docker compose up -d (clears all data)
204+
205+ # Cloud Embedding APIs (Optional - only needed if USE_LOCAL_EMBEDDINGS=false)
144206# Hugging Face for alternative embeddings
145207HUGGING_FACE_ACCESS_TOKEN =
146208
147- # DeepInfra for additional embedding models
209+ # DeepInfra for cloud-based Qwen embeddings (faster but requires API key)
148210DEEPINFRA_TOKEN =
149211DEEPINFRA_API_URL = https://api.deepinfra.com/v1/openai/embeddings
150212
@@ -164,7 +226,7 @@ MAX_TOTAL_BATCH_TOKENS=100000
164226# Logging Configuration
165227# ============================================
166228LOGGING_ENV = development
167- LoggingtoFile = false
229+ LoggingtoFile = true
168230
169231# ============================================
170232# Docker Image Configuration
@@ -190,18 +252,77 @@ IMAGE_TAG=latest
190252
191253# Enable schema selection via LLM
192254ENABLE_LLM_SCHEMA_SELECTION = true
193- OPENAI_SCHEMA_SELECTOR_MODEL = gpt-4o-mini
255+ OPENAI_SCHEMA_SELECTOR_MODEL = gpt-5-nano
194256
195257# Enable hierarchical document chunking
196258FEATURE_HIERARCHICAL_CHUNKING = true
197259
260+ # ============================================
261+ # Temporal Configuration (Durable Workflows)
262+ # ============================================
263+ # Temporal enables durable workflows for batch memory processing and
264+ # document ingestion. For self-hosted deployments, use the Temporal service run via Docker.
265+ #
266+ # For Docker (self-hosted Temporal):
267+ TEMPORAL_ADDRESS = temporal:7233
268+ TEMPORAL_NAMESPACE = default
269+ TEMPORAL_TASK_QUEUE = memory-processing
270+
271+ # For Temporal Cloud (optional - if using Temporal Cloud instead):
272+ # TEMPORAL_ADDRESS=your-namespace.tmprl.cloud:7233
273+ # TEMPORAL_NAMESPACE=your-namespace
274+ # TEMPORAL_API_KEY=your-temporal-cloud-api-key
275+
276+ # Enable/disable Temporal (set to false to use simple background tasks instead)
277+ TEMPORAL_ENABLED = true
278+
279+ # ============================================
280+ # Document Processing Configuration (Optional)
281+ # ============================================
282+ # These providers are optional - configure as needed for document ingestion.
283+ # Leave as placeholders if you don't need document processing.
284+
285+ # TensorLake - AI document parsing
286+ TENSORLAKE_API_KEY = your-tensorlake-api-key-here
287+ TENSORLAKE_BASE_URL = https://api.tensorlake.ai
288+
289+ # Reducto - Document structure extraction
290+ REDUCTO_API_KEY = your-reducto-api-key-here
291+ REDUCTO_ENVIRONMENT = production
292+ REDUCTO_PIPELINE_ID = your-reducto-pipeline-id-here
293+
294+ # Google Gemini - Vision/PDF processing
295+ GOOGLE_API_KEY = your-google-api-key-here
296+
297+ # Groq - Fast LLM for document analysis
298+ GROQ_API_KEY = your-groq-api-key-here
299+ GROQ_NEO_CYPHER = 1
300+ GROQ_FALLBACK_MODEL = openai/gpt-oss-20b
301+
302+ # Cohere - Reranking (optional)
303+ COHERE_API_KEY = your-cohere-api-key-here
304+
305+ # ============================================
306+ # Test Credentials (Auto-Generated)
307+ # ============================================
308+ # These are auto-populated on first startup by the bootstrap script.
309+ # After running `docker compose up -d`, check .env.opensource for real values.
310+ # Do NOT set these manually - they are created from Parse Server data.
311+ #
312+ # TEST_SESSION_TOKEN=r:auto-generated-on-first-run
313+ # TEST_X_USER_API_KEY=pmem_oss_auto-generated-on-first-run
314+ # TEST_USER_ID=auto-generated
315+ # TEST_TENANT_ID=auto-generated
316+ # TEST_WORKSPACE_ID=auto-generated
317+ # TEST_NAMESPACE_ID=auto-generated
318+ # TEST_ORGANIZATION_ID=auto-generated
319+
198320# ============================================
199321# CLOUD-ONLY FEATURES (Not needed for OSS)
200322# ============================================
201323# The following are only used in cloud edition:
202324# - AUTH0_* (OAuth authentication)
203325# - STRIPE_* (Payment processing)
204326# - AMPLITUDE_* (Cloud analytics)
205- # - TEMPORAL_* (Durable workflows)
206327# - AZURE_* (Azure services)
207328# - NEO4J_GRAPHQL_ENDPOINT (Neo4j Aura GraphQL)
0 commit comments