-
-
Notifications
You must be signed in to change notification settings - Fork 209
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
290 lines (281 loc) · 11.2 KB
/
docker-compose.yml
File metadata and controls
290 lines (281 loc) · 11.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
# Production container topology for the cloud Agent OS deployment (Sub B of
# Epic #11720). Implements the multi-container topology decided in ADR 0014
# (Cloud Deployment Topology + Scheduler Task Taxonomy).
#
# Profile variants — `docker compose [--profile <name>] up`:
# - default (no profile): the baseline MCP stack — chroma + kb-server + mc-server.
# - `cloud`: adds the orchestrator — the full Agent OS deployment,
# running ADR 0014's cloud-safe scheduler profile.
# - `ingress`: adds the Caddy reverse proxy — TLS termination + public
# `/kb` + `/mc` path routing for the MCP servers (Sub C #11724).
# - `local-model`: adds an optional self-hosted OpenAI-compatible provider
# runtime. External provider endpoints remain the default.
#
# Per-container resource limits (`deploy.resources.limits`) are declared on every
# service so devops can govern RAM / CPU per service. The values are conservative
# starting points — tune per deployment host.
services:
  # Chroma vector store. No `ports`/`expose` entry: it is reachable only by other
  # services on `neo-mcp-network` as `chroma:8000` (see NEO_CHROMA_HOST/PORT below).
  chroma:
    image: chromadb/chroma:1.5.9
    volumes:
      - chroma-data:/chroma/chroma
    # Plain TCP-connect probe against the local listener via bash's /dev/tcp.
    healthcheck:
      test: ["CMD-SHELL", "bash -c '</dev/tcp/127.0.0.1/8000'"]
      interval: 10s
      timeout: 5s
      retries: 12
      start_period: 60s
    networks:
      - neo-mcp-network
    deploy:
      resources:
        limits:
          memory: 2g
          cpus: "2.0"

  # Knowledge-base MCP server, built from the shared Dockerfile with
  # TARGET_SERVER=knowledge-base. Uses the SSE transport on port 3000, which is
  # `expose`d to the compose network only (not published to the host).
  kb-server:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        TARGET_SERVER: knowledge-base
    environment:
      - NEO_TRANSPORT=sse
      - NEO_AUTO_SYNC=false
      - NEO_KB_AUTO_START_DATABASE=false
      - NEO_CHROMA_HOST=chroma
      - NEO_CHROMA_PORT=8000
      - MCP_HTTP_PORT=3000
      - NEO_MEMORY_DB_PATH=/app/.neo-ai-data/sqlite/memory-core-graph.sqlite
      # Provider settings are passed through from the host environment / .env file;
      # each defaults to an empty string when unset.
      - NEO_MODEL_PROVIDER=${NEO_MODEL_PROVIDER:-}
      - NEO_EMBEDDING_PROVIDER=${NEO_EMBEDDING_PROVIDER:-}
      - NEO_OPENAI_COMPATIBLE_HOST=${NEO_OPENAI_COMPATIBLE_HOST:-}
      - NEO_OPENAI_COMPATIBLE_MODEL=${NEO_OPENAI_COMPATIBLE_MODEL:-}
      - NEO_OPENAI_COMPATIBLE_EMBEDDING_MODEL=${NEO_OPENAI_COMPATIBLE_EMBEDDING_MODEL:-}
      - NEO_OPENAI_COMPATIBLE_API_KEY=${NEO_OPENAI_COMPATIBLE_API_KEY:-}
      - NEO_OLLAMA_KEEP_ALIVE=${NEO_OLLAMA_KEEP_ALIVE:-}
      - NEO_OPENAI_COMPATIBLE_KEEP_ALIVE=${NEO_OPENAI_COMPATIBLE_KEEP_ALIVE:-}
    volumes:
      - shared-sqlite-data:/app/.neo-ai-data/sqlite
    depends_on:
      chroma:
        condition: service_healthy
    networks:
      - neo-mcp-network
    expose:
      - "3000"
    healthcheck:
      test: ["CMD", "node", "./ai/scripts/diagnostics/mcpHealthcheck.mjs", "--url", "http://127.0.0.1:3000", "--client-name", "neo-kb-container-healthcheck"]
      interval: 10s
      timeout: 10s
      retries: 12
      start_period: 45s
    deploy:
      resources:
        limits:
          memory: 1g
          cpus: "1.0"

  # Memory-core MCP server, built from the shared Dockerfile with
  # TARGET_SERVER=memory-core. Uses the SSE transport on port 3001, `expose`d to
  # the compose network only. Mounts the same SQLite volume as kb-server.
  mc-server:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        TARGET_SERVER: memory-core
    environment:
      - NEO_TRANSPORT=sse
      - NEO_MC_PRIMARY=false
      - NEO_MAILBOX_DEFAULT_REPLY_POLICY=blocked
      - NEO_AUTO_SUMMARIZE=false
      - NEO_MEM_AUTO_START_DATABASE=false
      - NEO_MEM_AUTO_START_INFERENCE=false
      - NEO_AUTO_DREAM=false
      - NEO_AUTO_GOLDEN_PATH=false
      - NEO_REAL_TIME_MEMORY_PARSING=false
      - NEO_AUTO_INGEST_FS=false
      - NEO_CHROMA_HOST=chroma
      - NEO_CHROMA_PORT=8000
      - MCP_HTTP_PORT=3001
      - NEO_MEMORY_DB_PATH=/app/.neo-ai-data/sqlite/memory-core-graph.sqlite
      # Provider settings are passed through from the host environment / .env file;
      # each defaults to an empty string when unset.
      - NEO_MODEL_PROVIDER=${NEO_MODEL_PROVIDER:-}
      - NEO_EMBEDDING_PROVIDER=${NEO_EMBEDDING_PROVIDER:-}
      - NEO_OPENAI_COMPATIBLE_HOST=${NEO_OPENAI_COMPATIBLE_HOST:-}
      - NEO_OPENAI_COMPATIBLE_MODEL=${NEO_OPENAI_COMPATIBLE_MODEL:-}
      - NEO_OPENAI_COMPATIBLE_EMBEDDING_MODEL=${NEO_OPENAI_COMPATIBLE_EMBEDDING_MODEL:-}
      - NEO_OPENAI_COMPATIBLE_API_KEY=${NEO_OPENAI_COMPATIBLE_API_KEY:-}
      - NEO_OLLAMA_KEEP_ALIVE=${NEO_OLLAMA_KEEP_ALIVE:-}
      - NEO_OPENAI_COMPATIBLE_KEEP_ALIVE=${NEO_OPENAI_COMPATIBLE_KEEP_ALIVE:-}
    volumes:
      - shared-sqlite-data:/app/.neo-ai-data/sqlite
    depends_on:
      chroma:
        condition: service_healthy
    networks:
      - neo-mcp-network
    expose:
      - "3001"
    healthcheck:
      test: ["CMD", "node", "./ai/scripts/diagnostics/mcpHealthcheck.mjs", "--url", "http://127.0.0.1:3001", "--client-name", "neo-mc-container-healthcheck"]
      interval: 10s
      timeout: 10s
      retries: 12
      start_period: 45s
    deploy:
      resources:
        limits:
          memory: 1g
          cpus: "1.0"

  # The Agent OS maintenance orchestrator (ADR 0014 §2.3). Built from the shared
  # image via the generalized `SERVICE_ENTRYPOINT` arg. `NEO_AI_DEPLOYMENT_MODE=cloud`
  # activates the cloud-safe scheduler profile — only the cloud-deployable lanes
  # (summary, backup, dream, golden-path) run; the local-only lanes (primary-dev-sync,
  # kbSync, bridgeDaemon) are disabled via the Sub A #11722 deployment-mode toggles.
  # `cloud` profile — started by `docker compose --profile cloud up`.
  orchestrator:
    build:
      context: .
      dockerfile: Dockerfile
      args:
        SERVICE_ENTRYPOINT: ai/daemons/orchestrator/daemon.mjs
    environment:
      - NEO_AI_DEPLOYMENT_MODE=cloud
      - NEO_ORCHESTRATOR_PRIMARY_DEV_SYNC_ENABLED=false
      - NEO_ORCHESTRATOR_CHROMA_DAEMON_ENABLED=false
      - NEO_ORCHESTRATOR_KB_SYNC_ENABLED=false
      - NEO_ORCHESTRATOR_BRIDGE_DAEMON_ENABLED=false
      - NEO_ORCHESTRATOR_GOLDEN_PATH_REPO_ENRICHMENT_ENABLED=false
      - NEO_ORCHESTRATOR_MLX_ENABLED=false
      - NEO_TENANT_REPO_MIRROR_ROOT=/app/.neo-ai-data
      - NEO_MODEL_PROVIDER=${NEO_MODEL_PROVIDER:-}
      - NEO_EMBEDDING_PROVIDER=${NEO_EMBEDDING_PROVIDER:-}
      - NEO_OPENAI_COMPATIBLE_HOST=${NEO_OPENAI_COMPATIBLE_HOST:-}
      - NEO_OPENAI_COMPATIBLE_MODEL=${NEO_OPENAI_COMPATIBLE_MODEL:-}
      - NEO_OPENAI_COMPATIBLE_EMBEDDING_MODEL=${NEO_OPENAI_COMPATIBLE_EMBEDDING_MODEL:-}
      - NEO_OPENAI_COMPATIBLE_API_KEY=${NEO_OPENAI_COMPATIBLE_API_KEY:-}
      - NEO_OLLAMA_KEEP_ALIVE=${NEO_OLLAMA_KEEP_ALIVE:-}
      - NEO_OPENAI_COMPATIBLE_KEEP_ALIVE=${NEO_OPENAI_COMPATIBLE_KEEP_ALIVE:-}
      - NEO_CHROMA_HOST=chroma
      - NEO_CHROMA_PORT=8000
      - NEO_MEMORY_DB_PATH=/app/.neo-ai-data/sqlite/memory-core-graph.sqlite
    volumes:
      - shared-sqlite-data:/app/.neo-ai-data/sqlite
      - tenant-repo-mirrors:/app/.neo-ai-data/tenant-repos
      # Redeploy-safe backup persistence (Sub C #11724): the cloud-safe `backup`
      # scheduler lane writes bundles here. A host bind-mount keeps them across
      # container rebuilds and host-accessible for off-site copy / inspection.
      - ./.neo-ai-data/backups:/app/.neo-ai-data/backups
      # kb-config.yaml bootstrap tier (#12145): deployments that provide a kb-config.yaml
      # (per-tenant Source/Parser registration + pull-mode `tenantRepos`) MUST mount it
      # read-only here AND on kb-server. The pull-mode sync's tiered resolver
      # (TenantRepoSyncService -> KnowledgeBaseIngestionService.listConfiguredTenantRepos)
      # reads it from `<neoRootDir>/kb-config.yaml` the same way kb-server resolves it at
      # query time; without the mount the orchestrator silently falls back to the
      # aiConfig.tenantRepos[] default tier only. Example:
      # - ./kb-config.yaml:/app/kb-config.yaml:ro
    depends_on:
      chroma:
        condition: service_healthy
      kb-server:
        condition: service_healthy
      mc-server:
        condition: service_healthy
    networks:
      - neo-mcp-network
    profiles:
      - cloud
    # Liveness verified via TaskStateService's persistent state file (#11937).
    # TaskStateService writes JSON state on every task lifecycle transition;
    # mtime within 10 minutes (≫ swarm-heartbeat + summary cadences with safety
    # margin) = orchestrator is alive and polling. 90s start_period covers the
    # first-poll latency on cold container boot. The probe exits 1 when the state
    # file is missing or older than 600000 ms; the directory can be overridden
    # with NEO_AI_ORCHESTRATOR_DIR.
    healthcheck:
      test: ["CMD-SHELL", "node -e \"const fs=require('fs');const f=(process.env.NEO_AI_ORCHESTRATOR_DIR||'/app/.neo-ai-data/orchestrator-daemon')+'/orchestrator-state.json';try{const m=fs.statSync(f).mtimeMs;process.exit(Date.now()-m<600000?0:1)}catch(e){process.exit(1)}\""]
      interval: 60s
      timeout: 5s
      retries: 3
      start_period: 90s
    deploy:
      resources:
        limits:
          memory: 1g
          cpus: "1.0"

  # Reverse proxy + TLS termination — the public ingress for the cloud Agent OS
  # deployment (Sub C #11724). `ingress` profile — `docker compose --profile
  # ingress up` (combine with `--profile cloud` for the full stack). Caddy
  # terminates TLS and path-routes `/kb/*` -> kb-server:3000, `/mc/*` -> mc-server:3001.
  # Config: ai/deploy/Caddyfile.
  ingress:
    image: caddy:2-alpine
    ports:
      - "443:443"
    environment:
      - NEO_DEPLOY_HOSTNAME=${NEO_DEPLOY_HOSTNAME:-localhost}
    volumes:
      - ./Caddyfile:/etc/caddy/Caddyfile:ro
      - caddy-data:/data
      - caddy-config:/config
    # Long-form dependencies, matching the orchestrator: the public surface only
    # comes up once both upstream MCP servers pass their healthchecks, instead of
    # merely after their containers have been started.
    depends_on:
      kb-server:
        condition: service_healthy
      mc-server:
        condition: service_healthy
    networks:
      - neo-mcp-network
    profiles:
      - ingress
    # TCP-port responsiveness on 443 (caddy's only public surface). Direct nc -z
    # exit-status check — caddy:2-alpine includes BusyBox nc; exit 0 = port accepts
    # TCP connection, non-zero = caddy crashed. Avoids the wget --quiet + grep
    # silent-success-empty-output failure mode (#11939 cycle-1 review per @neo-gpt).
    healthcheck:
      test: ["CMD-SHELL", "nc -z 127.0.0.1 443"]
      interval: 30s
      timeout: 5s
      retries: 3
      start_period: 30s
    deploy:
      resources:
        limits:
          memory: 256m
          cpus: "0.5"

  # Optional self-hosted model-provider runtime (Post-MVP residual #11734).
  # The service is disabled unless the operator starts `--profile local-model`.
  # It deliberately remains separate from the orchestrator: configure KB/MC/
  # orchestrator consumers with `NEO_MODEL_PROVIDER=openAiCompatible` and
  # `NEO_OPENAI_COMPATIBLE_HOST=http://local-model:11434` only for deployments
  # that explicitly opt into this profile.
  local-model:
    # NOTE(review): the fallback tag is the floating `latest`; set
    # NEO_LOCAL_MODEL_IMAGE to a pinned tag for reproducible deployments.
    image: "${NEO_LOCAL_MODEL_IMAGE:-ollama/ollama:latest}"
    environment:
      - OLLAMA_HOST=0.0.0.0:11434
      - OLLAMA_MODELS=/root/.ollama
      # `${VAR:--1}` is the `:-` default operator with a default value of `-1`.
      - OLLAMA_KEEP_ALIVE=${NEO_LOCAL_MODEL_KEEP_ALIVE:--1}
      - OLLAMA_CONTEXT_LENGTH=${NEO_LOCAL_MODEL_CONTEXT_LENGTH:-262144}
    volumes:
      - local-model-data:/root/.ollama
    networks:
      - neo-mcp-network
    expose:
      - "11434"
    healthcheck:
      test: ["CMD", "ollama", "list"]
      interval: 30s
      timeout: 10s
      retries: 10
      start_period: 60s
    profiles:
      - local-model
    # Limits are operator-tunable via NEO_LOCAL_MODEL_MEMORY_LIMIT /
    # NEO_LOCAL_MODEL_CPU_LIMIT; the fallbacks are 32g and 4.0 CPUs.
    deploy:
      resources:
        limits:
          memory: "${NEO_LOCAL_MODEL_MEMORY_LIMIT:-32g}"
          cpus: "${NEO_LOCAL_MODEL_CPU_LIMIT:-4.0}"
# kb-server / mc-server stay internal-only (`expose`) on `neo-mcp-network`; the
# `ingress` profile is the sole public surface. `caddy-data` persists Caddy's
# internal CA + issued certs across container rebuilds (Sub C #11724).
networks:
  # Single bridge network that every service in this file attaches to.
  neo-mcp-network:
    driver: bridge
# Named volumes. All use default settings; `{}` makes the empty definition
# explicit instead of relying on a bare (null) value.
volumes:
  # Mounted at /chroma/chroma in the chroma service.
  chroma-data: {}
  # Mounted at /app/.neo-ai-data/sqlite in kb-server, mc-server and orchestrator.
  shared-sqlite-data: {}
  # Mounted at /app/.neo-ai-data/tenant-repos in orchestrator.
  tenant-repo-mirrors: {}
  # Mounted at /data in ingress.
  caddy-data: {}
  # Mounted at /config in ingress.
  caddy-config: {}
  # Mounted at /root/.ollama in local-model.
  local-model-data: {}