-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathdocker-compose.yml
More file actions
81 lines (77 loc) · 2.11 KB
/
docker-compose.yml
File metadata and controls
81 lines (77 loc) · 2.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
---
# Compose stack: Ollama model server + llama-stack API + two front-ends
# (Streamlit playground and a Chainlit demo app). Startup order is enforced
# via healthchecks and depends_on conditions: ollama -> llama-stack -> UIs.
services:
  # Local Ollama inference server; pulls the configured model on startup.
  ollama:
    image: docker.io/ollama/ollama:latest
    ports:
      # Quoted so YAML never misreads "host:container" as a number/time.
      - "${OLLAMA_PORT:-7869}:${OLLAMA_INTERNAL_PORT:-11434}"
    volumes:
      - .:/code
      - ./.ollama:/root/.ollama  # persist downloaded models across restarts
    pull_policy: always
    tty: true
    restart: always
    environment:
      - OLLAMA_KEEP_ALIVE=${OLLAMA_KEEP_ALIVE:-24h}
      - OLLAMA_HOST=0.0.0.0  # listen on all interfaces inside the container
    # Start the server in the background, give it time to bind, pull the
    # inference model, then `wait` on the server so the container stays up.
    entrypoint: ["sh", "-c"]
    command: ["ollama serve & sleep 10 && ollama pull ${INFERENCE_MODEL:-llama3.2:1b} && wait"]
    healthcheck:
      # Healthy only once the model shows up in `ollama list`, so dependents
      # don't start before the pull completes.
      test: ["CMD", "sh", "-c", "ollama list | grep -q '${INFERENCE_MODEL:-llama3.2:1b}'"]
      interval: 30s
      timeout: 15s
      retries: 40
      start_period: 300s  # first model pull can take several minutes

  # Llama Stack API server, backed by the ollama service above.
  llama-stack:
    image: llamastack/distribution-ollama:latest
    ports:
      - "${LLAMA_STACK_PORT:-5001}:5000"
    volumes:
      - ./.llama:/root/.llama
    pull_policy: always
    restart: always
    environment:
      - INFERENCE_MODEL=${INFERENCE_MODEL:-llama3.2:1b}
      - OLLAMA_URL=http://ollama:${OLLAMA_INTERNAL_PORT:-11434}
      - LLAMA_STACK_PORT=5000
    command: --port 5000 --template ollama
    depends_on:
      ollama:
        condition: service_healthy  # wait until the model is pulled
    healthcheck:
      # NOTE(review): requires curl in the image — confirm it is present.
      test: ["CMD", "curl", "-f", "http://localhost:5000/v1/models"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 30s

  # Streamlit-based playground UI for the llama-stack API.
  llama-stack-playground:
    build:
      context: .
      dockerfile: Dockerfile.playground
    ports:
      - "${PLAYGROUND_PORT:-8501}:${PLAYGROUND_PORT:-8501}"
    volumes:
      - ./.llama:/root/.llama
    restart: always
    environment:
      - LLAMA_STACK_ENDPOINT=http://llama-stack:5000
    depends_on:
      llama-stack:
        condition: service_healthy

  # Chainlit demo application talking to the llama-stack API.
  chainlit-app:
    build:
      context: .
      dockerfile: Dockerfile.demo_01
    ports:
      - "${CHAINLIT_PORT:-9090}:8000"
    restart: always
    environment:
      - LLAMA_STACK_API_URL=http://llama-stack:5000
      - VECTOR_DB_ID=my_demo_vector_db
    depends_on:
      llama-stack:
        condition: service_healthy
    healthcheck:
      # NOTE(review): requires curl in the built image — confirm it is present.
      test: ["CMD", "curl", "-f", "http://localhost:8000"]
      interval: 30s
      timeout: 10s
      retries: 5
      start_period: 120s