# compose.yaml
# yaml-language-server: $schema=https://raw.githubusercontent.com/compose-spec/compose-spec/master/schema/compose-spec.json
services:
  # Generative model: llama.cpp server loading the Llama-3.2-1B-Instruct GGUF
  # and listening on port 8081. The rag service points at it through its
  # default OPENAI_BASEURL (http://llm:8081/v1).
  llm:
    image: ghcr.io/ggerganov/llama.cpp:server  # this is the server latest tag
    # NOTE(review): "--pooling cls" is an embedding-pooling option; confirm it
    # is intentional for this instruct model.
    command:
      - "-m"
      - "/models/hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF/llama-3.2-1b-instruct-q4_k_m.gguf"
      - "--pooling"
      - "cls"
    environment:
      LLAMA_ARG_PORT: "8081"
    expose:
      - "8081"
    ports:
      - "8081:8081"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8081/health"]
      start_period: 10s
      interval: 10s
      timeout: 5s
      retries: 5
    volumes:
      # Model files are shared from the host and mounted read-only.
      - ./models/:/models/:ro

  # Embedding model: llama.cpp server started with --embedding on port 8082.
  # The vectorstore service points at it through its default EMBEDDER_BASEURL.
  embedder:
    image: ghcr.io/ggerganov/llama.cpp:server  # this is the server latest tag
    command:
      - "-m"
      - "/models/CompendiumLabs/bge-small-en-v1.5-gguf/bge-small-en-v1.5-f32.gguf"
      - "--embedding"
    environment:
      LLAMA_ARG_PORT: "8082"
    expose:
      - "8082"
    ports:
      - "8082:8082"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8082/health"]
      start_period: 10s
      interval: 10s
      timeout: 5s
      retries: 5
    volumes:
      - ./models/:/models/:ro

  # Reranking model: llama.cpp server started with --reranking on port 8083.
  # The rag service points at it through its default RERANKER_BASEURL.
  reranker:
    image: ghcr.io/ggerganov/llama.cpp:server  # this is the server latest tag
    command:
      - "-m"
      - "/models/BAAI/bge-reranker-base/bge-reranker-base-Q4_K_M.gguf"
      - "--reranking"
    environment:
      LLAMA_ARG_PORT: "8083"
    expose:
      - "8083"
    ports:
      - "8083:8083"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8083/health"]
      start_period: 10s
      interval: 10s
      timeout: 5s
      retries: 5
    volumes:
      - ./models/:/models/:ro

  # Qdrant vector database. HTTP and gRPC ports are overridable through
  # QDRANT_HTTP_PORT / QDRANT_GRPC_PORT; data lives in the "qdrant" named volume.
  qdrant:
    image: qdrant/qdrant:v1.12.4
    environment:
      QDRANT__SERVICE__HTTP_PORT: "${QDRANT_HTTP_PORT:-6333}"
      QDRANT__SERVICE__GRPC_PORT: "${QDRANT_GRPC_PORT:-6334}"
    expose:
      - "${QDRANT_HTTP_PORT:-6333}"
      - "${QDRANT_GRPC_PORT:-6334}"
    ports:
      # dashboard is available at http://localhost:${QDRANT_HTTP_PORT:-6333}/dashboard
      - "${QDRANT_HTTP_PORT:-6333}:${QDRANT_HTTP_PORT:-6333}"
      - "${QDRANT_GRPC_PORT:-6334}:${QDRANT_GRPC_PORT:-6334}"
    volumes:
      - qdrant:/qdrant/storage

  # Sidecar whose only job is to probe Qdrant's /healthz endpoint over the
  # compose network, so other services can wait on "service_healthy".
  # NOTE(review): presumably needed because the qdrant image ships no HTTP
  # client for an in-container healthcheck - confirm.
  qdrant-healthcheck:
    image: busybox:1-musl
    entrypoint: ["sleep", "infinity"]  # Keeps the service running forever
    depends_on:
      - qdrant
    healthcheck:
      test:
        - "CMD"
        - "wget"
        - "--no-verbose"
        - "--quiet"
        - "--spider"
        - "--tries=1"
        - "http://qdrant:${QDRANT_HTTP_PORT:-6333}/healthz"
      start_period: 10s
      interval: 10s
      timeout: 5s
      retries: 5

  # RAG application, built locally from docker/Dockerfile.rag. Publishes a gRPC
  # port and an HTTP gateway port, and starts only after llm, reranker and
  # vectorstore report healthy.
  rag:
    # image: ghcr.io/aria3ppp/rag
    # pull_policy: build
    build:
      context: .
      dockerfile: ./docker/Dockerfile.rag
    environment:
      RAG_SERVER_GRPC_PORT: "${RAG_SERVER_GRPC_PORT:-9001}"
      RAG_SERVER_GATEWAY_PORT: "${RAG_SERVER_GATEWAY_PORT:-8000}"
      RAG_SERVER_GATEWAY_ALLOWED_ORIGINS: "${RAG_SERVER_GATEWAY_ALLOWED_ORIGINS:-*}"
      RAG_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT: "${RAG_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT:-30s}"
      OPENAI_BASEURL: "${OPENAI_BASEURL:-http://llm:8081/v1}"
      OPENAI_APIKEY: "${OPENAI_APIKEY:-apikey}"
      OPENAI_MODEL: "${OPENAI_MODEL:-model}"
      RERANKER_BASEURL: "${RERANKER_BASEURL:-http://reranker:8083/v1}"
      VECTORSTORE_HOST: "${VECTORSTORE_HOST:-vectorstore}"
      VECTORSTORE_SERVER_GRPC_PORT: "${VECTORSTORE_SERVER_GRPC_PORT:-9091}"
    expose:
      - "${RAG_SERVER_GRPC_PORT:-9001}"  # grpc
      - "${RAG_SERVER_GATEWAY_PORT:-8000}"  # http gateway
    ports:
      - "${RAG_SERVER_GRPC_PORT:-9001}:${RAG_SERVER_GRPC_PORT:-9001}"  # grpc
      - "${RAG_SERVER_GATEWAY_PORT:-8000}:${RAG_SERVER_GATEWAY_PORT:-8000}"  # http gateway
    depends_on:
      llm:
        condition: service_healthy
      reranker:
        condition: service_healthy
      vectorstore:
        condition: service_healthy
    healthcheck:
      # The application binary is invoked with its own probe flags.
      test: ["CMD", "/app/rag", "-probe", "http", "-mute"]
      start_period: 10s
      interval: 10s
      timeout: 5s
      retries: 5

  # Vector-store application, built locally from docker/Dockerfile.vectorstore.
  # Configured with the embedder URL and the Qdrant gRPC endpoint, and starts
  # only after embedder and qdrant-healthcheck report healthy.
  vectorstore:
    # image: ghcr.io/aria3ppp/vectorstore
    # pull_policy: build
    build:
      context: .
      dockerfile: ./docker/Dockerfile.vectorstore
    environment:
      VECTORSTORE_SERVER_GRPC_PORT: "${VECTORSTORE_SERVER_GRPC_PORT:-9091}"
      VECTORSTORE_SERVER_GATEWAY_PORT: "${VECTORSTORE_SERVER_GATEWAY_PORT:-8080}"
      VECTORSTORE_SERVER_GATEWAY_ALLOWED_ORIGINS: "${VECTORSTORE_SERVER_GATEWAY_ALLOWED_ORIGINS:-*}"
      VECTORSTORE_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT: "${VECTORSTORE_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT:-30s}"
      EMBEDDER_BASEURL: "${EMBEDDER_BASEURL:-http://embedder:8082/v1}"
      QDRANT_HOST: "${QDRANT_HOST:-qdrant}"
      QDRANT_GRPC_PORT: "${QDRANT_GRPC_PORT:-6334}"
      QDRANT_COLLECTION_NAME: "${QDRANT_COLLECTION_NAME:-collection}"
      # NOTE(review): presumably the collection's vector dimension; it must
      # agree with the embedder model's output width - confirm when swapping
      # the embedder model.
      QDRANT_VECTOR_SIZE: "${QDRANT_VECTOR_SIZE:-384}"
    expose:
      - "${VECTORSTORE_SERVER_GRPC_PORT:-9091}"  # grpc
      - "${VECTORSTORE_SERVER_GATEWAY_PORT:-8080}"  # http gateway
    ports:
      - "${VECTORSTORE_SERVER_GRPC_PORT:-9091}:${VECTORSTORE_SERVER_GRPC_PORT:-9091}"  # grpc
      - "${VECTORSTORE_SERVER_GATEWAY_PORT:-8080}:${VECTORSTORE_SERVER_GATEWAY_PORT:-8080}"  # http gateway
    depends_on:
      embedder:
        condition: service_healthy
      qdrant-healthcheck:
        condition: service_healthy
    healthcheck:
      # The application binary is invoked with its own probe flags.
      test: ["CMD", "/app/vectorstore", "-probe", "http", "-mute"]
      start_period: 10s
      interval: 10s
      timeout: 5s
      retries: 5
# Named volumes declared by this project.
volumes:
  # Backs /qdrant/storage in the qdrant service; empty mapping means default
  # driver settings.
  qdrant: {}