@@ -47,16 +47,19 @@ services:
4747 tei-embedding-service :
4848 image : ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
4949 container_name : tei-embedding-server
50+ restart : unless-stopped
5051 ports :
5152 - " 8090:80"
5253 volumes :
5354 - " ../../../../assets/data:/data"
54- shm_size : 1g
55+ shm_size : 2g
5556 environment :
5657 MODEL_ID : ${EMBEDDING_MODEL_ID:-BAAI/bge-base-en-v1.5}
5758 PORT : 80
5859 MAX_BATCH_TOKENS : 16384
5960 HF_TOKEN : ${HUGGINGFACEHUB_API_TOKEN}
61+ # Enable faster model downloads
62+ HF_HUB_ENABLE_HF_TRANSFER : " 1"
6063 # Intel Xeon optimizations
6164 OMP_NUM_THREADS : 4
6265 KMP_AFFINITY : " granularity=fine,compact,1,0"
@@ -69,7 +72,7 @@ services:
6972 interval : 30s
7073 timeout : 10s
7174 retries : 10
72- start_period : 120s
75+ start_period : 180s
7376
7477 # Embedding Microservice
7578 embedding :
@@ -119,15 +122,18 @@ services:
119122 tei-reranking-service :
120123 image : ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
121124 container_name : tei-reranking-server
125+ restart : unless-stopped
122126 ports :
123127 - " 8808:80"
124128 volumes :
125129 - " ../../../../assets/data:/data"
126- shm_size : 1g
130+ shm_size : 2g
127131 environment :
128132 MODEL_ID : ${RERANK_MODEL_ID:-BAAI/bge-reranker-base}
129133 PORT : 80
130134 HF_TOKEN : ${HUGGINGFACEHUB_API_TOKEN}
135+ # Enable faster model downloads
136+ HF_HUB_ENABLE_HF_TRANSFER : " 1"
131137 http_proxy : ${http_proxy}
132138 https_proxy : ${https_proxy}
133139 no_proxy : ${no_proxy}
@@ -136,7 +142,7 @@ services:
136142 interval : 30s
137143 timeout : 10s
138144 retries : 10
139- start_period : 120s
145+ start_period : 180s
140146
141147 # Reranking Microservice
142148 reranking :
@@ -164,17 +170,20 @@ services:
164170 tgi-service :
165171 image : ghcr.io/huggingface/text-generation-inference:2.0.1
166172 container_name : tgi-service
173+ restart : unless-stopped
167174 ports :
168175 - " 8008:80"
169176 volumes :
170177 - " ../../../../assets/data:/data"
171- shm_size : 1g
178+ shm_size : 2g
172179 environment :
173180 MODEL_ID : ${LLM_MODEL_ID:-Intel/neural-chat-7b-v3-3}
174181 HF_TOKEN : ${HUGGINGFACEHUB_API_TOKEN}
175182 MAX_INPUT_LENGTH : 2048
176183 MAX_TOTAL_TOKENS : 4096
177184 PORT : 80
185+ # Enable faster model downloads
186+ HF_HUB_ENABLE_HF_TRANSFER : " 1"
178187 # Intel Xeon optimizations
179188 OMP_NUM_THREADS : 8
180189 KMP_AFFINITY : " granularity=fine,compact,1,0"
@@ -184,7 +193,7 @@ services:
184193 interval : 30s
185194 timeout : 10s
186195 retries : 10
187- start_period : 180s
196+ start_period : 300s
188197
189198 # LLM Microservice
190199 llm-tgi :
@@ -315,4 +324,4 @@ volumes:
315324
316325networks :
317326 default :
318- driver : bridge
327+ driver : bridge
0 commit comments