Skip to content

Commit e8d8ca0

Browse files
Fix for TEI Server
Signed-off-by: cogniware-devops <ambarish.desai@cogniware.ai>
1 parent f0071be commit e8d8ca0

1 file changed

Lines changed: 16 additions & 7 deletions

File tree

CogniwareIms/docker_compose/intel/cpu/xeon/compose.yaml

Lines changed: 16 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -47,16 +47,19 @@ services:
4747
tei-embedding-service:
4848
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
4949
container_name: tei-embedding-server
50+
restart: unless-stopped
5051
ports:
5152
- "8090:80"
5253
volumes:
5354
- "../../../../assets/data:/data"
54-
shm_size: 1g
55+
shm_size: 2g
5556
environment:
5657
MODEL_ID: ${EMBEDDING_MODEL_ID:-BAAI/bge-base-en-v1.5}
5758
PORT: 80
5859
MAX_BATCH_TOKENS: 16384
5960
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
61+
# Enable faster model downloads
62+
HF_HUB_ENABLE_HF_TRANSFER: "1"
6063
# Intel Xeon optimizations
6164
OMP_NUM_THREADS: 4
6265
KMP_AFFINITY: "granularity=fine,compact,1,0"
@@ -69,7 +72,7 @@ services:
6972
interval: 30s
7073
timeout: 10s
7174
retries: 10
72-
start_period: 120s
75+
start_period: 180s
7376

7477
# Embedding Microservice
7578
embedding:
@@ -119,15 +122,18 @@ services:
119122
tei-reranking-service:
120123
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
121124
container_name: tei-reranking-server
125+
restart: unless-stopped
122126
ports:
123127
- "8808:80"
124128
volumes:
125129
- "../../../../assets/data:/data"
126-
shm_size: 1g
130+
shm_size: 2g
127131
environment:
128132
MODEL_ID: ${RERANK_MODEL_ID:-BAAI/bge-reranker-base}
129133
PORT: 80
130134
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
135+
# Enable faster model downloads
136+
HF_HUB_ENABLE_HF_TRANSFER: "1"
131137
http_proxy: ${http_proxy}
132138
https_proxy: ${https_proxy}
133139
no_proxy: ${no_proxy}
@@ -136,7 +142,7 @@ services:
136142
interval: 30s
137143
timeout: 10s
138144
retries: 10
139-
start_period: 120s
145+
start_period: 180s
140146

141147
# Reranking Microservice
142148
reranking:
@@ -164,17 +170,20 @@ services:
164170
tgi-service:
165171
image: ghcr.io/huggingface/text-generation-inference:2.0.1
166172
container_name: tgi-service
173+
restart: unless-stopped
167174
ports:
168175
- "8008:80"
169176
volumes:
170177
- "../../../../assets/data:/data"
171-
shm_size: 1g
178+
shm_size: 2g
172179
environment:
173180
MODEL_ID: ${LLM_MODEL_ID:-Intel/neural-chat-7b-v3-3}
174181
HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
175182
MAX_INPUT_LENGTH: 2048
176183
MAX_TOTAL_TOKENS: 4096
177184
PORT: 80
185+
# Enable faster model downloads
186+
HF_HUB_ENABLE_HF_TRANSFER: "1"
178187
# Intel Xeon optimizations
179188
OMP_NUM_THREADS: 8
180189
KMP_AFFINITY: "granularity=fine,compact,1,0"
@@ -184,7 +193,7 @@ services:
184193
interval: 30s
185194
timeout: 10s
186195
retries: 10
187-
start_period: 180s
196+
start_period: 300s
188197

189198
# LLM Microservice
190199
llm-tgi:
@@ -315,4 +324,4 @@ volumes:
315324

316325
networks:
317326
default:
318-
driver: bridge
327+
driver: bridge

0 commit comments

Comments
 (0)