-
Notifications
You must be signed in to change notification settings - Fork 21
Expand file tree
/
Copy pathdocker-compose-nim.yml
More file actions
106 lines (102 loc) · 3.4 KB
/
docker-compose-nim.yml
File metadata and controls
106 lines (102 loc) · 3.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# NVIDIA NIM Microservices for Retail Agentic Commerce
#
# This compose file deploys local NVIDIA NIMs for LLM inference and embeddings.
# Use this instead of NVIDIA API Catalog hosted endpoints for on-prem deployment.
#
# Prerequisites:
# - NVIDIA GPU(s) with sufficient VRAM
# - NGC_API_KEY environment variable set
# - LOCAL_NIM_CACHE directory created (defaults to ~/.cache/nim when the variable is unset or empty)
#
# Usage:
# # Start NIMs only
# docker compose -f docker-compose-nim.yml up -d
#
# # Start with infrastructure and application services
# docker compose -f docker-compose.infra.yml -f docker-compose-nim.yml -f docker-compose.yml up -d
#
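# After starting, update your .env to use local NIMs. The URLs below use the
# Compose service names and container port 8000 (reachable from containers on
# the same network); from the host, the NIMs are published on ports 8010 and 8011: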
# NIM_LLM_BASE_URL=http://nemotron-nano:8000/v1
# NIM_LLM_MODEL_NAME=nvidia/nemotron-3-nano
# NIM_EMBED_BASE_URL=http://embedqa:8000/v1
# NIM_EMBED_MODEL_NAME=nvidia/nv-embedqa-e5-v5
#
# Note: The model name for the local NIM is 'nvidia/nemotron-3-nano' (without the '-30b-a3b' suffix),
# while the public endpoint uses 'nvidia/nemotron-3-nano-30b-a3b'.
services:
  # ---------------------------------------------------------------------------
  # LLM NIM (Nemotron Nano)
  # Container port 8000 is published on host port 8010.
  # ---------------------------------------------------------------------------
  nemotron-nano:
    container_name: nemotron-nano
    image: nvcr.io/nim/nvidia/nemotron-3-nano:1
    user: "${UID:-1000}"
    restart: "no"
    shm_size: "16gb"
    environment:
      NGC_API_KEY: "${NGC_API_KEY}"
    ports:
      - "8010:8000"
    volumes:
      # Host cache directory (LOCAL_NIM_CACHE, falling back to ~/.cache/nim)
      # mounted at /opt/nim/.cache inside the container.
      - "${LOCAL_NIM_CACHE:-~/.cache/nim}:/opt/nim/.cache"
    networks:
      - acp-nim-network
      - acp-infra-network
    deploy:
      resources:
        reservations:
          devices:
            # Reserve the NVIDIA GPU with device id 0 for this service.
            - driver: nvidia
              device_ids:
                - "0"
              capabilities:
                - gpu
    healthcheck:
      # curl -f against the readiness endpoint on the container's own port.
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:8000/v1/health/ready"
      start_period: 120s
      interval: 30s
      timeout: 10s
      retries: 5

  # ---------------------------------------------------------------------------
  # Embedding NIM (nv-embedqa-e5-v5)
  # Container port 8000 is published on host port 8011.
  # ---------------------------------------------------------------------------
  embedqa:
    container_name: embedqa
    image: nvcr.io/nim/nvidia/nv-embedqa-e5-v5:1.6
    user: "${UID:-1000}"
    restart: "no"
    environment:
      NGC_API_KEY: "${NGC_API_KEY}"
    ports:
      - "8011:8000"
    volumes:
      # Same host cache directory as the LLM service, same mount point.
      - "${LOCAL_NIM_CACHE:-~/.cache/nim}:/opt/nim/.cache"
    networks:
      - acp-nim-network
      - acp-infra-network
    deploy:
      resources:
        reservations:
          devices:
            # Reserve the NVIDIA GPU with device id 1 for this service.
            - driver: nvidia
              device_ids:
                - "1"
              capabilities:
                - gpu
    healthcheck:
      # curl -f against the readiness endpoint on the container's own port.
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:8000/v1/health/ready"
      start_period: 60s
      interval: 30s
      timeout: 10s
      retries: 5
# =============================================================================
# NETWORKS
# =============================================================================
networks:
  # Bridge network with a fixed name, joined by both NIM services.
  acp-nim-network:
    driver: bridge
    name: acp-nim-network

  # Network shared by all services. It is declared in this file so the NIMs,
  # which take the longest to initialize, can be started first; the
  # infrastructure and application services join it afterwards.
  acp-infra-network:
    driver: bridge
    name: acp-infra-network