vllm-webui/env.example at main · bryonbaker/vllm-webui · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# Environment variables for the vLLM Playground container.
# Copy this file to .env and adjust the values below as needed.
# Usage: docker-compose --env-file .env up -d

# HuggingFace token (required for gated models such as Llama and Gemma).
# Get your token from: https://huggingface.co/settings/tokens
# Uncomment the next line and replace the placeholder with your own token.
# HF_TOKEN=hf_your_token_here

# Port for the WebUI (default: 7860)
WEBUI_PORT=7860

# Port for the vLLM API (default: 8000)
VLLM_PORT=8000

# vLLM target device (use "cpu" on macOS)
VLLM_TARGET_DEVICE=cpu

# Whether vLLM is built with CUDA (0 for a CPU-only build)
VLLM_BUILD_WITH_CUDA=0

# CPU KV cache space in GB (default: 4)
VLLM_CPU_KVCACHE_SPACE=4

# CPU thread binding (auto, none, or specific cores)
# NOTE(review): confirm the accepted values against the vLLM CPU backend
# documentation for the vLLM version in use.
VLLM_CPU_OMP_THREADS_BIND=auto