dual-node-llm-serving/start_service.sh at main · 3DAlgoLab/dual-node-llm-serving · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/bin/bash
# start_service.sh - Starts dual node llama servers and nginx (uses config from .env)
# Services generated from llama-node.service.in template by setup-llama.sh.
# Uses centralized .env for model name, parameters, and paths. Zero-sudo.

# Abort the script as soon as a command fails outside a conditional context.
set -e

# Load configuration from .env (centralized model settings).
# Looks for a file named ".env" in the directory that holds this script and,
# when it exists, sources it with allexport switched on so that every variable
# it assigns is also exported to child processes. A missing .env is not an
# error; the function simply does nothing.
# Globals:   whatever the .env file assigns (exported)
# Arguments: none
# Returns:   0
load_env() {
    local script_dir env_file
    # Declared separately from the assignment so that a failing command
    # substitution is not hidden behind the exit status of `local` (SC2155).
    script_dir="$(dirname "${BASH_SOURCE[0]}")"
    env_file="${script_dir}/.env"

    if [[ -f "$env_file" ]]; then
        set -a
        # shellcheck source=.env
        source "$env_file"
        set +a
    fi
}

# Read .env right away so any variables it defines are set before main runs.
load_env

# ─────────────────────────────────────────────
# LOGGING AND ERROR HELPERS - used by every step below
# ─────────────────────────────────────────────
# Report a fatal problem and stop the script.
# Arguments: $1 - description of what went wrong
# Outputs:   "❌ ERROR: <message>" on stderr
# Returns:   never returns; exits the shell with status 1
error() {
    printf '❌ ERROR: %s\n' "$1" >&2
    exit 1
}

# Print a success message prefixed with a check mark.
# Arguments: $1 - message text
# Outputs:   "✅ <message>" on stdout
success() {
    printf '✅ %s\n' "$1"
}

# Print a message prefixed with the current local date and time.
# Arguments: $1 - message text
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" on stdout
log() {
    printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1"
}

# Run the wait_gpus.sh helper that sits next to this script and abort if it
# reports failure. When the helper is absent the check is skipped with a
# warning instead of failing.
# Arguments: $1 - expected number of GPUs, passed to the helper (default 4)
#            $2 - second helper argument (default 120); presumably a timeout
#                 in seconds - confirm against wait_gpus.sh
# Outputs:   a warning via log when the helper is missing
# Returns:   0 on success or skip; exits via error when the helper fails
wait_gpus() {
    local expected_gpus="${1:-4}"
    local timeout="${2:-120}"
    local script_dir gpu_script
    # Assigned separately from `local` so the substitution status is not masked.
    script_dir="$(dirname "${BASH_SOURCE[0]}")"
    gpu_script="${script_dir}/wait_gpus.sh"

    # The helper is launched through `bash`, so it only has to exist; testing
    # the executable bit would skip a present helper and misreport it as
    # "not found".
    if [[ -f "$gpu_script" ]]; then
        bash "$gpu_script" "$expected_gpus" "$timeout" \
            || error "GPU initialization timed out (expected ${expected_gpus} GPUs)"
    else
        log "WARNING: wait_gpus.sh not found, skipping GPU check"
    fi
}

# Warn when any of the user-level units this script starts (llama-node1,
# llama-node2, user-nginx) has no unit file registered with the user systemd
# instance. This is advisory only: it never aborts.
# Arguments: none
# Outputs:   an optional warning naming the missing units, then a success line
# Returns:   0
check_systemd() {
    local unit unit_files
    local -a missing=()

    # Query systemd once and match locally. If systemctl itself fails, the
    # listing stays empty and every unit is reported as missing.
    unit_files="$(systemctl --user list-unit-files)" || true

    for unit in llama-node1 llama-node2 user-nginx; do
        # Match the complete unit file name so that a look-alike such as
        # "user-nginx-old.service" does not satisfy the check.
        if ! grep -Eq "(^|[[:space:]])${unit}\.service([[:space:]]|\$)" <<<"$unit_files"; then
            missing+=("$unit")
        fi
    done

    if (( ${#missing[@]} > 0 )); then
        log "WARNING: Services not registered (${missing[*]}). Run ./setup-llama.sh first (generates from template)"
    fi
    success "Systemd environment checked"
}

# Start the two llama node units, one after the other, under the user systemd
# instance. The first unit that fails to start aborts the script.
# Globals:   MODEL_NAME (read; shown as "Unknown" when unset or empty)
# Arguments: none
# Outputs:   a progress line via log and a summary line via success
# Returns:   0 when both units start; otherwise exits via error
start_nodes() {
    local node

    log "🚀 Starting dual-node servers (GPU 0-1 and 2-3)..."
    for node in llama-node1 llama-node2; do
        if ! systemctl --user start "$node"; then
            error "Failed to start $node"
        fi
    done
    success "Dual nodes started (ports 8081, 8082; model: ${MODEL_NAME:-Unknown}, alias: coder)"
}

# Create the per-user nginx directory under /run/user/<uid> and start the
# user-nginx unit under the user systemd instance.
# Arguments: none
# Outputs:   a progress line via log and a summary line via success
# Returns:   0 on success; exits via error when the directory cannot be
#            created or the unit fails to start
start_nginx() {
    log "🔄 Starting user-level nginx reverse proxy (zero-sudo via symlink)..."

    if ! mkdir -p "/run/user/$(id -u)/nginx"; then
        error "Failed to create nginx pid dir"
    fi

    if ! systemctl --user start user-nginx; then
        error "Failed to start user-nginx"
    fi

    success "Nginx started (ports 8888 local, 19101 external; uses llama_backends)"
}

# Give the freshly started units a moment to settle, then confirm that each
# one is still active before declaring the stack ready. A unit that started
# and then died during the settle delay is reported here rather than being
# announced as ready.
# Arguments: $1 - settle delay handed to sleep (default 5)
# Outputs:   a progress line via log and, on success, "Services ready"
# Returns:   0 when all units are active; otherwise exits via error
wait_for_ready() {
    local settle="${1:-5}"
    local svc
    local -a not_ready=()

    log "Waiting for services to be ready..."
    sleep "$settle"

    for svc in llama-node1 llama-node2 user-nginx; do
        # is-active exits non-zero for any state other than "active".
        if ! systemctl --user is-active --quiet "$svc"; then
            not_ready+=("$svc")
        fi
    done

    if (( ${#not_ready[@]} > 0 )); then
        error "Services not active after ${settle}s: ${not_ready[*]} (inspect with: systemctl --user status <service>)"
    fi
    success "Services ready"
}

# Print the "Active:" and "Main PID:" lines from `systemctl --user status`
# for each of the three units, each under a "=== <unit> ===" heading.
# Arguments: none
# Outputs:   status summary on stdout
# Returns:   0
show_status() {
    # Keep the loop variable local so it cannot clobber a caller's `svc`.
    local svc

    log "Service status:"
    for svc in llama-node1 llama-node2 user-nginx; do
        echo "=== $svc ==="
        # Best effort on purpose: a non-zero status from this pipeline (for
        # example grep matching nothing) must not abort the script under
        # `set -e`.
        systemctl --user status "$svc" | grep -E "Active:|Main PID:" || true
        echo ""
    done
}

# Print the list of endpoints followed by a one-line model summary.
# Globals:   MODEL_NAME (read; "Unknown" when unset or empty)
#            CONTEXT_LENGTH (read; "200000" when unset or empty)
# Arguments: none
# Outputs:   endpoint list, a blank line, and the model summary on stdout
show_endpoints() {
    # One printf call; each argument becomes one output line.
    printf '%s\n' \
        "Endpoints:" \
        "  - localhost:8888 (nginx local, requires API key from .env)" \
        "  - localhost:19101 (nginx external, DPI_FACTORY_API_KEY auth)" \
        "  - localhost:8081 (Node 1 direct)" \
        "  - localhost:8082 (Node 2 direct)" \
        "" \
        "Model: ${MODEL_NAME:-Unknown} (alias: coder), ctx=${CONTEXT_LENGTH:-200000}, MemoryHigh=22G/Max=30G"
}

# Main function: runs every startup step in a fixed order, then prints a
# closing summary. Arguments passed to main are not read.
# Globals:   MODEL_NAME (read; "Unknown" when unset or empty)
# Outputs:   whatever the individual steps print, plus two closing lines
main() {
    local step
    # Order matters: GPU wait and unit check first, then the nodes, then
    # nginx, then the readiness wait and the reports.
    local -a steps=(
        wait_gpus
        check_systemd
        start_nodes
        start_nginx
        wait_for_ready
        show_status
        show_endpoints
    )

    for step in "${steps[@]}"; do
        "$step"
    done

    success "All dual node services started successfully! (${MODEL_NAME:-Unknown})"
    printf '%s\n' "Use './stop_service.sh' to stop. Run 'make lint' for checks."
}

# Script entry point; command-line arguments are forwarded to main.
main "$@"