dual-node-llm-serving/setup_llama.sh at main · 3DAlgoLab/dual-node-llm-serving · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#!/bin/bash
# setup_llama.sh - Dual-node setup for the Qwen3.6-27B model (alias: coder).
# Unit files are rendered from the systemd/llama-node.service.in template (the source of truth);
# the generated files are git-ignored via .gitignore, so there are no heredocs and no duplicated units.
# Per node: ctx=200K, MemoryHigh=100G, MemoryMax=128G.
# Runs with set -e enabled, with comprehensive error handling in all functions. Integrates deploy_nginx.sh.
# Service registration is now done entirely by this script. ensure_llm_serving_link() creates the
# ~/llm-serving symlink without sudo.

# Abort on the first unhandled failure, and make a pipeline fail when ANY of
# its stages fails (plain `set -e` only looks at the last stage).
set -eo pipefail

# Every path below is derived from HOME; stop early instead of silently
# building paths rooted at "/" when it is empty.
: "${HOME:?HOME must be set}"

# Absolute directory containing this script; .env, systemd/ and
# deploy_nginx.sh are resolved against it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
HOME_DIR="$HOME"
# llama-server binary, reached through the ~/llm-serving path.
LLAMA_CPP_PATH="$HOME_DIR/llm-serving/llama.cpp/build/bin/llama-server"

# Import the project's central configuration (single source of truth).
#
# Sources "${SCRIPT_DIR}/.env" with allexport switched on, so every variable
# assigned in that file is also exported to child processes.
# Globals:  SCRIPT_DIR (read); whatever the .env file assigns (written, exported)
# Outputs:  confirmation line via success(), including MODEL_NAME
# Exits:    through error() when the .env file does not exist
load_env() {
    local config="${SCRIPT_DIR}/.env"

    # Guard clause: without the file there is nothing to load.
    [[ -f "$config" ]] || error ".env file not found at $config"

    # allexport marks every assignment made while sourcing for export.
    set -o allexport
    # shellcheck source=.env
    . "$config"
    set +o allexport

    success "Loaded model configuration from .env (MODEL_NAME=${MODEL_NAME})"
}

# ─────────────────────────────────────────────
# ERROR HANDLING (on ALL functions)
# ─────────────────────────────────────────────
# Report a fatal problem on stderr and terminate the script.
# Arguments: $1 - description of what went wrong
# Outputs:   "❌ ERROR: <message>" on stderr
# Exits:     always, with status 1 (never returns to the caller)
error() {
    local message=$1
    printf '%s\n' "❌ ERROR: ${message}" >&2
    exit 1
}

# Print a success line on stdout.
# Arguments: $1 - message to show after the check-mark prefix
success() {
    local message=$1
    printf '%s\n' "✅ ${message}"
}

# Print a progress message on stdout, prefixed with the current local time.
# Arguments: $1 - message text
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>"
log() {
    printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1"
}

# Load central configuration from .env (single source of truth).
# The call sits here, after error() and success() are defined, because
# load_env reports through both of them.
load_env

# Verify that the llama.cpp server binary and the model file are in place.
# Globals:  LLAMA_CPP_PATH, MODEL_PATH, MODEL_NAME (read)
# Outputs:  confirmation via success(); build hints and errors on stderr
# Exits:    through error() when the binary is missing or not executable, when
#           MODEL_PATH is empty, or when the model file does not exist
check_llama_cpp() {
    if [[ ! -x "$LLAMA_CPP_PATH" ]]; then
        # Diagnostics go to stderr so they are not mistaken for normal output.
        log "ERROR: llama.cpp server not found at $LLAMA_CPP_PATH (via ~/llm-serving symlink)" >&2
        # The checked binary lives in build/bin, which is the CMake build
        # layout, so the hint has to describe the CMake build rather than `make`.
        log "Please run: cd ~/llm-serving/llama.cpp && cmake -B build -DGGML_CUDA=ON && cmake --build build --config Release -j" >&2
        error "Build llama.cpp first"
    fi

    # An empty MODEL_PATH would otherwise produce a confusing "not found at ." message.
    if [[ -z "${MODEL_PATH:-}" ]]; then
        error "MODEL_PATH is not set. Define it in .env."
    fi
    if [[ ! -f "${MODEL_PATH}" ]]; then
        error "Model not found at ${MODEL_PATH}. Check LM Studio download."
    fi
    success "llama.cpp and model paths verified (${MODEL_NAME})"
}

# Make sure $HOME_DIR/llm-serving is a symlink to the serving project directory.
#
# Nothing here needs sudo: only paths under HOME_DIR are touched.
# Globals:  HOME_DIR (read)
# Outputs:  progress via log(), result via success()
# Returns:  0 when the link already points at the target or was (re)created
# Exits:    through error() when the target directory is missing, when a
#           non-directory file occupies the link path, or when a filesystem
#           operation fails
ensure_llm_serving_link() {
    local link_path="$HOME_DIR/llm-serving"
    local target_path="$HOME_DIR/ai-serving/llama-cpp-ex"
    local backup_path
    log "Ensuring ~/llm-serving symlink to $target_path if missing (implements TODO #5: zero-sudo)..."

    # Validate the target BEFORE changing anything, so a missing target can
    # never cost the user an existing link or directory.
    [[ -d "$target_path" ]] || error "Symlink target $target_path does not exist or is not a directory"

    if [[ -L "$link_path" ]]; then
        # Compare fully resolved paths so equivalent spellings count as equal.
        if [[ "$(readlink -f "$link_path")" == "$(readlink -f "$target_path")" ]]; then
            success "Symlink ~/llm-serving already exists and points correctly"
            return 0
        fi
        log "WARNING: Existing symlink points elsewhere. Replacing it."
        rm -f -- "$link_path" || error "Failed to remove old symlink"
    elif [[ -d "$link_path" ]]; then
        # A real directory may hold user data (e.g. a llama.cpp checkout):
        # move it aside instead of deleting it.
        backup_path="${link_path}.bak.$(date '+%Y%m%d-%H%M%S').$$"
        [[ ! -e "$backup_path" ]] || error "Backup path $backup_path already exists"
        log "WARNING: $link_path exists as a directory. Moving it to $backup_path and replacing with symlink for zero-sudo setup."
        mv -- "$link_path" "$backup_path" || error "Failed to move existing dir at $link_path to $backup_path"
    elif [[ -e "$link_path" ]]; then
        error "Unexpected file at $link_path - remove manually"
    fi

    ln -sfn "$target_path" "$link_path" || error "Failed to create symlink $link_path -> $target_path"
    success "✓ Created ~/llm-serving symlink to project (no sudo privilege required)"
}

# Render one per-node systemd unit from the shared template.
#
# Arguments: $1 - node number (used in the unit file name and for %NODE_DESC%)
#            $2 - GPU id list substituted for %GPU_IDS%
#            $3 - port substituted for %PORT%
# Globals:   SCRIPT_DIR, MODEL_NAME (read); MODEL_NAME, MODEL_PATH, MMPROJ_PATH
#            and CONTEXT_LENGTH are read from the environment by envsubst
# Outputs:   writes $SCRIPT_DIR/systemd/llama-node<N>.service; confirmation via success()
# Exits:     through error() when the template is unreadable, envsubst is
#            unavailable, or rendering fails
generate_service() {
    local node_num=$1
    local gpu_ids=$2
    local port=$3
    local template="$SCRIPT_DIR/systemd/llama-node.service.in"
    local target="$SCRIPT_DIR/systemd/llama-node${node_num}.service"
    # Render into a staging file first so a failed run never leaves an empty or
    # half-written unit file at the real target.
    local staging="${target}.tmp.$$"

    [[ -r "$template" ]] || error "Template not found or unreadable: $template"
    command -v envsubst >/dev/null 2>&1 || error "envsubst not found (it is part of GNU gettext)"

    # Use envsubst to replace the listed ${VAR}s from .env (single source of truth) + sed for %PLACEHOLDERS%.
    # The subshell enables pipefail locally: without it the pipeline status is
    # sed's alone, and a failing envsubst would go unnoticed.
    # shellcheck disable=SC2016  # the ${VAR} list is meant literally for envsubst
    if ! (
        set -o pipefail
        envsubst '${MODEL_NAME} ${MODEL_PATH} ${MMPROJ_PATH} ${CONTEXT_LENGTH}' < "$template" |
            sed -e "s/%NODE_DESC%/${node_num}/g" \
                -e "s/%GPU_IDS%/${gpu_ids}/g" \
                -e "s/%PORT%/${port}/g" > "$staging"
    ); then
        rm -f -- "$staging"
        error "Failed to generate node${node_num}"
    fi
    mv -f -- "$staging" "$target" || error "Failed to generate node${node_num}"

    success "Generated llama-node${node_num}.service (GPU ${gpu_ids}, port ${port}, model=${MODEL_NAME})"
}

# Prepare both llama nodes: create the working directories and render one
# systemd unit per node from the template (source of truth).
# Globals:  HOME_DIR, MODEL_NAME, CONTEXT_LENGTH (read)
# Outputs:  progress via log(), result via success()
# Exits:    through error() when the directories cannot be created
setup_dual() {
    # One entry per node: "<node number> <GPU ids> <port>".
    local -a node_specs=("1 0,1 8081" "2 2,3 8082")
    local spec node gpus port

    log "Setting up dual node (${MODEL_NAME}, alias=coder, ctx=${CONTEXT_LENGTH})..."

    if ! mkdir -p "$HOME_DIR/llm-serving/systemd" "$HOME_DIR/llm-serving/nginx"; then
        error "Failed to create directories"
    fi

    for spec in "${node_specs[@]}"; do
        IFS=' ' read -r node gpus port <<< "$spec"
        generate_service "$node" "$gpus" "$port"
    done

    log "✓ Dual node systemd services generated from template"
    success "Dual node setup complete"
}

# Install the unit files into the per-user systemd directory and enable them.
#
# Copies llama-node1.service, llama-node2.service and user-nginx.service from
# $SCRIPT_DIR/systemd, reloads the user manager and enables the three units.
# The services are enabled only, not started.
# Globals:  HOME, SCRIPT_DIR, MODEL_NAME, CONTEXT_LENGTH (read)
# Outputs:  progress via log(), result via success()
# Exits:    through error() when a directory, copy or systemctl step fails
register_services() {
    local unit_dest="$HOME/.config/systemd/user"
    local unit_src="$SCRIPT_DIR/systemd"
    local -a unit_files=(llama-node1.service llama-node2.service user-nginx.service)
    local unit

    log "Registering systemd services for node1, node2 and nginx (${MODEL_NAME} - coder, from template)..."

    mkdir -p "$unit_dest" || error "Failed to create $unit_dest"

    for unit in "${unit_files[@]}"; do
        cp "$unit_src/$unit" "$unit_dest/" || error "Failed to copy $unit"
    done

    # Make the user manager pick up the new files before enabling them.
    systemctl --user daemon-reload || error "Failed to daemon-reload"
    systemctl --user enable llama-node1 llama-node2 user-nginx || error "Failed to enable services"

    success "Services for node1, node2 and nginx registered successfully"
    log "Use './start_service.sh' to start. Model: ${MODEL_NAME}, alias: coder, ctx=${CONTEXT_LENGTH}"
}

# Run the complete dual-node setup, in this order:
#   1. ensure_llm_serving_link - make ~/llm-serving a symlink
#   2. check_llama_cpp         - verify server binary and model file
#   3. setup_dual              - render the per-node unit files
#   4. ./deploy_nginx.sh       - executed from the script directory
#   5. register_services       - install and enable the user units
# Globals:  SCRIPT_DIR (read); the working directory is changed to SCRIPT_DIR
# Exits:    through error() when a step fails
main() {
    local step

    # The symlink comes first (TODO #5): LLAMA_CPP_PATH, which check_llama_cpp
    # inspects, lives below ~/llm-serving.
    for step in ensure_llm_serving_link check_llama_cpp setup_dual; do
        "$step"
    done

    log "Running deploy_nginx.sh for env-based API key..."
    if ! cd "$SCRIPT_DIR"; then
        error "Cannot cd to script dir"
    fi
    if ! ./deploy_nginx.sh; then
        error "deploy_nginx.sh failed"
    fi

    log "Registering services..."
    register_services

    success "✓ Dual node setup completed successfully!"
    log "Use DPI_FACTORY_API_KEY from .env for auth. Model paths updated to ~/.lmstudio/..."
}

# Entry point: run the full setup sequence defined in main().
main