Skip to content

Commit 16cf95f

Browse files
gHashTag and ona-agent committed
Add Fly.io deployment configuration
- Dockerfile for TRINITY LLM with Mistral-7B-Instruct
- fly.toml with performance-8x VM (8 CPU, 16GB RAM)

Co-authored-by: Ona <no-reply@ona.com>
1 parent fc72398 commit 16cf95f

2 files changed

Lines changed: 96 additions & 0 deletions

File tree

Dockerfile

Lines changed: 57 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,57 @@
1+
# TRINITY LLM - Zig-based LLM Inference Engine
2+
# phi^2 + 1/phi^2 = 3 = TRINITY
3+
4+
FROM debian:bookworm-slim AS builder
5+
6+
# Install build dependencies: curl to download the Zig toolchain and xz-utils to unpack its .tar.xz archive
# NOTE(review): ca-certificates is not listed explicitly; presumably it arrives as a recommended package of curl - confirm before adding --no-install-recommends here
7+
RUN apt-get update && apt-get install -y \
8+
curl \
9+
xz-utils \
10+
&& rm -rf /var/lib/apt/lists/*
11+
12+
# Download and install Zig 0.14.0 (-f makes curl exit non-zero on an HTTP error instead of piping the error page into tar; -sS hides the progress meter but keeps error messages)
13+
RUN curl -fsSL https://ziglang.org/download/0.14.0/zig-linux-x86_64-0.14.0.tar.xz | tar -xJ -C /opt \
14+
&& ln -s /opt/zig-linux-x86_64-0.14.0/zig /usr/local/bin/zig
15+
16+
WORKDIR /build
17+
18+
# Copy the src/ directory from the build context into /build/src/
19+
COPY src/ src/
20+
21+
# Compile src/vibeec/gen_cmd.zig into an executable named "vibee" in /build, using the ReleaseFast optimization mode
22+
RUN zig build-exe src/vibeec/gen_cmd.zig --name vibee -OReleaseFast \
23+
&& chmod +x vibee
24+
25+
# Runtime stage - minimal image (the Zig toolchain and sources stay in the builder stage)
26+
FROM debian:bookworm-slim
27+
28+
# Install runtime dependencies; --no-install-recommends keeps optional packages out of the final image (ca-certificates is listed explicitly, so HTTPS still works)
29+
RUN apt-get update && apt-get install -y --no-install-recommends \
30+
ca-certificates \
31+
curl \
32+
&& rm -rf /var/lib/apt/lists/*
33+
34+
WORKDIR /app
35+
36+
# Create models directory
37+
RUN mkdir -p /app/models
38+
39+
# Download Mistral-7B-Instruct Q4_K_M (~4.4GB) before copying the binary, so source-only rebuilds reuse this cached layer
40+
# curl -f fails the build on an HTTP error instead of saving the error page as the model file
41+
RUN echo "Downloading Mistral-7B-Instruct-v0.2 Q4_K_M..." && \
42+
curl -fL -o /app/models/mistral-7b-instruct-v0.2.Q4_K_M.gguf \
43+
"https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
44+
45+
# Copy binary from builder (last, because it changes whenever the sources change)
46+
COPY --from=builder /build/vibee /app/vibee
47+
48+
# Default model path, temperature and top-p values, declared in a single ENV instruction
49+
ENV MODEL_PATH=/app/models/mistral-7b-instruct-v0.2.Q4_K_M.gguf \
50+
TEMPERATURE=0.7 \
51+
TOP_P=0.9
52+
53+
# Expose port (for future HTTP API); EXPOSE is documentation only and does not publish the port
54+
EXPOSE 8080
55+
56+
# Run chat. Flags are read from MODEL_PATH, TEMPERATURE and TOP_P so that overriding those variables at deploy time takes effect; exec replaces the shell so vibee receives signals directly
57+
CMD ["/bin/sh", "-c", "exec /app/vibee chat --model \"$MODEL_PATH\" --temperature \"$TEMPERATURE\" --top-p \"$TOP_P\""]

fly.toml

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,39 @@
1+
# TRINITY LLM - Fly.io Configuration
2+
# phi^2 + 1/phi^2 = 3 = TRINITY
3+
4+
# Fly.io application name and the primary region for its machines
app = "trinity-llm"
5+
primary_region = "iad"
6+
7+
# Build the image from the file named "Dockerfile"
[build]
8+
dockerfile = "Dockerfile"
9+
10+
# Runtime environment variables; the values mirror the ENV defaults declared in the Dockerfile
[env]
11+
MODEL_PATH = "/app/models/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
12+
TEMPERATURE = "0.7"
13+
TOP_P = "0.9"
14+
15+
# Use performance-8x for LLM inference (8 CPU, 16GB RAM)
16+
# The Mistral-7B Q4_K_M model file alone is ~4.4GB (see Dockerfile), so a large-memory VM is requested
17+
[[vm]]
18+
size = "performance-8x"
19+
memory = "16gb"
20+
cpus = 8
21+
22+
# Persistent volume for models (optional - model is baked into image)
23+
# [[mounts]]
24+
# source = "trinity_models"
25+
# destination = "/app/models"
26+
27+
# HTTP service routed to port 8080 inside the machine, with HTTPS enforced.
# Machines may stop when idle and are started again on demand; no machine is kept running (min_machines_running = 0).
[http_service]
28+
internal_port = 8080
29+
force_https = true
30+
auto_stop_machines = true
31+
auto_start_machines = true
32+
min_machines_running = 0
33+
34+
# Health check: GET /health every 30s with a 10s timeout, after a 120s grace period.
# NOTE(review): the Dockerfile runs `vibee chat` and describes port 8080 as being for a future HTTP API - confirm that something actually serves GET /health on 8080, otherwise this check cannot pass.
[[http_service.checks]]
35+
grace_period = "120s"
36+
interval = "30s"
37+
method = "GET"
38+
path = "/health"
39+
timeout = "10s"

0 commit comments

Comments
 (0)