Add Fly.io deployment configuration

gHashTag · ona-agent · gHashTag · commit 16cf95fc2cbc · 2026-02-01T17:07:43.000Z
- Dockerfile for TRINITY LLM with Mistral-7B-Instruct
- fly.toml with performance-8x VM (8 CPU, 16GB RAM)

Co-authored-by: Ona <no-reply@ona.com>
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,57 @@
+# TRINITY LLM - Zig-based LLM Inference Engine
+# phi^2 + 1/phi^2 = 3 = TRINITY
+
+FROM debian:bookworm-slim AS builder
+
+# Build-stage tooling: curl fetches the Zig tarball and xz-utils lets tar
+# unpack the .tar.xz archive. The apt lists are removed in the same layer.
+RUN apt-get update \
+    && apt-get install --yes curl xz-utils \
+    && rm -rf /var/lib/apt/lists/*
+
+# Download and install Zig 0.14.0; -f fails the build on HTTP errors and no pipe hides curl's exit code
+RUN curl -fsSL -o /tmp/zig.tar.xz https://ziglang.org/download/0.14.0/zig-linux-x86_64-0.14.0.tar.xz \
+    && tar -xJf /tmp/zig.tar.xz -C /opt && rm /tmp/zig.tar.xz && ln -s /opt/zig-linux-x86_64-0.14.0/zig /usr/local/bin/zig
+
+WORKDIR /build
+
+# Bring the src/ tree into the build directory
+COPY src/ ./src/
+
+# Compile src/vibeec/gen_cmd.zig into ./vibee using the ReleaseFast mode
+RUN zig build-exe src/vibeec/gen_cmd.zig --name vibee -OReleaseFast && \
+    chmod +x vibee
+
+# Runtime stage - minimal image
+FROM debian:bookworm-slim
+
+# Install runtime dependencies; --no-install-recommends keeps optional extras out of the final image
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ca-certificates \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+# Bring over the compiled CLI from the builder stage
+COPY --from=builder /build/vibee ./vibee
+
+# Directory that will hold the model file
+RUN mkdir -p models
+
+# Download Mistral-7B-Instruct Q4_K_M (~4.4GB). -f makes curl exit non-zero on an
+# HTTP error instead of silently saving the error page as the model file.
+RUN echo "Downloading Mistral-7B-Instruct-v0.2 Q4_K_M..." && \
+    curl -fL -o /app/models/mistral-7b-instruct-v0.2.Q4_K_M.gguf \
+    "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
+
+# Runtime defaults; fly.toml [env] or `docker run -e` can override them
+ENV MODEL_PATH=/app/models/mistral-7b-instruct-v0.2.Q4_K_M.gguf
+ENV TEMPERATURE=0.7
+ENV TOP_P=0.9
+
+# Expose port (for future HTTP API)
+EXPOSE 8080
+
+# Run chat via sh so the ENV values above are expanded (exec form alone does not); exec makes vibee PID 1
+CMD ["/bin/sh", "-c", "exec /app/vibee chat --model \"$MODEL_PATH\" --temperature \"$TEMPERATURE\" --top-p \"$TOP_P\""]
diff --git a/fly.toml b/fly.toml
@@ -0,0 +1,39 @@
+# TRINITY LLM - Fly.io Configuration
+# phi^2 + 1/phi^2 = 3 = TRINITY
+
+primary_region = "iad"
+app = "trinity-llm"  # Fly app name
+
+[build]
+  dockerfile = "Dockerfile"  # image is built from the Dockerfile added in this commit
+
+[env]  # same values as the ENV defaults in the Dockerfile
+  TOP_P = "0.9"
+  TEMPERATURE = "0.7"
+  MODEL_PATH = "/app/models/mistral-7b-instruct-v0.2.Q4_K_M.gguf"
+
+# Mistral-7B inference runs on a performance-8x Machine;
+# cpus and memory below restate that preset's 8 CPU / 16GB RAM
+[[vm]]
+  cpus = 8
+  memory = "16gb"
+  size = "performance-8x"
+
+# Persistent volume for models (optional - model is baked into image)
+# [[mounts]]
+#   source = "trinity_models"
+#   destination = "/app/models"
+
+[http_service]  # NOTE(review): assumes the app serves HTTP on 8080; the Dockerfile calls that API "future" - confirm
+  force_https = true
+  internal_port = 8080  # same port the Dockerfile EXPOSEs
+  auto_start_machines = true
+  auto_stop_machines = true
+  min_machines_running = 0
+
+[[http_service.checks]]
+  method = "GET"
+  path = "/health"
+  interval = "30s"
+  timeout = "10s"
+  grace_period = "120s"