gHashTag
diff --git a/‎Dockerfile.flyio‎
Lines changed: 57 additions & 0 deletions b/‎Dockerfile.flyio‎
Lines changed: 57 additions & 0 deletions
diff --git a/‎benchmark_flyio.sh‎
Lines changed: 58 additions & 0 deletions b/‎benchmark_flyio.sh‎
Lines changed: 58 additions & 0 deletions
diff --git a/‎fly.toml‎
Lines changed: 16 additions & 7 deletions b/‎fly.toml‎
Lines changed: 16 additions & 7 deletions
@@ -0,0 +1,57 @@
+# ═══════════════════════════════════════════════════════════════════════════════
+# TRINITY INFERENCE - Fly.io Deployment
+# Multi-threaded LLM inference with 16 CPU cores
+# φ² + 1/φ² = 3 = TRINITY
+# ═══════════════════════════════════════════════════════════════════════════════
+
+FROM debian:bookworm-slim AS builder
+
+# Build-time tools: curl fetches the Zig tarball, xz-utils lets tar unpack .tar.xz
+RUN apt-get update && apt-get install -y \
+    curl \
+    xz-utils \
+    && rm -rf /var/lib/apt/lists/*
+
+# Download and install Zig 0.13.0 (-f: fail on HTTP errors instead of piping an error page into tar)
+RUN curl -fL https://ziglang.org/download/0.13.0/zig-linux-x86_64-0.13.0.tar.xz | tar -xJ -C /opt
+ENV PATH="/opt/zig-linux-x86_64-0.13.0:${PATH}"
+
+# Copy source code
+WORKDIR /app
+COPY src/vibeec/*.zig ./src/vibeec/
+
+# Build with ReleaseFast; zig build-exe takes the output path via -femit-bin (it has no -o flag)
+WORKDIR /app/src/vibeec
+RUN zig build-exe tri_inference.zig -O ReleaseFast -femit-bin=/app/tri_inference
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# Runtime image
+# ═══════════════════════════════════════════════════════════════════════════════
+
+FROM debian:bookworm-slim
+
+# Install runtime dependencies (curl is used by the model download below)
+RUN apt-get update && apt-get install -y \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+
+# Copy binary
+COPY --from=builder /app/tri_inference /app/tri_inference
+
+# The model download below is best effort: if it fails the image still builds with an
+# empty /app/models, so the model must then be mounted or downloaded at runtime.
+# NOTE(review): the URL serves a GGUF file saved as .tri - confirm tri_inference reads GGUF.
+# Create models directory
+RUN mkdir -p /app/models
+
+# Download SmolLM2 360M (-f: never save an HTTP error page as the model; rm -f: drop partial files)
+RUN curl -fL -o /app/models/smollm2-360m.tri \
+    "https://huggingface.co/bartowski/SmolLM2-360M-Instruct-GGUF/resolve/main/SmolLM2-360M-Instruct-Q8_0.gguf" || rm -f /app/models/smollm2-360m.tri
+
+# Set environment
+ENV NUM_THREADS=16
+
+# Run benchmark
+CMD ["/app/tri_inference", "/app/models/smollm2-360m.tri"]
@@ -0,0 +1,58 @@
+#!/bin/bash
+# ═══════════════════════════════════════════════════════════════════════════════
+# TRINITY BENCHMARK - Fly.io Performance Estimation
+# φ² + 1/φ² = 3 = TRINITY
+# ═══════════════════════════════════════════════════════════════════════════════
+
+echo "╔══════════════════════════════════════════════════════════════╗"
+echo "║           TRINITY BENCHMARK - FLY.IO ESTIMATION              ║"
+echo "║           φ² + 1/φ² = 3 = TRINITY                            ║"
+echo "╚══════════════════════════════════════════════════════════════╝"
+echo ""
+
+# Current environment
+CURRENT_CORES=$(nproc)
+echo "Current environment: ${CURRENT_CORES} cores"
+echo ""
+
+# Run benchmark. Stop with a clear message if the build directory is missing
+# or no speed can be parsed; otherwise bc below only emits syntax errors.
+echo "Running benchmark on ${CURRENT_CORES} cores..."
+cd /workspaces/trinity/src/vibeec || { echo "ERROR: cannot cd to /workspaces/trinity/src/vibeec" >&2; exit 1; }
+RESULT=$(./tri_inference ../../models/smollm2-360m.tri 2>&1 | grep "Speed")
+# head -n 1 keeps a single number even if several "Speed" lines are printed
+CURRENT_SPEED=$(echo "$RESULT" | grep -oP '[\d.]+(?= tokens/sec)' | head -n 1)
+[[ -n "$CURRENT_SPEED" ]] || { echo "ERROR: no 'tokens/sec' value found in tri_inference output" >&2; exit 1; }
+
+echo "Current speed: ${CURRENT_SPEED} tok/s"
+echo ""
+
+# Estimate Fly.io performance
+echo "═══════════════════════════════════════════════════════════════"
+echo "ESTIMATED FLY.IO PERFORMANCE (based on linear scaling):"
+echo "═══════════════════════════════════════════════════════════════"
+echo ""
+
+# Calculate estimates (assuming ~80% parallel efficiency)
+EFFICIENCY=0.8
+
+for CORES in 4 8 16; do
+    # speedup = 1 + EFFICIENCY * (additional cores / current cores)
+    SPEEDUP=$(echo "scale=2; 1 + ($CORES - $CURRENT_CORES) * $EFFICIENCY / $CURRENT_CORES" | bc)
+    ESTIMATED=$(echo "scale=2; $CURRENT_SPEED * $SPEEDUP" | bc)
+    # VM size label for each core count in the loop: performance-<cores>x
+    SIZE="performance-${CORES}x"
+    echo "  $SIZE ($CORES cores): ~${ESTIMATED} tok/s (${SPEEDUP}x speedup)"
+done
+
+echo ""
+echo "═══════════════════════════════════════════════════════════════"
+echo "TO DEPLOY ON FLY.IO:"
+echo "═══════════════════════════════════════════════════════════════"
+echo ""
+# The app name must match `app` in fly.toml, which step 3 deploys with
+echo "1. Login: flyctl auth login"
+echo "2. Create app: flyctl apps create trinity-llm"
+echo "3. Deploy: flyctl deploy --config fly.toml"
+echo ""
+echo "KOSCHEI IS IMMORTAL | GOLDEN CHAIN IS CLOSED"
@@ -1,5 +1,8 @@
+# ═══════════════════════════════════════════════════════════════════════════════
 # TRINITY LLM - Fly.io Configuration
-# phi^2 + 1/phi^2 = 3 = TRINITY
+# Multi-threaded LLM inference with MAXIMUM CPU cores
+# φ² + 1/φ² = 3 = TRINITY
+# ═══════════════════════════════════════════════════════════════════════════════
 
 app = "trinity-llm"
 primary_region = "iad"
@@ -11,15 +14,21 @@ primary_region = "iad"
   MODEL_PATH = "/app/models/smollm-135m-instruct-q8_0.gguf"
   TEMPERATURE = "0.7"
   TOP_P = "0.9"
+  NUM_THREADS = "16"
 
-# Use shared-cpu-2x for SmolLM-135M
-# 2GB RAM needed: model (139MB) + dequantized weights (~600MB) + buffers
+# MAXIMUM CPU: performance-16x = 16 dedicated CPU cores, 32GB RAM
+# Used for benchmarking multi-threaded inference; keep NUM_THREADS above equal to cpus
 [[vm]]
-  size = "shared-cpu-2x"
-  memory = "2gb"
-  cpus = 2
+  size = "performance-16x"
+  memory = "32gb"
+  cpus = 16
 
-# Persistent volume for models (optional - model is baked into image)
+# Alternative sizes:
+# performance-8x: 8 CPU, 16GB RAM
+# performance-4x: 4 CPU, 8GB RAM
+# shared-cpu-8x: 8 shared CPU, 2GB RAM by default (configurable up to 16GB)
+
+# Persistent volume for models
 # [[mounts]]
 #   source = "trinity_models"
 #   destination = "/app/models"