fix: feature-flag in-memory rate limiter, disable by default

GeneralJerel · GeneralJerel · commit e45e254f993a · 2026-03-24T13:35:50.000-07:00
The in-memory rate limiter keeps its hit counters in a per-process Map, so
it cannot enforce a shared limit across multiple instances in high-traffic
deployments. Disable it by default via the RATE_LIMIT_ENABLED env var so it
doesn't silently misbehave at scale. It can be re-enabled by setting
RATE_LIMIT_ENABLED=true for single-instance or low-traffic deployments.
diff --git a/.env.example b/.env.example
@@ -4,6 +4,7 @@ OPENAI_API_KEY=
 # Recommended: gpt-5.4, gpt-5.4-pro, claude-opus-4-6, gemini-3.1-pro
 LLM_MODEL=gpt-5.4-2026-03-05
 
-# Rate limiting (per IP)
+# Rate limiting (per IP) — disabled by default
+RATE_LIMIT_ENABLED=false
 RATE_LIMIT_WINDOW_MS=60000
-RATE_LIMIT_MAX=40
+RATE_LIMIT_MAX=40
diff --git a/apps/app/src/app/api/copilotkit/route.ts b/apps/app/src/app/api/copilotkit/route.ts
@@ -6,12 +6,16 @@ import {
 import { LangGraphAgent } from "@copilotkit/runtime/langgraph";
 import { NextRequest } from "next/server";
 
-// Simple sliding-window rate limiter (per IP)
+// Simple in-memory sliding-window rate limiter (per IP)
+// Enable via RATE_LIMIT_ENABLED=true — off by default.
+// For high-traffic deployments, consider Redis-backed rate limiting instead.
+const RATE_LIMIT_ENABLED = process.env.RATE_LIMIT_ENABLED === "true";
 const RATE_LIMIT_WINDOW_MS = Number(process.env.RATE_LIMIT_WINDOW_MS) || 60_000;
 const RATE_LIMIT_MAX = Number(process.env.RATE_LIMIT_MAX) || 40;
 const hits = new Map<string, number[]>();
 
 function isRateLimited(ip: string): boolean {
+  if (!RATE_LIMIT_ENABLED) return false;
   const now = Date.now();
   const timestamps = hits.get(ip)?.filter(t => t > now - RATE_LIMIT_WINDOW_MS) ?? [];
   timestamps.push(now);
@@ -20,14 +24,16 @@ function isRateLimited(ip: string): boolean {
 }
 
 // Prune stale entries every 5 min to prevent unbounded memory growth
-setInterval(() => {
-  const cutoff = Date.now() - RATE_LIMIT_WINDOW_MS;
-  hits.forEach((timestamps, ip) => {
-    const recent = timestamps.filter(t => t > cutoff);
-    if (recent.length === 0) hits.delete(ip);
-    else hits.set(ip, recent);
-  });
-}, 300_000);
+if (RATE_LIMIT_ENABLED) {
+  setInterval(() => {
+    const cutoff = Date.now() - RATE_LIMIT_WINDOW_MS;
+    hits.forEach((timestamps, ip) => {
+      const recent = timestamps.filter(t => t > cutoff);
+      if (recent.length === 0) hits.delete(ip);
+      else hits.set(ip, recent);
+    });
+  }, 300_000);
+}
 
 // Normalize Render's fromService hostport (bare host:port) into a full URL
 const raw = process.env.LANGGRAPH_DEPLOYMENT_URL;