diff --git a/apps/web/src/app/admin/api/free-model-usage/stats/route.ts b/apps/web/src/app/admin/api/free-model-usage/stats/route.ts index 487569106c..b439352cf2 100644 --- a/apps/web/src/app/admin/api/free-model-usage/stats/route.ts +++ b/apps/web/src/app/admin/api/free-model-usage/stats/route.ts @@ -2,7 +2,7 @@ import type { NextRequest } from 'next/server'; import { NextResponse } from 'next/server'; import { getUserFromAuth } from '@/lib/user.server'; import { db } from '@/lib/drizzle'; -import { free_model_usage } from '@kilocode/db/schema'; +import { free_model_usage, kilocode_users } from '@kilocode/db/schema'; import { sql } from 'drizzle-orm'; import { FREE_MODEL_RATE_LIMIT_WINDOW_HOURS, @@ -10,12 +10,24 @@ import { ADMIN_RATE_LIMIT_TEST_MODEL, } from '@/lib/constants'; +export type UserAtLimit = { + kiloUserId: string; + requestCount: number; + googleUserName: string | null; + googleUserEmail: string | null; + googleUserImageUrl: string | null; +}; + export type FreeModelUsageStatsResponse = { - // Current window stats (last 3 hours) + // Current window stats windowUniqueIps: number; windowTotalRequests: number; windowAvgRequestsPerIp: number; - windowIpsAtRequestLimit: number; + // Anonymous IPs whose anonymous-only request count has reached the limit. + windowAnonymousIpsAtRequestLimit: number; + // Authenticated users whose per-user request count has reached the limit. + windowUsersAtRequestLimit: number; + windowUsersAtLimitList: UserAtLimit[]; windowAnonymousRequests: number; windowAuthenticatedRequests: number; @@ -53,8 +65,8 @@ export async function GET( sql`${free_model_usage.created_at} >= NOW() - INTERVAL '${sql.raw(String(FREE_MODEL_RATE_LIMIT_WINDOW_HOURS))} hours' AND ${TEST_ROW_FILTER}` ); - // Count IPs at or above the rate limit threshold using a SQL subquery - const ipsAtLimitResult = await db + // Anonymous IPs at the per-IP limit (anonymous-only rows, matching checkFreeModelRateLimit). + const anonymousIpsAtLimitResult = await db .select({ count: sql`COUNT(*)`, }) @@ -62,12 +74,41 @@ export async function GET( sql`( SELECT ${free_model_usage.ip_address} FROM ${free_model_usage} - WHERE ${free_model_usage.created_at} >= NOW() - INTERVAL '${sql.raw(String(FREE_MODEL_RATE_LIMIT_WINDOW_HOURS))} hours' AND ${TEST_ROW_FILTER} + WHERE ${free_model_usage.created_at} >= NOW() - INTERVAL '${sql.raw(String(FREE_MODEL_RATE_LIMIT_WINDOW_HOURS))} hours' + AND ${TEST_ROW_FILTER} + AND ${free_model_usage.kilo_user_id} IS NULL GROUP BY ${free_model_usage.ip_address} HAVING COUNT(*) >= ${FREE_MODEL_MAX_REQUESTS_PER_WINDOW} ) sub` ); + // Authenticated users at the per-user limit (matching checkFreeModelRateLimitByUser). + // Returns the actual user rows (joined with kilocode_users for display) ordered by + // request count desc; the count of all such users is the length of this array. + const usersAtLimitRows = await db + .select({ + kiloUserId: free_model_usage.kilo_user_id, + requestCount: sql`COUNT(*)`.as('request_count'), + googleUserName: kilocode_users.google_user_name, + googleUserEmail: kilocode_users.google_user_email, + googleUserImageUrl: kilocode_users.google_user_image_url, + }) + .from(free_model_usage) + .leftJoin(kilocode_users, sql`${kilocode_users.id} = ${free_model_usage.kilo_user_id}`) + .where( + sql`${free_model_usage.created_at} >= NOW() - INTERVAL '${sql.raw(String(FREE_MODEL_RATE_LIMIT_WINDOW_HOURS))} hours' + AND ${TEST_ROW_FILTER} + AND ${free_model_usage.kilo_user_id} IS NOT NULL` + ) + .groupBy( + free_model_usage.kilo_user_id, + kilocode_users.google_user_name, + kilocode_users.google_user_email, + kilocode_users.google_user_image_url + ) + .having(sql`COUNT(*) >= ${FREE_MODEL_MAX_REQUESTS_PER_WINDOW}`) + .orderBy(sql`request_count DESC`); + // Get stats for the last 24 hours const dailyResult = await db .select({ @@ -93,7 +134,6 @@ export async function GET( const windowUniqueIps = bigIntToNumber(windowStats.unique_ips); const windowTotalRequests = bigIntToNumber(windowStats.total_requests); - const ipsAtRequestLimit = bigIntToNumber(ipsAtLimitResult[0]?.count ?? 0); return NextResponse.json({ // Current window stats @@ -101,7 +141,15 @@ export async function GET( windowTotalRequests, windowAvgRequestsPerIp: windowUniqueIps > 0 ? Math.round(windowTotalRequests / windowUniqueIps) : 0, - windowIpsAtRequestLimit: ipsAtRequestLimit, + windowAnonymousIpsAtRequestLimit: bigIntToNumber(anonymousIpsAtLimitResult[0]?.count ?? 0), + windowUsersAtRequestLimit: usersAtLimitRows.length, + windowUsersAtLimitList: usersAtLimitRows.map(row => ({ + kiloUserId: row.kiloUserId ?? '', + requestCount: bigIntToNumber(row.requestCount), + googleUserName: row.googleUserName, + googleUserEmail: row.googleUserEmail, + googleUserImageUrl: row.googleUserImageUrl, + })), windowAnonymousRequests: bigIntToNumber(windowStats.anonymous_requests), windowAuthenticatedRequests: bigIntToNumber(windowStats.authenticated_requests), diff --git a/apps/web/src/app/admin/components/FreeModelUsageStats.tsx b/apps/web/src/app/admin/components/FreeModelUsageStats.tsx index 9474e7efc4..f796ef1fcb 100644 --- a/apps/web/src/app/admin/components/FreeModelUsageStats.tsx +++ b/apps/web/src/app/admin/components/FreeModelUsageStats.tsx @@ -56,7 +56,10 @@ export function FreeModelUsageStats() { Rate Limit Configuration - Current free model rate limit settings (IP-based) + + Current free model rate limit settings (per user for authenticated requests, per IP for + anonymous requests) +
@@ -99,26 +102,30 @@ export function FreeModelUsageStats() { - + 0 + ? 'border-destructive bg-destructive/5' + : 'border-primary/40' + } + > - IPs at Request Limit + Anonymous IPs at Limit - IPs that have reached {formatNumber(data?.maxRequestsPerWindow ?? 0)} requests + Anonymous IPs that have reached {formatNumber(data?.maxRequestsPerWindow ?? 0)}{' '} + anonymous requests -
- {formatNumber(data?.windowIpsAtRequestLimit ?? 0)} +
0 + ? 'text-destructive text-3xl font-bold' + : 'text-3xl font-bold' + } + > + {formatNumber(data?.windowAnonymousIpsAtRequestLimit ?? 0)}
- {(data?.windowUniqueIps ?? 0) > 0 && ( -
- {( - ((data?.windowIpsAtRequestLimit ?? 0) / (data?.windowUniqueIps ?? 1)) * - 100 - ).toFixed(1)} - % of active IPs -
- )}
diff --git a/apps/web/src/app/admin/components/RateLimitTesting.tsx b/apps/web/src/app/admin/components/RateLimitTesting.tsx index 26023acde2..835b531325 100644 --- a/apps/web/src/app/admin/components/RateLimitTesting.tsx +++ b/apps/web/src/app/admin/components/RateLimitTesting.tsx @@ -11,48 +11,46 @@ export function RateLimitTesting() { const trpc = useTRPC(); const queryClient = useQueryClient(); - const ipUsageQuery = useQuery(trpc.admin.freeModelUsage.getMyIpUsage.queryOptions()); + const usageQuery = useQuery(trpc.admin.freeModelUsage.getMyUsage.queryOptions()); const rateLimitMutation = useMutation( - trpc.admin.freeModelUsage.rateLimitMyIp.mutationOptions({ + trpc.admin.freeModelUsage.rateLimitMe.mutationOptions({ onSuccess: data => { if (data.alreadyRateLimited) { toast.message('Already rate limited', { - description: `IP ${data.ipAddress} already has ${data.newTotal} requests in the current window.`, + description: `User ${data.kiloUserId} already has ${data.newTotal} requests in the current window.`, }); } else { toast.success( - `Inserted ${data.rowsInserted} rows for IP ${data.ipAddress}. New total: ${data.newTotal}.` + `Inserted ${data.rowsInserted} rows for user ${data.kiloUserId}. New total: ${data.newTotal}.` ); } void queryClient.invalidateQueries({ - queryKey: trpc.admin.freeModelUsage.getMyIpUsage.queryKey(), + queryKey: trpc.admin.freeModelUsage.getMyUsage.queryKey(), }); }, onError: error => { - toast.error(error.message || 'Failed to rate limit IP'); + toast.error(error.message || 'Failed to rate limit user'); }, }) ); - const data = ipUsageQuery.data; + const data = usageQuery.data; return ( Rate Limit Testing - Insert enough requests to trigger the free model rate limit for your current IP address. + Insert enough requests to trigger the free model rate limit for your own user id. - {ipUsageQuery.isLoading && ( -

Loading IP usage...

- )} + {usageQuery.isLoading &&

Loading usage...

} - {ipUsageQuery.error && ( + {usageQuery.error && (

- {ipUsageQuery.error.message || 'Failed to load IP usage'} + {usageQuery.error.message || 'Failed to load usage'}

)} @@ -60,8 +58,8 @@ export function RateLimitTesting() { <>
-

Your IP

-

{data.ipAddress}

+

Your user id

+

{data.kiloUserId}

Usage ({data.windowHours}h window)

@@ -88,7 +86,7 @@ export function RateLimitTesting() { ? 'Inserting rows...' : data.isRateLimited ? 'Already Rate Limited' - : `Rate Limit My IP (insert ${data.limit - data.currentUsage} rows)`} + : `Rate Limit Me (insert ${data.limit - data.currentUsage} rows)`} )} diff --git a/apps/web/src/app/admin/components/UserRateLimitStats.tsx b/apps/web/src/app/admin/components/UserRateLimitStats.tsx new file mode 100644 index 0000000000..480d5680d8 --- /dev/null +++ b/apps/web/src/app/admin/components/UserRateLimitStats.tsx @@ -0,0 +1,126 @@ +'use client'; + +import { useQuery } from '@tanstack/react-query'; +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card'; +import { + Table, + TableBody, + TableCell, + TableHead, + TableHeader, + TableRow, +} from '@/components/ui/table'; +import { UserAvatarLink } from './UserAvatarLink'; +import type { FreeModelUsageStatsResponse } from '../api/free-model-usage/stats/route'; + +export function UserRateLimitStats() { + const { data, isLoading, error } = useQuery({ + queryKey: ['admin-free-model-usage-stats'], + queryFn: async () => { + const response = await fetch('/admin/api/free-model-usage/stats'); + if (!response.ok) { + throw new Error('Failed to fetch free model usage statistics'); + } + return (await response.json()) as FreeModelUsageStatsResponse; + }, + refetchInterval: 60000, + }); + + if (error) { + return ( + + + Error + Failed to load user rate limit statistics + + +

+ {error instanceof Error ? error.message : 'An error occurred'} +

+
+
+ ); + } + + if (isLoading || !data) { + return ( + + + Loading... + Fetching user rate limit statistics + + + ); + } + + const usersAtLimit = data.windowUsersAtLimitList; + const isHot = data.windowUsersAtRequestLimit > 0; + const formatNumber = (num: number) => num.toLocaleString(); + + return ( +
+ + + Users at Limit + + Authenticated users that have reached {formatNumber(data.maxRequestsPerWindow)} requests + in the last {data.rateLimitWindowHours}h + + + +
+ {formatNumber(data.windowUsersAtRequestLimit)} +
+
+
+ + {usersAtLimit.length > 0 && ( + + + User IDs at Limit + + The authenticated users currently being rate-limited, ordered by request count. + + + + + + + User + Kilo user id + Requests in window + + + + {usersAtLimit.map(user => ( + + + {user.googleUserName ? ( + + ) : ( + (unknown user) + )} + + {user.kiloUserId} + + {formatNumber(user.requestCount)} + + + ))} + +
+
+
+ )} +
+ ); +} diff --git a/apps/web/src/app/admin/free-model-usage/page.tsx b/apps/web/src/app/admin/free-model-usage/page.tsx index 640bcc3948..e6bde3cd2e 100644 --- a/apps/web/src/app/admin/free-model-usage/page.tsx +++ b/apps/web/src/app/admin/free-model-usage/page.tsx @@ -4,6 +4,7 @@ import { Suspense } from 'react'; import { FreeModelUsageStats } from '../components/FreeModelUsageStats'; import { PromotedModelUsageStats } from '../components/PromotedModelUsageStats'; import { RateLimitTesting } from '../components/RateLimitTesting'; +import { UserRateLimitStats } from '../components/UserRateLimitStats'; import AdminPage from '../components/AdminPage'; import { BreadcrumbItem, BreadcrumbPage } from '@/components/ui/breadcrumb'; @@ -24,9 +25,9 @@ export default function FreeModelUsagePage() {

- Monitor IP-based rate limiting for free model usage. This applies to both anonymous and - authenticated users. Rate limiting is based on request count per IP address within a - rolling window. + Monitor rate limiting for free model usage. Authenticated requests are limited per user + id; anonymous requests are limited per IP address. Both share the same per-window request + cap.

@@ -67,6 +68,19 @@ export default function FreeModelUsagePage() { +
+

Authenticated User Rate Limit

+
+ +

+ Authenticated requests are rate-limited per Kilo user id. The list below shows users + currently at the per-window limit. +

+ + Loading user rate limit statistics...
}> + + +

Promoted Models Usage

diff --git a/apps/web/src/app/api/openrouter/[...path]/route.ts b/apps/web/src/app/api/openrouter/[...path]/route.ts index 4082260bcc..29eec3e652 100644 --- a/apps/web/src/app/api/openrouter/[...path]/route.ts +++ b/apps/web/src/app/api/openrouter/[...path]/route.ts @@ -4,12 +4,7 @@ import { isOpenCodeBasedClient, stripRequiredPrefix } from '@/lib/utils'; import { applyTrackingIds } from '@/lib/ai-gateway/providerHash'; import { extractPromptInfo } from '@/lib/ai-gateway/extractPromptInfo'; import { determineFallbackFeature } from '@/lib/ai-gateway/determineFallbackFeature'; -import { - validateFeatureHeader, - FEATURE_HEADER, - isUserRateLimitedFeature, - type FeatureValue, -} from '@/lib/feature-detection'; +import { validateFeatureHeader, FEATURE_HEADER } from '@/lib/feature-detection'; import type { OpenRouterChatCompletionRequest, GatewayResponsesRequest, @@ -81,7 +76,6 @@ import { } from '@/lib/ai-gateway/o11y/api-metrics.server'; import { normalizeModelId } from '@/lib/ai-gateway/model-utils'; import { isForbiddenFreeModel } from '@/lib/ai-gateway/forbidden-free-models'; -import { isCloudflareIP } from '@/lib/cloudflare-ip'; import { isKiloAutoModel, KILO_AUTO_FREE_MODEL } from '@/lib/ai-gateway/kilo-auto'; import { applyResolvedAutoModel } from '@/lib/ai-gateway/kilo-auto/resolution'; import { fixOpenCodeDuplicateReasoning } from '@/lib/ai-gateway/providers/fixOpenCodeDuplicateReasoning'; @@ -122,24 +116,11 @@ function validatePath( } async function resolveRateLimit( - feature: FeatureValue | null, ipAddress: string, authPromise: Promise<{ user: { id: string } | null }> -): Promise< - | NextResponseType - | { result: { allowed: boolean; requestCount: number }; subject: string } -> { - if (isUserRateLimitedFeature(feature) && isCloudflareIP(ipAddress)) { - const { user } = await authPromise; - if (!user) { - return NextResponse.json( - { - error: 'Authentication required for this feature', - error_type: ProxyErrorType.authentication_required, - }, - { status: 401 } - ); - } +): Promise<{ result: { allowed: boolean; requestCount: number }; subject: string }> { + const { user } = await authPromise; + if (user) { return { result: await checkFreeModelRateLimitByUser(user.id), subject: `user: ${user.id}`, @@ -264,14 +245,11 @@ export async function POST(request: NextRequest): Promise { test('returns null for null input', () => { @@ -19,22 +19,3 @@ describe('validateFeatureHeader', () => { expect(validateFeatureHeader(' Cloud-Agent ')).toBe('cloud-agent'); }); }); - -describe('isUserRateLimitedFeature', () => { - test('returns true for server-side products', () => { - expect(isUserRateLimitedFeature('cloud-agent')).toBe(true); - expect(isUserRateLimitedFeature('code-review')).toBe(true); - expect(isUserRateLimitedFeature('app-builder')).toBe(true); - }); - - test('returns false for client-side products', () => { - expect(isUserRateLimitedFeature('vscode-extension')).toBe(false); - expect(isUserRateLimitedFeature('jetbrains-extension')).toBe(false); - expect(isUserRateLimitedFeature('cli')).toBe(false); - expect(isUserRateLimitedFeature('direct-gateway')).toBe(false); - }); - - test('returns false for null', () => { - expect(isUserRateLimitedFeature(null)).toBe(false); - }); -}); diff --git a/apps/web/src/lib/feature-detection.ts b/apps/web/src/lib/feature-detection.ts index 188ed61eca..21af48ff76 100644 --- a/apps/web/src/lib/feature-detection.ts +++ b/apps/web/src/lib/feature-detection.ts @@ -48,18 +48,3 @@ export function validateFeatureHeader(headerValue: string | null): FeatureValue const result = featureSchema.safeParse(headerValue.trim().toLowerCase()); return result.success ? result.data : null; } - -/** - * Server-side products that rate-limit free models per user instead of per IP. - * These products share infrastructure IPs, so IP-based limits would be too restrictive. - */ -const USER_RATE_LIMITED_FEATURES: ReadonlySet = new Set([ - 'cloud-agent', - 'code-review', - 'app-builder', -]); - -export function isUserRateLimitedFeature(feature: FeatureValue | null): boolean { - if (!feature) return false; - return USER_RATE_LIMITED_FEATURES.has(feature); -} diff --git a/apps/web/src/lib/free-model-rate-limiter.ts b/apps/web/src/lib/free-model-rate-limiter.ts index 425d48777c..73b30f4669 100644 --- a/apps/web/src/lib/free-model-rate-limiter.ts +++ b/apps/web/src/lib/free-model-rate-limiter.ts @@ -54,12 +54,12 @@ async function getModelUsageSinceTimeByUser( /** * Check if an IP address is within the free model rate limit. - * This applies to ALL free model requests, both anonymous and authenticated. + * Only counts anonymous requests; authenticated requests use checkFreeModelRateLimitByUser. */ export async function checkFreeModelRateLimit(ipAddress: string): Promise { const windowStart = new Date(Date.now() - FREE_MODEL_RATE_LIMIT_WINDOW_HOURS * 60 * 60 * 1000); - const requestCount = await getModelUsageSinceTime(windowStart, ipAddress); + const requestCount = await getModelUsageSinceTime(windowStart, ipAddress, true); return { allowed: requestCount < FREE_MODEL_MAX_REQUESTS_PER_WINDOW, @@ -69,8 +69,6 @@ export async function checkFreeModelRateLimit(ipAddress: string): Promise { const windowStart = new Date(Date.now() - FREE_MODEL_RATE_LIMIT_WINDOW_HOURS * 60 * 60 * 1000); diff --git a/apps/web/src/routers/admin/free-model-usage-router.ts b/apps/web/src/routers/admin/free-model-usage-router.ts index 8a1bfcb417..962eef190e 100644 --- a/apps/web/src/routers/admin/free-model-usage-router.ts +++ b/apps/web/src/routers/admin/free-model-usage-router.ts @@ -30,14 +30,14 @@ async function getCallerIp(): Promise { return ip; } -async function countUsageForIp(ipAddress: string): Promise { +async function countUsageForUser(kiloUserId: string): Promise { const windowStart = getWindowStart(); const usage = await db .select({ totalRequests: count() }) .from(free_model_usage) .where( and( - sql`${free_model_usage.ip_address} = ${ipAddress}`, + sql`${free_model_usage.kilo_user_id} = ${kiloUserId}`, gte(free_model_usage.created_at, windowStart.toISOString()) ) ); @@ -45,11 +45,11 @@ async function countUsageForIp(ipAddress: string): Promise { } export const adminFreeModelUsageRouter = createTRPCRouter({ - getMyIpUsage: adminProcedure.query(async () => { - const ipAddress = await getCallerIp(); - const currentUsage = await countUsageForIp(ipAddress); + getMyUsage: adminProcedure.query(async ({ ctx }) => { + const kiloUserId = ctx.user.id; + const currentUsage = await countUsageForUser(kiloUserId); return { - ipAddress, + kiloUserId, currentUsage, limit: FREE_MODEL_MAX_REQUESTS_PER_WINDOW, windowHours: FREE_MODEL_RATE_LIMIT_WINDOW_HOURS, @@ -57,29 +57,31 @@ export const adminFreeModelUsageRouter = createTRPCRouter({ }; }), - rateLimitMyIp: adminProcedure.mutation(async () => { - const ipAddress = await getCallerIp(); - const currentUsage = await countUsageForIp(ipAddress); + rateLimitMe: adminProcedure.mutation(async ({ ctx }) => { + const kiloUserId = ctx.user.id; + const currentUsage = await countUsageForUser(kiloUserId); const rowsNeeded = FREE_MODEL_MAX_REQUESTS_PER_WINDOW - currentUsage; if (rowsNeeded <= 0) { return { - ipAddress, + kiloUserId, rowsInserted: 0, newTotal: currentUsage, alreadyRateLimited: true, }; } + const ipAddress = await getCallerIp(); const rows = Array.from({ length: rowsNeeded }, () => ({ ip_address: ipAddress, model: ADMIN_RATE_LIMIT_TEST_MODEL, + kilo_user_id: kiloUserId, })); await db.insert(free_model_usage).values(rows); return { - ipAddress, + kiloUserId, rowsInserted: rowsNeeded, newTotal: FREE_MODEL_MAX_REQUESTS_PER_WINDOW, alreadyRateLimited: false,