// Beacon kinds understood by POST /api/track; any other value is answered
// with 400 before any session work is done.
const TRACK_KINDS = ["page", "action", "facility", "login"] as const;
type TrackKind = (typeof TRACK_KINDS)[number];

// Runtime guard that narrows an arbitrary string to one of the supported kinds.
function isTrackKind(value: string): value is TrackKind {
  return (TRACK_KINDS as readonly string[]).includes(value);
}

/**
 * POST /api/track - usage beacon from the client.
 *
 * The client sends only the kind plus a coarse name; the user label is
 * resolved server-side by `extractUser(request)` and is never read from the
 * request body. The matching MetricsService counter is then incremented.
 *
 * Accepted payloads:
 *   { kind: "page",     name: "<page name>" }
 *   { kind: "action",   name: "<action key>" }
 *   { kind: "facility", name: "<facility>" }
 *   { kind: "login" }
 *
 * @param request Incoming request; its body must be JSON.
 * @returns 204 with an empty body on success, 400 with `{ error }` when the
 *          body is not valid JSON or `kind` is not one of the kinds above.
 */
export async function POST(request: NextRequest): Promise<NextResponse> {
  // request.json() can yield any JSON value (null, array, primitive), so it is
  // parsed into `unknown` and narrowed instead of being trusted as an object.
  let payload: unknown;
  try {
    payload = await request.json();
  } catch {
    return NextResponse.json({ error: "Invalid JSON" }, { status: 400 });
  }

  const fields =
    typeof payload === "object" && payload !== null
      ? (payload as Record<string, unknown>)
      : {};

  // Validate the kind BEFORE resolving the user: the session lookup is the
  // expensive step, and junk requests should be rejected without paying for it.
  const kind = String(fields.kind ?? "");
  if (!isTrackKind(kind)) {
    return NextResponse.json({ error: "Unknown kind" }, { status: 400 });
  }

  const name = String(fields.name ?? "").slice(0, 64); // cap length defensively
  const user = await extractUser(request);
  const metrics = MetricsService.getInstance();

  switch (kind) {
    case "page":
      metrics.recordPageView(user, name);
      break;
    case "action":
      metrics.recordAction(user, name);
      break;
    case "facility":
      // An empty name is reported as "unknown" rather than as an empty label.
      metrics.recordFacility(user, name || "unknown");
      break;
    case "login":
      metrics.recordLogin(user);
      break;
  }

  // 204: fire-and-forget beacon, nothing to return.
  return new NextResponse(null, { status: 204 });
}
/**
 * Shorten a gRPC endpoint ("/job.JobInterface/GetJobs") to a compact metric
 * label ("job.getjobs"): the first dot-separated piece of the service path,
 * a dot, and the lower-cased method name. A missing piece becomes "unknown".
 */
function shortEndpoint(endpoint: string): string {
  const [servicePath = "", rpcName = ""] = endpoint.replace(/^\//, "").split("/");
  const packageName = servicePath.split(".")[0] || "unknown";
  const rpcLabel = rpcName.toLowerCase() || "unknown";
  return `${packageName}.${rpcLabel}`;
}

/**
 * Centralized route handler: forwards the call to the REST gateway, converts
 * the outcome into a NextResponse, and records one usage sample per call
 * (short endpoint label, HTTP status, elapsed seconds).
 *
 * @param method   HTTP method passed through to fetchObjectFromRestGateway.
 * @param endpoint Gateway endpoint, e.g. "/job.JobInterface/GetJobs".
 * @param body     Request body passed through to the gateway.
 * @param log      Not read anywhere in this function body.
 * @returns `{ data }` with the gateway's status on success; `{ error }` with
 *          status 500 when the gateway payload carries an error or anything throws.
 */
export async function handleRoute(
  method: string,
  endpoint: string,
  body: string,
  log = false,
): Promise<NextResponse> {
  const t0 = Date.now();
  const metricLabel = shortEndpoint(endpoint);

  // Only the first status reported for a call is recorded; the error path
  // reports once before throwing and the catch block reports again.
  let recorded = false;
  function recordOnce(status: number): void {
    if (!recorded) {
      recorded = true;
      try {
        MetricsService.getInstance().recordApiRequest(
          metricLabel,
          status,
          (Date.now() - t0) / 1000,
        );
      } catch {
        // ignore - metrics must never affect the response
      }
    }
  }

  try {
    const gatewayResponse = await fetchObjectFromRestGateway(endpoint, method, body);
    const payload = await gatewayResponse.json();

    if (!payload.error) {
      recordOnce(gatewayResponse.status);
      return NextResponse.json({ data: payload.data }, { status: gatewayResponse.status });
    }

    // Error payload: keep the gateway's own status when it is already an error
    // status, otherwise count the call as a 500.
    recordOnce(gatewayResponse.status >= 400 ? gatewayResponse.status : 500);
    throw new Error(payload.error);
  } catch (error) {
    recordOnce(500);
    handleError(error);
    return NextResponse.json({ error: (error as Error).message }, { status: 500 });
  }
}
// Beacons are sent only in a browser (a `window` exists) and only when
// NEXT_PUBLIC_USAGE_TRACKING is not "off" (case-insensitive; unset means on).
const ENABLED =
  typeof window !== "undefined" &&
  (process.env.NEXT_PUBLIC_USAGE_TRACKING ?? "on").toLowerCase() !== "off";

/**
 * Post one usage event to /api/track. Fire-and-forget: never throws and never
 * leaves a rejected promise behind.
 *
 * @param payload Event kind plus an optional coarse name.
 */
function beacon(payload: { kind: string; name?: string }): void {
  if (!ENABLED) return;
  try {
    const body = JSON.stringify(payload);
    // sendBeacon survives navigation. It returns false when the browser did not
    // queue the payload; in that case fall through to fetch instead of
    // silently dropping the event.
    if (
      typeof navigator.sendBeacon === "function" &&
      navigator.sendBeacon("/api/track", new Blob([body], { type: "application/json" }))
    ) {
      return;
    }
    // The surrounding try/catch cannot see an asynchronous rejection, so the
    // promise gets its own handler; otherwise a network failure would surface
    // as an unhandled promise rejection.
    void fetch("/api/track", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body,
      keepalive: true,
    }).catch(() => {
      // ignore - usage tracking must never affect the UI
    });
  } catch {
    // ignore - usage tracking must never affect the UI
  }
}

// Ordered prefix -> page-name table; the first matching prefix wins, so a more
// specific prefix must come before a shorter one it starts with ("/hosts/"
// before "/hosts", "/subscription-graphs" before "/subscriptions").
const PAGE_PREFIXES: ReadonlyArray<readonly [string, string]> = [
  ["/dashboard", "dashboard"],
  ["/monitor-cue", "monitor-cue"],
  ["/split", "monitor-jobs"],
  ["/hosts/", "host-details"],
  ["/hosts", "monitor-hosts"],
  ["/jobs/", "job-details"],
  ["/frames/", "frame-log"],
  ["/allocations", "allocations"],
  ["/limits", "limits"],
  ["/redirect", "redirect"],
  ["/services", "services"],
  ["/shows", "shows"],
  ["/stuck-frames", "stuck-frames"],
  ["/subscription-graphs", "subscription-graphs"],
  ["/subscriptions", "subscriptions"],
  ["/cuesubmit", "cuesubmit"],
  ["/plugins", "plugins"],
  ["/settings", "settings"],
  ["/login", "login"],
];

/**
 * Map a Next.js pathname to a coarse, bounded page/module name.
 *
 * @param pathname Route pathname; empty or "/" counts as the jobs monitor.
 * @returns The name of the first matching prefix, or "other" when none match.
 */
export function pageNameForPath(pathname: string): string {
  if (!pathname || pathname === "/") return "monitor-jobs";
  const match = PAGE_PREFIXES.find(([prefix]) => pathname.startsWith(prefix));
  return match ? match[1] : "other";
}

/** Send a page-view beacon for the given pathname. */
export function trackPage(pathname: string): void {
  beacon({ kind: "page", name: pageNameForPath(pathname) });
}

/** Send an action beacon with the given action key. */
export function trackAction(action: string): void {
  beacon({ kind: "action", name: action });
}

/**
 * Derive an action key from a gateway-proxy action endpoint
 * ("/api/job/action/kill" -> "job-kill"). Matching is case-insensitive and the
 * key is lower-cased.
 *
 * @returns The "<entity>-<action>" key, or "" for non-action routes.
 */
export function actionKeyForEndpoint(endpoint: string): string {
  const m = endpoint.match(/\/api\/([a-z]+)\/action\/([a-z]+)/i);
  return m ? `${m[1].toLowerCase()}-${m[2].toLowerCase()}` : "";
}

/** Track an action by its endpoint; endpoints that yield no action key are ignored. */
export function trackActionEndpoint(endpoint: string): void {
  const key = actionKeyForEndpoint(endpoint);
  if (key) trackAction(key);
}

/** Send a facility beacon with the given facility name. */
export function trackFacility(facility: string): void {
  beacon({ kind: "facility", name: facility });
}

/** Send a login beacon. */
export function trackLogin(): void {
  beacon({ kind: "login" });
}
+ */ + +import * as React from "react"; +import { usePathname } from "next/navigation"; + +import { trackPage } from "@/app/utils/usage_tracking"; + +// Mounted once from the root layout. Emits a usage page-view beacon whenever the +// route changes (deduped per pathname so a polling re-render doesn't inflate +// counts). Renders nothing. +export function UsageTracker() { + const pathname = usePathname(); + const lastRef = React.useRef(null); + + React.useEffect(() => { + if (!pathname || pathname === lastRef.current) return; + lastRef.current = pathname; + // Don't count the login page as a module view; it gets its own login beacon. + if (!pathname.startsWith("/login")) trackPage(pathname); + }, [pathname]); + + return null; +} diff --git a/cueweb/lib/metrics-service.ts b/cueweb/lib/metrics-service.ts index fa8fb0d5a..20ad210b5 100644 --- a/cueweb/lib/metrics-service.ts +++ b/cueweb/lib/metrics-service.ts @@ -14,20 +14,143 @@ * limitations under the License. */ -// lib/metricsService.ts -import { Counter, Registry } from 'prom-client'; +// lib/metrics-service.ts +// +// Prometheus usage metrics for CueWeb. Singleton over a single prom-client +// Registry exposed at GET /api/metrics. Tracks WHO uses WHAT, how often, and +// how fast - per user, per page/module, per action - with bounded cardinality +// (page/action label values are validated against allow-lists; the API +// counters carry no user label). Mirrors the asset-search approach. +import { Counter, Histogram, Registry } from "prom-client"; + +import { getConfiguredFacilities } from "@/lib/facility"; + +// Sentinel user when the caller is unauthenticated (auth disabled / no session). +export const ANONYMOUS_USER = "anonymous"; + +// Allow-lists keep `page` / `action` cardinality bounded: unknown values map to +// "other" so a buggy/hostile client can't explode the series count. 
+export const ALLOWED_PAGES = [ + "dashboard", "monitor-jobs", "job-graph", "job-details", "frame-log", + "monitor-cue", "monitor-hosts", "host-details", "allocations", "limits", + "redirect", "services", "shows", "stuck-frames", "subscriptions", + "subscription-graphs", "cuesubmit", "plugins", "settings", "login", "other", +] as const; + +// The action keys correspond to the gateway-proxy action routes +// (/api//action/ -> "-"), so cardinality is bounded +// by the fixed set of routes. A few client-only keys (submit, view presets) are +// appended; anything else maps to "other". +export const ALLOWED_ACTIONS = [ + "comment-delete", "comment-save", + "frame-createdependonframe", "frame-createdependonjob", + "frame-createdependonlayer", "frame-dropdepends", "frame-eat", + "frame-getdepends", "frame-kill", "frame-markaswaiting", "frame-retry", + "group-createsubgroup", "group-delete", "group-reparentgroups", + "group-reparentjobs", "group-update", + "host-addcomment", "host-addtags", "host-delete", "host-lock", "host-reboot", + "host-rebootwhenidle", "host-redirecttojob", "host-removetags", + "host-renametag", "host-setallocation", "host-sethardwarestate", + "host-takeownership", "host-unlock", + "job-addcomment", "job-addrenderpart", "job-addsubscriber", + "job-createdependonframe", "job-createdependonjob", "job-createdependonlayer", + "job-dropdepends", "job-eatframes", "job-getdepends", + "job-getwhatdependsonthis", "job-kill", "job-killframes", + "job-markdoneframes", "job-pause", "job-reorderframes", "job-retryframes", + "job-setautoeat", "job-setmaxcores", "job-setmaxgpus", "job-setmaxretries", + "job-setmincores", "job-setmingpus", "job-setpriority", "job-staggerframes", + "job-unpause", + "layer-createdependonframe", "layer-createdependonjob", + "layer-createdependonlayer", "layer-createframebyframedepend", + "layer-eatframes", "layer-getdepends", "layer-getoutputpaths", "layer-kill", + "layer-markdone", "layer-reorderframes", "layer-retryframes", + 
"layer-setmincores", "layer-setmingpumemory", "layer-setminmemory", + "layer-settags", "layer-setthreadable", "layer-staggerframes", + "limit-create", "limit-delete", "limit-rename", "limit-setmaxvalue", + "proc-kill", "proc-unbook", "proc-unbookone", + "show-createsubscription", "show-enablebooking", "show-enabledispatching", + "show-setcommentemail", "show-setdefaultmaxcores", "show-setdefaultmincores", + // client-only actions (not gateway action routes) + "job-submit", "view-save", "view-apply", "redirect", "other", +] as const; + +type Page = (typeof ALLOWED_PAGES)[number]; +type Action = (typeof ALLOWED_ACTIONS)[number]; + +function normalize( + value: string, + allowed: T, +): T[number] { + return (allowed as readonly string[]).includes(value) + ? (value as T[number]) + : ("other" as T[number]); +} + +// HTTP status bucket so the API counter stays small (3 classes, not 1/status). +function statusClass(status: number): string { + if (status >= 500) return "5xx"; + if (status >= 400) return "4xx"; + if (status >= 300) return "3xx"; + return "2xx"; +} class MetricsService { private static instance: MetricsService; private registry: Registry; + + // Generic counter store kept for backwards compatibility with the original + // registerCounter/incrementCounter API (used by /api/increment). private counters: Map; + // Pre-registered usage metrics. 
+ private pageViews!: Counter; + private actions!: Counter; + private apiRequests!: Counter; + private apiDuration!: Histogram; + private logins!: Counter; + private facilitySelected!: Counter; + private constructor() { this.registry = new Registry(); this.counters = new Map(); - // Initialize default metrics (optional) - // collectDefaultMetrics({ register: this.registry }); + this.pageViews = new Counter({ + name: "cueweb_page_views_total", + help: "CueWeb page/module views, by user and page", + labelNames: ["user", "page"], + registers: [this.registry], + }); + this.actions = new Counter({ + name: "cueweb_actions_total", + help: "CueWeb user actions, by user and action", + labelNames: ["user", "action"], + registers: [this.registry], + }); + this.apiRequests = new Counter({ + name: "cueweb_api_requests_total", + help: "CueWeb gateway-proxy API calls, by endpoint and status class", + labelNames: ["endpoint", "status"], + registers: [this.registry], + }); + this.apiDuration = new Histogram({ + name: "cueweb_api_request_duration_seconds", + help: "CueWeb gateway-proxy API latency in seconds, by endpoint", + labelNames: ["endpoint"], + buckets: [0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30], + registers: [this.registry], + }); + this.logins = new Counter({ + name: "cueweb_logins_total", + help: "CueWeb session starts, by user", + labelNames: ["user"], + registers: [this.registry], + }); + this.facilitySelected = new Counter({ + name: "cueweb_facility_selected_total", + help: "Cuebot Facility switches, by user and facility", + labelNames: ["user", "facility"], + registers: [this.registry], + }); } public static getInstance(): MetricsService { @@ -37,13 +160,43 @@ class MetricsService { return MetricsService.instance; } + // --- Usage helpers -------------------------------------------------------- + + public recordPageView(user: string, page: string): void { + this.pageViews.inc({ user, page: normalize(page, ALLOWED_PAGES) }); + } + + public recordAction(user: string, 
action: string): void { + this.actions.inc({ user, action: normalize(action, ALLOWED_ACTIONS) }); + } + + public recordApiRequest(endpoint: string, status: number, durationSeconds: number): void { + this.apiRequests.inc({ endpoint, status: statusClass(status) }); + this.apiDuration.observe({ endpoint }, durationSeconds); + } + + public recordLogin(user: string): void { + this.logins.inc({ user }); + } + + public recordFacility(user: string, facility: string): void { + // Bound the facility label to the deployment's configured facilities + // (NEXT_PUBLIC_CUEBOT_FACILITIES); anything else -> "other" so a hostile + // /api/track beacon can't create unbounded series. + const configured = getConfiguredFacilities(); + const bounded = configured.includes(facility) ? facility : "other"; + this.facilitySelected.inc({ user, facility: bounded }); + } + + // --- Back-compat generic counter API (used by /api/increment) ------------- + public registerCounter(name: string, help: string): Counter | undefined { if (!this.counters.has(name)) { const counter = new Counter({ name, help, registers: [this.registry], - labelNames: ['user'] + labelNames: ["user"], }); this.counters.set(name, counter); } @@ -53,10 +206,8 @@ class MetricsService { public incrementCounter(name: string, username: string): void { const counter = this.counters.get(name); if (counter) { - // Increment the specified counter for the given username. - counter.inc({user: username}); + counter.inc({ user: username }); } else { - // Log a warning if the counter specified by name does not exist in the registry. 
console.warn(`Counter ${name} not found`); } } @@ -65,4 +216,6 @@ class MetricsService { return this.registry.metrics(); } } -export default MetricsService; \ No newline at end of file + +export default MetricsService; +export type { Page, Action }; diff --git a/cueweb/lib/track-user.ts b/cueweb/lib/track-user.ts new file mode 100644 index 000000000..dea54733a --- /dev/null +++ b/cueweb/lib/track-user.ts @@ -0,0 +1,59 @@ +/* + * Copyright Contributors to the OpenCue Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Server-side resolution of the calling user for usage metrics. The user label +// is resolved from the authenticated NextAuth session (never trusted from the +// client request body), falling back to a reverse-proxy identity header and +// finally the ANONYMOUS_USER sentinel when auth is disabled. Mirrors +// asset-search's extract_user(): session -> X-User -> X-Forwarded-User -> +// anonymous. +import type { NextRequest } from "next/server"; +import { getServerSession } from "next-auth"; + +import { authOptions } from "@/lib/auth"; +import { ANONYMOUS_USER } from "@/lib/metrics-service"; + +function localPart(value: string): string { + return value.includes("@") ? value.split("@")[0] : value; +} + +// The X-User / X-Forwarded-User identity headers are forgeable by any client, +// so they are only honored when the operator explicitly opts in - i.e. 
the +// deployment sits behind a trusted reverse proxy / auth gateway that strips +// inbound copies and injects the authenticated identity. Off by default; the +// authenticated NextAuth session is always preferred and is non-forgeable. +const TRUST_IDENTITY_HEADER = + (process.env.CUEWEB_TRUST_IDENTITY_HEADER ?? "").toLowerCase() === "true"; + +export async function extractUser(request: NextRequest): Promise { + // Authoritative source: the signed-in session (cannot be spoofed). + try { + const session = await getServerSession(authOptions).catch(() => null); + const fromSession = session?.user?.name || session?.user?.email; + if (fromSession) return localPart(fromSession).trim() || ANONYMOUS_USER; + } catch { + // Fall through to the (opt-in) proxy header / anonymous. + } + + // Only trust the proxy-injected identity header when explicitly enabled. + if (TRUST_IDENTITY_HEADER) { + const header = + request.headers.get("X-User") || request.headers.get("X-Forwarded-User"); + if (header) return localPart(header).trim() || ANONYMOUS_USER; + } + + return ANONYMOUS_USER; +} diff --git a/docs/_docs/developer-guide/cueweb-development.md b/docs/_docs/developer-guide/cueweb-development.md index 48ebc1877..1ae563482 100644 --- a/docs/_docs/developer-guide/cueweb-development.md +++ b/docs/_docs/developer-guide/cueweb-development.md @@ -2350,6 +2350,64 @@ export function Button({ className, variant, size, ...props }: ButtonProps) { --- +## Usage metrics (Prometheus + Grafana) + +CueWeb exposes per-user usage metrics at `GET /api/metrics` (Prometheus text) +so operators can see *who uses what, how often, and how fast*. Bounded +cardinality is the design constraint: `page` / `action` label values come from +fixed allow-lists, and the API counters carry no `user` label. 
Files involved: + +```text +lib/metrics-service.ts # prom-client singleton Registry + metric set + helpers + ALLOWED_PAGES/ALLOWED_ACTIONS +lib/track-user.ts # extractUser(req): session -> X-User/X-Forwarded-User -> "anonymous" +app/api/metrics/route.ts # GET /api/metrics (registry.metrics()) +app/api/track/route.ts # POST /api/track client beacon (resolves user server-side) +app/utils/usage_tracking.ts # client beacons: trackPage/trackAction/trackActionEndpoint/trackFacility/trackLogin +components/ui/usage-tracker.tsx # mounted in layout; emits a page-view beacon on route change +app/utils/gateway_server.ts # handleRoute records cueweb_api_requests_total + cueweb_api_request_duration_seconds +app/utils/api_utils.ts # accessActionApi calls trackActionEndpoint (per-user action tracking) +``` + +### Metric set + +| Metric | Type | Labels | +|--------|------|--------| +| `cueweb_page_views_total` | Counter | `user`, `page` | +| `cueweb_actions_total` | Counter | `user`, `action` | +| `cueweb_api_requests_total` | Counter | `endpoint`, `status` | +| `cueweb_api_request_duration_seconds` | Histogram | `endpoint` | +| `cueweb_logins_total` | Counter | `user` | +| `cueweb_facility_selected_total` | Counter | `user`, `facility` | + +### How it flows + +- **Page views**: `UsageTracker` (mounted once in `app/layout.tsx`) maps the + pathname to a coarse page name (`pageNameForPath`) and `POST`s `/api/track` + `{kind:"page",name}` on route change (deduped per pathname). `navigator.sendBeacon` + survives navigation. +- **Actions**: the shared client dispatcher `accessActionApi(endpoint, …)` calls + `trackActionEndpoint(endpoint)`, which derives an action key + (`/api/job/action/kill` → `job-kill`) and beacons it. Since `performAction` + routes through `accessActionApi`, every job/layer/frame/host/proc action is + covered from one place. 
+- **API requests + latency**: `handleRoute` (the single server-side gateway + proxy used by ~119 routes) times each call and records the short endpoint + (`/job.JobInterface/GetJobs` → `job.getjobs`) + status class. No `user` label + keeps it small; failures never affect the response. +- **User resolution**: the client never sends the `user`. `/api/track` resolves + it server-side via `extractUser()` (NextAuth session → identity header → + `anonymous`), so it can't be spoofed. + +### Wiring + dashboard + +Prometheus scrapes `cueweb:3000/api/metrics` +(`sandbox/config/prometheus-monitoring.yml`); Grafana auto-provisions +`sandbox/config/grafana/dashboards/cueweb-usage.json` ("CueWeb User Usage", with +a `$user` variable). Use a fixed `[5m]` rate window for the latency percentile +panels. Opt out of the client beacon with `NEXT_PUBLIC_USAGE_TRACKING=off`. + +--- + ## Configuration and Deployment ### Environment Configuration diff --git a/docs/_docs/getting-started/deploying-cueweb.md b/docs/_docs/getting-started/deploying-cueweb.md index 157f454da..71d9c4e77 100644 --- a/docs/_docs/getting-started/deploying-cueweb.md +++ b/docs/_docs/getting-started/deploying-cueweb.md @@ -841,20 +841,68 @@ curl https://cueweb.company.com/api/health curl https://cueweb.company.com/api/health/detailed ``` -### Prometheus Metrics +### Prometheus Metrics (user usage) -Enable metrics collection: +CueWeb exposes Prometheus usage metrics at **`GET /api/metrics`** (plain text, +never gated by the authorization gate). They answer *who uses what, how often, +and how fast* - per user, per page/module, per action - with bounded +cardinality. No setup beyond pointing Prometheus at the endpoint. -```javascript -// next.config.js -module.exports = { - experimental: { - instrumentationHook: true, - }, - // Other config... 
-} +The `/api/metrics` endpoint returns the metrics in Prometheus text format: + +![CueWeb /api/metrics endpoint - page view and action counters](/assets/images/cueweb/cueweb_user_usage_metrics_api_metrics_endpoint1.png) + +![CueWeb /api/metrics endpoint - per-endpoint API request counters](/assets/images/cueweb/cueweb_user_usage_metrics_api_metrics_endpoint2.png) + +![CueWeb /api/metrics endpoint - API request duration histogram](/assets/images/cueweb/cueweb_user_usage_metrics_api_metrics_endpoint3.png) + +**1. Scrape CueWeb from Prometheus.** Add a job to your Prometheus config (the +sandbox already does this in `sandbox/config/prometheus-monitoring.yml`): + +```yaml + - job_name: 'cueweb' + static_configs: + - targets: ['cueweb:3000'] + metrics_path: /api/metrics ``` +Once scraped, the `cueweb_*` series are queryable in Prometheus: + +![Querying a cueweb usage metric in Prometheus](/assets/images/cueweb/cueweb_user_usage_metrics_prometheus_query.png) + +**2. Import the Grafana dashboard.** The sandbox auto-provisions +`sandbox/config/grafana/dashboards/cueweb-usage.json` ("CueWeb User Usage"): +overview stats, page/module views, actions, per-endpoint API latency +(p50/p90/p99), and Top-N users, all filterable by a `$user` template variable. + +![CueWeb User Usage Grafana dashboard - overview and pages/modules](/assets/images/cueweb/cueweb_user_usage_metrics_grafana_charts1.png) + +![CueWeb User Usage Grafana dashboard - actions and API latency](/assets/images/cueweb/cueweb_user_usage_metrics_grafana_charts2.png) + +![CueWeb User Usage Grafana dashboard - per-user panels](/assets/images/cueweb/cueweb_user_usage_metrics_grafana_charts3.png) + +**Metrics exposed:** + +| Metric | Labels | Meaning | +|--------|--------|---------| +| `cueweb_page_views_total` | `user`, `page` | Page/module views (Monitor Jobs, Monitor Cue, Monitor Hosts, View Job Graph, CueSubmit, Plugins, …). 
| +| `cueweb_actions_total` | `user`, `action` | User actions (`job-kill`, `frame-retry`, `host-lock`, `job-submit`, …). | +| `cueweb_api_requests_total` | `endpoint`, `status` | Gateway-proxy API calls by short endpoint and status class (`2xx`/`4xx`/`5xx`). | +| `cueweb_api_request_duration_seconds` | `endpoint` | API latency histogram (for p50/p90/p99). | +| `cueweb_logins_total` | `user` | Session starts. | +| `cueweb_facility_selected_total` | `user`, `facility` | Cuebot Facility switches. | + +The `user` label is resolved **server-side** in this order: the signed-in +NextAuth session (authoritative, non-spoofable) → the `X-User` / +`X-Forwarded-User` identity headers **only when `CUEWEB_TRUST_IDENTITY_HEADER=true`** +(off by default; enable it only behind a trusted reverse proxy / auth gateway +that strips inbound copies and injects the identity) → `anonymous`. So with +authentication disabled and no trusted proxy, every event is attributed to +`anonymous` and a client cannot forge another user. Only the username and coarse +page/action names are recorded - no job names, search text, or file paths. +Disable the client beacon at build time with `NEXT_PUBLIC_USAGE_TRACKING=off` +(the `/api/metrics` endpoint stays). + ### Sentry Integration Configure error tracking: diff --git a/docs/_docs/reference/cueweb.md b/docs/_docs/reference/cueweb.md index e096d01fa..7c6152bb5 100644 --- a/docs/_docs/reference/cueweb.md +++ b/docs/_docs/reference/cueweb.md @@ -104,6 +104,7 @@ CueWeb is a web-based application that provides browser access to OpenCue render | `NEXT_PUBLIC_CUEPROGBAR_URL` | Optional registered URL scheme the **Show Progress Bar** dialog's launch button hands off to a local handler. Empty hides the launch button. | (empty) | | `NEXT_PUBLIC_PREVIEW_COMMAND` | Command shown/copied by the frame menu's **Preview All** dialog to open rendered output in an external image viewer. Placeholders `{paths}` / `{job}` / `{layer}` / `{frame}` are substituted. 
| `rv {paths}` | | `NEXT_PUBLIC_PREVIEW_URL` | Optional registered URL scheme **Preview All** hands off to a local viewer (e.g. `openrv://{paths}`). Empty hides the launch button (the command is still shown to copy). | (empty) | +| `NEXT_PUBLIC_USAGE_TRACKING` | Set to `off` to disable the client-side usage beacon behind the `cueweb_page_views_total` / `cueweb_actions_total` Prometheus metrics. `GET /api/metrics` and the server-side API request/latency metrics stay enabled regardless. See [Usage metrics](#usage-metrics-prometheus). | `on` | | `NEXT_PUBLIC_EMAIL_DOMAIN` | Email domain used to derive the **Email Artist...** dialog defaults: `@` for **To**, `-@` for **From** and **CC**. See [Email Artist dialog](#email-artist-dialog). | `your.domain.com` | | `NEXT_PUBLIC_EMAIL_SUPPORT_SUFFIX` | Per-show support alias suffix used in the **Email Artist...** dialog's From / CC defaults (`-@`). Matches CueGUI's "production support team" alias convention. | `pst` | | `NEXT_PUBLIC_EMAIL_REQUEST_CORES_SUFFIX` | Per-show support alias suffix used in the **Request Cores...** dialog's CC default (`-@`). Distinct from the Email Artist `pst` alias because CueGUI's `RequestCoresDialog` traditionally targets a different team queue. | `support` | @@ -1364,6 +1365,29 @@ When the deployment uses the Loki backend (`NEXT_PUBLIC_LOKI_URL` set), logs are --- +## Usage metrics (Prometheus) + +`GET /api/metrics` exposes Prometheus usage metrics (plain text; never gated by the authorization gate) so operators can track *who uses what, how often, and how fast* - per user, per page/module, per action - with bounded cardinality. + +![CueWeb /api/metrics endpoint output](/assets/images/cueweb/cueweb_user_usage_metrics_api_metrics_endpoint1.png) + +| Metric | Type | Labels | Notes | +|--------|------|--------|-------| +| `cueweb_page_views_total` | Counter | `user`, `page` | Page/module views; `page` is from a fixed allow-list (unknown → `other`). 
| +| `cueweb_actions_total` | Counter | `user`, `action` | User actions (`job-kill`, `frame-retry`, `host-lock`, `job-submit`, …), keyed off the action routes. | +| `cueweb_api_requests_total` | Counter | `endpoint`, `status` | Every gateway-proxy call by short endpoint (`job.getjobs`) and status class (`2xx`/`4xx`/`5xx`). No `user` label. | +| `cueweb_api_request_duration_seconds` | Histogram | `endpoint` | API latency, for p50/p90/p99 panels. | +| `cueweb_logins_total` | Counter | `user` | Session starts. | +| `cueweb_facility_selected_total` | Counter | `user`, `facility` | Cuebot Facility switches. | + +- **User label** is resolved server-side (`lib/track-user.ts`) in this order: the signed-in NextAuth session, which the client cannot spoof → the forgeable `X-User` / `X-Forwarded-User` identity headers, honored **only** when `CUEWEB_TRUST_IDENTITY_HEADER=true` (off by default) - set it only when CueWeb sits behind a trusted reverse proxy / auth gateway that strips inbound copies and injects the authenticated identity → `anonymous` when neither yields a user. Only the username and coarse page, action, and facility names are recorded - no job names, search text, or file paths. +- **Instrumentation**: `app/utils/gateway_server.ts` `handleRoute` records the API request count and latency for all routes; the client `UsageTracker` + `accessActionApi` beacon page views and actions to `POST /api/track`, which also accepts `facility` and `login` events. Disable the client beacon with `NEXT_PUBLIC_USAGE_TRACKING=off`. +- **Wiring**: Prometheus scrapes `cueweb:3000/api/metrics` (`sandbox/config/prometheus-monitoring.yml` and `sandbox/config/prometheus/prometheus.yml`); Grafana auto-provisions the **CueWeb User Usage** dashboard (`sandbox/config/grafana/dashboards/cueweb-usage.json`) with a `$user` variable.
+ +![CueWeb User Usage Grafana dashboard](/assets/images/cueweb/cueweb_user_usage_metrics_grafana_charts1.png) + +--- + ## Global Application Header CueWeb mounts a persistent header at the top of every authenticated route diff --git a/docs/assets/images/cueweb/cueweb_user_usage_metrics_api_metrics_endpoint1.png b/docs/assets/images/cueweb/cueweb_user_usage_metrics_api_metrics_endpoint1.png new file mode 100644 index 000000000..766295164 Binary files /dev/null and b/docs/assets/images/cueweb/cueweb_user_usage_metrics_api_metrics_endpoint1.png differ diff --git a/docs/assets/images/cueweb/cueweb_user_usage_metrics_api_metrics_endpoint2.png b/docs/assets/images/cueweb/cueweb_user_usage_metrics_api_metrics_endpoint2.png new file mode 100644 index 000000000..1d09e4188 Binary files /dev/null and b/docs/assets/images/cueweb/cueweb_user_usage_metrics_api_metrics_endpoint2.png differ diff --git a/docs/assets/images/cueweb/cueweb_user_usage_metrics_api_metrics_endpoint3.png b/docs/assets/images/cueweb/cueweb_user_usage_metrics_api_metrics_endpoint3.png new file mode 100644 index 000000000..55ccaa8b2 Binary files /dev/null and b/docs/assets/images/cueweb/cueweb_user_usage_metrics_api_metrics_endpoint3.png differ diff --git a/docs/assets/images/cueweb/cueweb_user_usage_metrics_grafana_charts1.png b/docs/assets/images/cueweb/cueweb_user_usage_metrics_grafana_charts1.png new file mode 100644 index 000000000..d6cf913cc Binary files /dev/null and b/docs/assets/images/cueweb/cueweb_user_usage_metrics_grafana_charts1.png differ diff --git a/docs/assets/images/cueweb/cueweb_user_usage_metrics_grafana_charts2.png b/docs/assets/images/cueweb/cueweb_user_usage_metrics_grafana_charts2.png new file mode 100644 index 000000000..53f7b3e86 Binary files /dev/null and b/docs/assets/images/cueweb/cueweb_user_usage_metrics_grafana_charts2.png differ diff --git a/docs/assets/images/cueweb/cueweb_user_usage_metrics_grafana_charts3.png 
b/docs/assets/images/cueweb/cueweb_user_usage_metrics_grafana_charts3.png new file mode 100644 index 000000000..7546f04ad Binary files /dev/null and b/docs/assets/images/cueweb/cueweb_user_usage_metrics_grafana_charts3.png differ diff --git a/docs/assets/images/cueweb/cueweb_user_usage_metrics_prometheus_query.png b/docs/assets/images/cueweb/cueweb_user_usage_metrics_prometheus_query.png new file mode 100644 index 000000000..f288241ad Binary files /dev/null and b/docs/assets/images/cueweb/cueweb_user_usage_metrics_prometheus_query.png differ diff --git a/sandbox/config/grafana/dashboards/cueweb-usage.json b/sandbox/config/grafana/dashboards/cueweb-usage.json new file mode 100644 index 000000000..6b030c373 --- /dev/null +++ b/sandbox/config/grafana/dashboards/cueweb-usage.json @@ -0,0 +1,189 @@ +{ + "annotations": { "list": [] }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "links": [], + "liveNow": false, + "refresh": "30s", + "schemaVersion": 39, + "tags": ["opencue", "cueweb", "usage"], + "templating": { + "list": [ + { + "current": { "selected": true, "text": "All", "value": "$__all" }, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "definition": "label_values(cueweb_page_views_total, user)", + "includeAll": true, + "allValue": ".*", + "multi": true, + "name": "user", + "label": "User", + "options": [], + "query": { "query": "label_values(cueweb_page_views_total, user)", "refId": "user" }, + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": { "from": "now-6h", "to": "now" }, + "timepicker": {}, + "timezone": "", + "title": "CueWeb User Usage", + "uid": "cueweb-user-usage", + "version": 1, + "panels": [ + { "type": "row", "title": "Overview", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, "id": 1 }, + { + "type": "stat", "title": "Total Page Views", "id": 2, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 5, "w": 6, "x": 0, "y": 1 }, + "fieldConfig": { "defaults": { 
"unit": "short", "color": { "mode": "thresholds" } }, "overrides": [] }, + "options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "value", "graphMode": "area" }, + "targets": [ { "refId": "A", "expr": "sum(cueweb_page_views_total{user=~\"$user\"})", "datasource": { "type": "prometheus", "uid": "prometheus" } } ] + }, + { + "type": "stat", "title": "Total Actions", "id": 3, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 5, "w": 6, "x": 6, "y": 1 }, + "fieldConfig": { "defaults": { "unit": "short", "color": { "mode": "thresholds" } }, "overrides": [] }, + "options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "value", "graphMode": "area" }, + "targets": [ { "refId": "A", "expr": "sum(cueweb_actions_total{user=~\"$user\"})", "datasource": { "type": "prometheus", "uid": "prometheus" } } ] + }, + { + "type": "stat", "title": "Active Users (all-time)", "id": 4, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 5, "w": 6, "x": 12, "y": 1 }, + "fieldConfig": { "defaults": { "unit": "short", "color": { "mode": "thresholds" } }, "overrides": [] }, + "options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "value", "graphMode": "none" }, + "targets": [ { "refId": "A", "expr": "count(count by (user) (cueweb_page_views_total))", "datasource": { "type": "prometheus", "uid": "prometheus" } } ] + }, + { + "type": "stat", "title": "API Error Ratio (5m)", "id": 5, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 5, "w": 6, "x": 18, "y": 1 }, + "fieldConfig": { "defaults": { "unit": "percentunit", "color": { "mode": "thresholds" }, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "yellow", "value": 0.01 }, { "color": "red", "value": 0.05 } ] } }, "overrides": [] }, + "options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "value", "graphMode": "area" }, + "targets": [ { 
"refId": "A", "expr": "sum(rate(cueweb_api_requests_total{status=~\"4xx|5xx\"}[5m])) / clamp_min(sum(rate(cueweb_api_requests_total[5m])), 1)", "datasource": { "type": "prometheus", "uid": "prometheus" } } ] + }, + + { "type": "row", "title": "Pages / Modules", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 6 }, "id": 10 }, + { + "type": "timeseries", "title": "Page Views / sec by Page", "id": 11, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 7 }, + "fieldConfig": { "defaults": { "unit": "ops", "custom": { "drawStyle": "line", "fillOpacity": 10 } }, "overrides": [] }, + "options": { "legend": { "displayMode": "table", "placement": "right", "calcs": ["sum"] } }, + "targets": [ { "refId": "A", "expr": "sum by (page) (rate(cueweb_page_views_total{user=~\"$user\"}[$__rate_interval]))", "legendFormat": "{{page}}", "datasource": { "type": "prometheus", "uid": "prometheus" } } ] + }, + { + "type": "timeseries", "title": "Top Pages (total views)", "id": 12, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 7 }, + "fieldConfig": { "defaults": { "unit": "short", "custom": { "drawStyle": "bars", "fillOpacity": 60 } }, "overrides": [] }, + "options": { "legend": { "displayMode": "table", "placement": "right", "calcs": ["lastNotNull"] } }, + "targets": [ { "refId": "A", "expr": "topk(15, sum by (page) (cueweb_page_views_total{user=~\"$user\"}))", "legendFormat": "{{page}}", "datasource": { "type": "prometheus", "uid": "prometheus" } } ] + }, + + { "type": "row", "title": "Actions", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 15 }, "id": 20 }, + { + "type": "timeseries", "title": "Actions / sec by Action", "id": 21, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 }, + "fieldConfig": { "defaults": { "unit": "ops", "custom": { "drawStyle": "line", "fillOpacity": 10 } }, "overrides": [] }, + "options": { 
"legend": { "displayMode": "table", "placement": "right", "calcs": ["sum"] } }, + "targets": [ { "refId": "A", "expr": "topk(15, sum by (action) (rate(cueweb_actions_total{user=~\"$user\"}[$__rate_interval])))", "legendFormat": "{{action}}", "datasource": { "type": "prometheus", "uid": "prometheus" } } ] + }, + { + "type": "timeseries", "title": "Top 20 Actions (total)", "id": 22, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 }, + "fieldConfig": { "defaults": { "unit": "short", "custom": { "drawStyle": "bars", "fillOpacity": 60 } }, "overrides": [] }, + "options": { "legend": { "displayMode": "table", "placement": "right", "calcs": ["lastNotNull"] } }, + "targets": [ { "refId": "A", "expr": "topk(20, sum by (action) (cueweb_actions_total{user=~\"$user\"}))", "legendFormat": "{{action}}", "datasource": { "type": "prometheus", "uid": "prometheus" } } ] + }, + + { "type": "row", "title": "API Requests & Latency", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 24 }, "id": 30 }, + { + "type": "timeseries", "title": "Requests / sec by Endpoint (Top 15)", "id": 31, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 25 }, + "fieldConfig": { "defaults": { "unit": "ops", "custom": { "drawStyle": "line", "fillOpacity": 10 } }, "overrides": [] }, + "options": { "legend": { "displayMode": "table", "placement": "right", "calcs": ["sum"] } }, + "targets": [ { "refId": "A", "expr": "topk(15, sum by (endpoint) (rate(cueweb_api_requests_total[$__rate_interval])))", "legendFormat": "{{endpoint}}", "datasource": { "type": "prometheus", "uid": "prometheus" } } ] + }, + { + "type": "timeseries", "title": "API Latency p50 / p90 / p99 (all endpoints)", "id": 32, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 25 }, + "fieldConfig": { "defaults": { "unit": "s", "custom": { "drawStyle": "line", "fillOpacity": 10 } 
}, "overrides": [] }, + "options": { "legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + { "refId": "A", "expr": "histogram_quantile(0.50, sum by (le) (rate(cueweb_api_request_duration_seconds_bucket[5m])))", "legendFormat": "p50", "datasource": { "type": "prometheus", "uid": "prometheus" } }, + { "refId": "B", "expr": "histogram_quantile(0.90, sum by (le) (rate(cueweb_api_request_duration_seconds_bucket[5m])))", "legendFormat": "p90", "datasource": { "type": "prometheus", "uid": "prometheus" } }, + { "refId": "C", "expr": "histogram_quantile(0.99, sum by (le) (rate(cueweb_api_request_duration_seconds_bucket[5m])))", "legendFormat": "p99", "datasource": { "type": "prometheus", "uid": "prometheus" } } + ] + }, + { + "type": "timeseries", "title": "Average Latency by Endpoint", "id": 33, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 33 }, + "fieldConfig": { "defaults": { "unit": "s", "custom": { "drawStyle": "line", "fillOpacity": 10 } }, "overrides": [] }, + "options": { "legend": { "displayMode": "table", "placement": "right", "calcs": ["mean"] } }, + "targets": [ { "refId": "A", "expr": "sum by (endpoint) (rate(cueweb_api_request_duration_seconds_sum[5m])) / clamp_min(sum by (endpoint) (rate(cueweb_api_request_duration_seconds_count[5m])), 0.0001)", "legendFormat": "{{endpoint}}", "datasource": { "type": "prometheus", "uid": "prometheus" } } ] + }, + { + "type": "timeseries", "title": "Error Rate by Endpoint (4xx/5xx)", "id": 34, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 33 }, + "fieldConfig": { "defaults": { "unit": "ops", "custom": { "drawStyle": "line", "fillOpacity": 10 } }, "overrides": [] }, + "options": { "legend": { "displayMode": "table", "placement": "right", "calcs": ["sum"] } }, + "targets": [ { "refId": "A", "expr": "sum by (endpoint) 
(rate(cueweb_api_requests_total{status=~\"4xx|5xx\"}[$__rate_interval]))", "legendFormat": "{{endpoint}}", "datasource": { "type": "prometheus", "uid": "prometheus" } } ] + }, + + { "type": "row", "title": "Users", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 41 }, "id": 40 }, + { + "type": "timeseries", "title": "Top 20 Users by Page Views", "id": 41, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 42 }, + "fieldConfig": { "defaults": { "unit": "short", "custom": { "drawStyle": "bars", "fillOpacity": 60 } }, "overrides": [] }, + "options": { "legend": { "displayMode": "table", "placement": "right", "calcs": ["lastNotNull"] } }, + "targets": [ { "refId": "A", "expr": "topk(20, sum by (user) (cueweb_page_views_total{user=~\"$user\"}))", "legendFormat": "{{user}}", "datasource": { "type": "prometheus", "uid": "prometheus" } } ] + }, + { + "type": "timeseries", "title": "Top 20 Users by Actions", "id": 42, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 42 }, + "fieldConfig": { "defaults": { "unit": "short", "custom": { "drawStyle": "bars", "fillOpacity": 60 } }, "overrides": [] }, + "options": { "legend": { "displayMode": "table", "placement": "right", "calcs": ["lastNotNull"] } }, + "targets": [ { "refId": "A", "expr": "topk(20, sum by (user) (cueweb_actions_total{user=~\"$user\"}))", "legendFormat": "{{user}}", "datasource": { "type": "prometheus", "uid": "prometheus" } } ] + }, + { + "type": "timeseries", "title": "Page Views / sec by User (Top 10)", "id": 43, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 50 }, + "fieldConfig": { "defaults": { "unit": "ops", "custom": { "drawStyle": "line", "fillOpacity": 10 } }, "overrides": [] }, + "options": { "legend": { "displayMode": "table", "placement": "right", "calcs": ["sum"] } }, + "targets": [ { "refId": "A", "expr": "topk(10, sum by (user) 
(rate(cueweb_page_views_total{user=~\"$user\"}[$__rate_interval])))", "legendFormat": "{{user}}", "datasource": { "type": "prometheus", "uid": "prometheus" } } ] + }, + { + "type": "timeseries", "title": "Logins by User", "id": 44, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 50 }, + "fieldConfig": { "defaults": { "unit": "short", "custom": { "drawStyle": "bars", "fillOpacity": 60 } }, "overrides": [] }, + "options": { "legend": { "displayMode": "table", "placement": "right", "calcs": ["lastNotNull"] } }, + "targets": [ { "refId": "A", "expr": "topk(20, sum by (user) (cueweb_logins_total{user=~\"$user\"}))", "legendFormat": "{{user}}", "datasource": { "type": "prometheus", "uid": "prometheus" } } ] + }, + + { "type": "row", "title": "Facilities", "gridPos": { "h": 1, "w": 24, "x": 0, "y": 58 }, "id": 50 }, + { + "type": "timeseries", "title": "Cuebot Facility Switches by Facility", "id": 51, + "datasource": { "type": "prometheus", "uid": "prometheus" }, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 59 }, + "fieldConfig": { "defaults": { "unit": "short", "custom": { "drawStyle": "line", "fillOpacity": 10 } }, "overrides": [] }, + "options": { "legend": { "displayMode": "table", "placement": "right", "calcs": ["lastNotNull"] } }, + "targets": [ { "refId": "A", "expr": "sum by (facility) (cueweb_facility_selected_total{user=~\"$user\"})", "legendFormat": "{{facility}}", "datasource": { "type": "prometheus", "uid": "prometheus" } } ] + } + ] +} diff --git a/sandbox/config/prometheus-monitoring.yml b/sandbox/config/prometheus-monitoring.yml index e5bc6e68d..109f403dd 100644 --- a/sandbox/config/prometheus-monitoring.yml +++ b/sandbox/config/prometheus-monitoring.yml @@ -9,6 +9,12 @@ scrape_configs: - targets: ['cuebot:8080'] metrics_path: /metrics + # CueWeb usage metrics (per-user page views / actions / API latency) + - job_name: 'cueweb' + static_configs: + - targets: ['cueweb:3000'] + metrics_path: 
/api/metrics + # Prometheus self-monitoring - job_name: 'prometheus' static_configs: diff --git a/sandbox/config/prometheus/prometheus.yml b/sandbox/config/prometheus/prometheus.yml index 632b12ca6..9f563694b 100644 --- a/sandbox/config/prometheus/prometheus.yml +++ b/sandbox/config/prometheus/prometheus.yml @@ -12,3 +12,10 @@ scrape_configs: static_configs: - targets: - 'db-exporter:9187' + + # CueWeb usage metrics (per-user page views / actions / API latency). + - job_name: cueweb + metrics_path: /api/metrics + static_configs: + - targets: + - 'cueweb:3000'