Skip to content

Commit 5de9b52

Browse files
authored
feat(gastown): add manual container token refresh in town settings (#1102)
* feat(gastown): add manual container token refresh button in town settings (#1101)
* fix(gastown): remove LIMIT clauses from Grafana dashboard queries

The $timeFilter macro already bounds the result set sufficiently. Hardcoded LIMITs were silently truncating data in 9 panels.
1 parent d44ee0a commit 5de9b52

5 files changed

Lines changed: 100 additions & 20 deletions

File tree

cloudflare-gastown/gastown-grafana-dash-1.json

Lines changed: 20 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -681,8 +681,8 @@
681681
"interval": "",
682682
"intervalFactor": 1,
683683
"nullifySparse": false,
684-
"query": "SELECT $timeSeries AS t, blob1 AS label, SUM(_sample_interval) AS count FROM gastown_events WHERE $timeFilter GROUP BY t, label ORDER BY t LIMIT 500",
685-
"rawSql": "SELECT $timeSeries AS t, blob1 AS label, SUM(_sample_interval) AS count FROM gastown_events WHERE $timeFilter GROUP BY t, label ORDER BY t LIMIT 500",
684+
"query": "SELECT $timeSeries AS t, blob1 AS label, SUM(_sample_interval) AS count FROM gastown_events WHERE $timeFilter GROUP BY t, label ORDER BY t",
685+
"rawSql": "SELECT $timeSeries AS t, blob1 AS label, SUM(_sample_interval) AS count FROM gastown_events WHERE $timeFilter GROUP BY t, label ORDER BY t",
686686
"refId": "A",
687687
"round": "0s",
688688
"showFormattedSQL": false,
@@ -1026,8 +1026,8 @@
10261026
"interval": "",
10271027
"intervalFactor": 1,
10281028
"nullifySparse": false,
1029-
"query": "SELECT blob1 AS event, SUM(_sample_interval) AS total, SUM(IF(blob5 != '', _sample_interval, 0)) AS error_count, SUM(IF(blob5 = '', _sample_interval, 0)) AS success_count, SUM(IF(blob5 != '', _sample_interval, 0)) / SUM(_sample_interval) AS error_rate FROM gastown_events WHERE $timeFilter GROUP BY event HAVING error_count > 0 ORDER BY error_count DESC LIMIT 50",
1030-
"rawSql": "SELECT blob1 AS event, SUM(_sample_interval) AS total, SUM(IF(blob5 != '', _sample_interval, 0)) AS error_count, SUM(IF(blob5 = '', _sample_interval, 0)) AS success_count, SUM(IF(blob5 != '', _sample_interval, 0)) / SUM(_sample_interval) AS error_rate FROM gastown_events WHERE $timeFilter GROUP BY event HAVING error_count > 0 ORDER BY error_count DESC LIMIT 50",
1029+
"query": "SELECT blob1 AS event, SUM(_sample_interval) AS total, SUM(IF(blob5 != '', _sample_interval, 0)) AS error_count, SUM(IF(blob5 = '', _sample_interval, 0)) AS success_count, SUM(IF(blob5 != '', _sample_interval, 0)) / SUM(_sample_interval) AS error_rate FROM gastown_events WHERE $timeFilter GROUP BY event HAVING error_count > 0 ORDER BY error_count DESC",
1030+
"rawSql": "SELECT blob1 AS event, SUM(_sample_interval) AS total, SUM(IF(blob5 != '', _sample_interval, 0)) AS error_count, SUM(IF(blob5 = '', _sample_interval, 0)) AS success_count, SUM(IF(blob5 != '', _sample_interval, 0)) / SUM(_sample_interval) AS error_rate FROM gastown_events WHERE $timeFilter GROUP BY event HAVING error_count > 0 ORDER BY error_count DESC",
10311031
"refId": "A",
10321032
"round": "0s",
10331033
"showFormattedSQL": false,
@@ -1106,8 +1106,8 @@
11061106
"interval": "",
11071107
"intervalFactor": 1,
11081108
"nullifySparse": false,
1109-
"query": "SELECT blob5 AS error_message, blob1 AS event, SUM(_sample_interval) AS count FROM gastown_events WHERE $timeFilter AND blob5 != '' GROUP BY error_message, event ORDER BY count DESC LIMIT 30",
1110-
"rawSql": "SELECT blob5 AS error_message, blob1 AS event, SUM(_sample_interval) AS count FROM gastown_events WHERE $timeFilter AND blob5 != '' GROUP BY error_message, event ORDER BY count DESC LIMIT 30",
1109+
"query": "SELECT blob5 AS error_message, blob1 AS event, SUM(_sample_interval) AS count FROM gastown_events WHERE $timeFilter AND blob5 != '' GROUP BY error_message, event ORDER BY count DESC",
1110+
"rawSql": "SELECT blob5 AS error_message, blob1 AS event, SUM(_sample_interval) AS count FROM gastown_events WHERE $timeFilter AND blob5 != '' GROUP BY error_message, event ORDER BY count DESC",
11111111
"refId": "A",
11121112
"round": "0s",
11131113
"showFormattedSQL": false,
@@ -1323,8 +1323,8 @@
13231323
"interval": "",
13241324
"intervalFactor": 1,
13251325
"nullifySparse": false,
1326-
"query": "SELECT $timeSeries AS t, blob1 AS label, SUM(_sample_interval * double1) / SUM(_sample_interval) AS avg_duration FROM gastown_events WHERE $timeFilter AND blob3 IN ('http', 'trpc') GROUP BY t, label ORDER BY t LIMIT 500",
1327-
"rawSql": "SELECT $timeSeries AS t, blob1 AS label, SUM(_sample_interval * double1) / SUM(_sample_interval) AS avg_duration FROM gastown_events WHERE $timeFilter AND blob3 IN ('http', 'trpc') GROUP BY t, label ORDER BY t LIMIT 500",
1326+
"query": "SELECT $timeSeries AS t, blob1 AS label, SUM(_sample_interval * double1) / SUM(_sample_interval) AS avg_duration FROM gastown_events WHERE $timeFilter AND blob3 IN ('http', 'trpc') GROUP BY t, label ORDER BY t",
1327+
"rawSql": "SELECT $timeSeries AS t, blob1 AS label, SUM(_sample_interval * double1) / SUM(_sample_interval) AS avg_duration FROM gastown_events WHERE $timeFilter AND blob3 IN ('http', 'trpc') GROUP BY t, label ORDER BY t",
13281328
"refId": "A",
13291329
"round": "0s",
13301330
"showFormattedSQL": false,
@@ -1451,8 +1451,8 @@
14511451
"interval": "",
14521452
"intervalFactor": 1,
14531453
"nullifySparse": false,
1454-
"query": "SELECT blob4 AS route, blob3 AS delivery, SUM(_sample_interval) AS count, SUM(_sample_interval * double1) / SUM(_sample_interval) AS avg_latency_ms, MAX(double1) AS max_ms FROM gastown_events WHERE $timeFilter AND blob3 IN ('http', 'trpc') AND blob4 != '' GROUP BY route, delivery HAVING count > 5 ORDER BY avg_latency_ms DESC LIMIT 40",
1455-
"rawSql": "SELECT blob4 AS route, blob3 AS delivery, SUM(_sample_interval) AS count, SUM(_sample_interval * double1) / SUM(_sample_interval) AS avg_latency_ms, MAX(double1) AS max_ms FROM gastown_events WHERE $timeFilter AND blob3 IN ('http', 'trpc') AND blob4 != '' GROUP BY route, delivery HAVING count > 5 ORDER BY avg_latency_ms DESC LIMIT 40",
1454+
"query": "SELECT blob4 AS route, blob3 AS delivery, SUM(_sample_interval) AS count, SUM(_sample_interval * double1) / SUM(_sample_interval) AS avg_latency_ms, MAX(double1) AS max_ms FROM gastown_events WHERE $timeFilter AND blob3 IN ('http', 'trpc') AND blob4 != '' GROUP BY route, delivery HAVING count > 5 ORDER BY avg_latency_ms DESC",
1455+
"rawSql": "SELECT blob4 AS route, blob3 AS delivery, SUM(_sample_interval) AS count, SUM(_sample_interval * double1) / SUM(_sample_interval) AS avg_latency_ms, MAX(double1) AS max_ms FROM gastown_events WHERE $timeFilter AND blob3 IN ('http', 'trpc') AND blob4 != '' GROUP BY route, delivery HAVING count > 5 ORDER BY avg_latency_ms DESC",
14561456
"refId": "A",
14571457
"round": "0s",
14581458
"showFormattedSQL": false,
@@ -1817,8 +1817,8 @@
18171817
"interval": "",
18181818
"intervalFactor": 1,
18191819
"nullifySparse": false,
1820-
"query": "SELECT blob2 AS user_id, SUM(_sample_interval) AS total_events, SUM(IF(blob5 != '', _sample_interval, 0)) AS error_count, SUM(IF(blob5 != '', _sample_interval, 0)) / SUM(_sample_interval) AS error_rate, SUM(_sample_interval * double1) / SUM(_sample_interval) AS avg_latency_ms, COUNT(DISTINCT blob6) AS town_count FROM gastown_events WHERE $timeFilter AND blob2 != '' GROUP BY user_id ORDER BY total_events DESC LIMIT 25",
1821-
"rawSql": "SELECT blob2 AS user_id, SUM(_sample_interval) AS total_events, SUM(IF(blob5 != '', _sample_interval, 0)) AS error_count, SUM(IF(blob5 != '', _sample_interval, 0)) / SUM(_sample_interval) AS error_rate, SUM(_sample_interval * double1) / SUM(_sample_interval) AS avg_latency_ms, COUNT(DISTINCT blob6) AS town_count FROM gastown_events WHERE $timeFilter AND blob2 != '' GROUP BY user_id ORDER BY total_events DESC LIMIT 25",
1820+
"query": "SELECT blob2 AS user_id, SUM(_sample_interval) AS total_events, SUM(IF(blob5 != '', _sample_interval, 0)) AS error_count, SUM(IF(blob5 != '', _sample_interval, 0)) / SUM(_sample_interval) AS error_rate, SUM(_sample_interval * double1) / SUM(_sample_interval) AS avg_latency_ms, COUNT(DISTINCT blob6) AS town_count FROM gastown_events WHERE $timeFilter AND blob2 != '' GROUP BY user_id ORDER BY total_events DESC",
1821+
"rawSql": "SELECT blob2 AS user_id, SUM(_sample_interval) AS total_events, SUM(IF(blob5 != '', _sample_interval, 0)) AS error_count, SUM(IF(blob5 != '', _sample_interval, 0)) / SUM(_sample_interval) AS error_rate, SUM(_sample_interval * double1) / SUM(_sample_interval) AS avg_latency_ms, COUNT(DISTINCT blob6) AS town_count FROM gastown_events WHERE $timeFilter AND blob2 != '' GROUP BY user_id ORDER BY total_events DESC",
18221822
"refId": "A",
18231823
"round": "0s",
18241824
"showFormattedSQL": false,
@@ -1937,8 +1937,8 @@
19371937
"interval": "",
19381938
"intervalFactor": 1,
19391939
"nullifySparse": false,
1940-
"query": "SELECT blob2 AS user_id, SUM(IF(blob5 != '', _sample_interval, 0)) AS error_count, SUM(_sample_interval) AS total_events, SUM(IF(blob5 != '', _sample_interval, 0)) / SUM(_sample_interval) AS error_rate FROM gastown_events WHERE $timeFilter AND blob2 != '' GROUP BY user_id HAVING error_count > 0 ORDER BY error_count DESC LIMIT 25",
1941-
"rawSql": "SELECT blob2 AS user_id, SUM(IF(blob5 != '', _sample_interval, 0)) AS error_count, SUM(_sample_interval) AS total_events, SUM(IF(blob5 != '', _sample_interval, 0)) / SUM(_sample_interval) AS error_rate FROM gastown_events WHERE $timeFilter AND blob2 != '' GROUP BY user_id HAVING error_count > 0 ORDER BY error_count DESC LIMIT 25",
1940+
"query": "SELECT blob2 AS user_id, SUM(IF(blob5 != '', _sample_interval, 0)) AS error_count, SUM(_sample_interval) AS total_events, SUM(IF(blob5 != '', _sample_interval, 0)) / SUM(_sample_interval) AS error_rate FROM gastown_events WHERE $timeFilter AND blob2 != '' GROUP BY user_id HAVING error_count > 0 ORDER BY error_count DESC",
1941+
"rawSql": "SELECT blob2 AS user_id, SUM(IF(blob5 != '', _sample_interval, 0)) AS error_count, SUM(_sample_interval) AS total_events, SUM(IF(blob5 != '', _sample_interval, 0)) / SUM(_sample_interval) AS error_rate FROM gastown_events WHERE $timeFilter AND blob2 != '' GROUP BY user_id HAVING error_count > 0 ORDER BY error_count DESC",
19421942
"refId": "A",
19431943
"round": "0s",
19441944
"showFormattedSQL": false,
@@ -2108,12 +2108,12 @@
21082108
"editorMode": "sql",
21092109
"extrapolate": true,
21102110
"format": "table",
2111-
"formattedQuery": "/* grafana dashboard='Gastown Operations', user=admin */\nSELECT blob1 AS event, SUM(_sample_interval) AS count FROM gastown_events WHERE timestamp >= toDateTime(1773451903) AND timestamp <= toDateTime(1773453703) GROUP BY event ORDER BY count DESC LIMIT 10",
2111+
"formattedQuery": "/* grafana dashboard='Gastown Operations', user=admin */\nSELECT blob1 AS event, SUM(_sample_interval) AS count FROM gastown_events WHERE timestamp >= toDateTime(1773451903) AND timestamp <= toDateTime(1773453703) GROUP BY event ORDER BY count DESC",
21122112
"interval": "",
21132113
"intervalFactor": 1,
21142114
"nullifySparse": false,
2115-
"query": "SELECT blob1 AS event, SUM(_sample_interval) AS count FROM gastown_events WHERE $timeFilter GROUP BY event ORDER BY count DESC LIMIT 10",
2116-
"rawSql": "SELECT blob1 AS event, SUM(_sample_interval) AS count FROM gastown_events WHERE $timeFilter GROUP BY event ORDER BY count DESC LIMIT 10",
2115+
"query": "SELECT blob1 AS event, SUM(_sample_interval) AS count FROM gastown_events WHERE $timeFilter GROUP BY event ORDER BY count DESC",
2116+
"rawSql": "SELECT blob1 AS event, SUM(_sample_interval) AS count FROM gastown_events WHERE $timeFilter GROUP BY event ORDER BY count DESC",
21172117
"refId": "A",
21182118
"round": "0s",
21192119
"showFormattedSQL": false,
@@ -2123,7 +2123,7 @@
21232123
"useWindowFuncForMacros": true
21242124
}
21252125
],
2126-
"title": "Top 10 Events (share of total)",
2126+
"title": "Events by Count",
21272127
"type": "table"
21282128
},
21292129
{
@@ -2232,8 +2232,8 @@
22322232
"interval": "",
22332233
"intervalFactor": 1,
22342234
"nullifySparse": false,
2235-
"query": "SELECT blob1 AS event, SUM(_sample_interval) AS total, SUM(IF(blob5 = '', _sample_interval, 0)) AS success, SUM(IF(blob5 != '', _sample_interval, 0)) AS errors, SUM(IF(blob5 != '', _sample_interval, 0)) / SUM(_sample_interval) AS error_rate, SUM(_sample_interval * double1) / SUM(_sample_interval) AS avg_latency_ms FROM gastown_events WHERE $timeFilter GROUP BY event ORDER BY total DESC LIMIT 50",
2236-
"rawSql": "SELECT blob1 AS event, SUM(_sample_interval) AS total, SUM(IF(blob5 = '', _sample_interval, 0)) AS success, SUM(IF(blob5 != '', _sample_interval, 0)) AS errors, SUM(IF(blob5 != '', _sample_interval, 0)) / SUM(_sample_interval) AS error_rate, SUM(_sample_interval * double1) / SUM(_sample_interval) AS avg_latency_ms FROM gastown_events WHERE $timeFilter GROUP BY event ORDER BY total DESC LIMIT 50",
2235+
"query": "SELECT blob1 AS event, SUM(_sample_interval) AS total, SUM(IF(blob5 = '', _sample_interval, 0)) AS success, SUM(IF(blob5 != '', _sample_interval, 0)) AS errors, SUM(IF(blob5 != '', _sample_interval, 0)) / SUM(_sample_interval) AS error_rate, SUM(_sample_interval * double1) / SUM(_sample_interval) AS avg_latency_ms FROM gastown_events WHERE $timeFilter GROUP BY event ORDER BY total DESC",
2236+
"rawSql": "SELECT blob1 AS event, SUM(_sample_interval) AS total, SUM(IF(blob5 = '', _sample_interval, 0)) AS success, SUM(IF(blob5 != '', _sample_interval, 0)) AS errors, SUM(IF(blob5 != '', _sample_interval, 0)) / SUM(_sample_interval) AS error_rate, SUM(_sample_interval * double1) / SUM(_sample_interval) AS avg_latency_ms FROM gastown_events WHERE $timeFilter GROUP BY event ORDER BY total DESC",
22372237
"refId": "A",
22382238
"round": "0s",
22392239
"showFormattedSQL": false,

cloudflare-gastown/src/dos/Town.do.ts

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -450,6 +450,20 @@ export class TownDO extends DurableObject<Env> {
450450
return result;
451451
}
452452

453+
/**
454+
* Force-refresh the container token, bypassing the 1-hour throttle.
455+
* Called from the user-facing tRPC mutation so operators can manually
456+
* push a fresh JWT to the running container.
457+
*/
458+
async forceRefreshContainerToken(): Promise<void> {
459+
const townId = this.townId;
460+
if (!townId) throw new Error('townId not set');
461+
const townConfig = await this.getTownConfig();
462+
const userId = townConfig.owner_user_id ?? townId;
463+
await dispatch.refreshContainerToken(this.env, townId, userId);
464+
this.lastContainerTokenRefreshAt = Date.now();
465+
}
466+
453467
// ══════════════════════════════════════════════════════════════════
454468
// Rig Registry
455469
// ══════════════════════════════════════════════════════════════════

cloudflare-gastown/src/trpc/router.ts

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -575,6 +575,15 @@ export const gastownRouter = router({
575575
return townStub.updateTownConfig(input.config);
576576
}),
577577

578+
refreshContainerToken: gastownProcedure
579+
.input(z.object({ townId: z.string().uuid() }))
580+
.mutation(async ({ ctx, input }) => {
581+
await verifyTownOwnership(ctx.env, ctx.userId, input.townId);
582+
const townStub = getTownDOStub(ctx.env, input.townId);
583+
await townStub.setTownId(input.townId);
584+
await townStub.forceRefreshContainerToken();
585+
}),
586+
578587
// ── Events ──────────────────────────────────────────────────────────
579588

580589
getBeadEvents: gastownProcedure

src/app/(app)/gastown/[townId]/settings/TownSettingsPageClient.tsx

Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,8 @@ import {
2222
Shield,
2323
Variable,
2424
Layers,
25+
RefreshCw,
26+
Container,
2527
} from 'lucide-react';
2628
import { motion } from 'motion/react';
2729

@@ -37,6 +39,7 @@ const SECTIONS = [
3739
{ id: 'convoys', label: 'Convoys', icon: Layers },
3840
{ id: 'merge-strategy', label: 'Merge Strategy', icon: GitPullRequest },
3941
{ id: 'refinery', label: 'Refinery', icon: Shield },
42+
{ id: 'container', label: 'Container', icon: Container },
4043
] as const;
4144

4245
function useScrollSpy(sectionIds: readonly string[]) {
@@ -93,6 +96,13 @@ export function TownSettingsPageClient({ townId }: Props) {
9396
})
9497
);
9598

99+
const refreshToken = useMutation(
100+
trpc.gastown.refreshContainerToken.mutationOptions({
101+
onSuccess: () => toast.success('Container token refreshed'),
102+
onError: err => toast.error(`Token refresh failed: ${err.message}`),
103+
})
104+
);
105+
96106
// Local state for form fields
97107
const [envVars, setEnvVars] = useState<EnvVarEntry[]>([]);
98108
const [githubToken, setGithubToken] = useState('');
@@ -468,6 +478,39 @@ export function TownSettingsPageClient({ townId }: Props) {
468478
</div>
469479
</div>
470480
</SettingsSection>
481+
482+
{/* ── Container ──────────────────────────────────────── */}
483+
<SettingsSection
484+
id="container"
485+
title="Container"
486+
description="Manage the town's container runtime and authentication tokens."
487+
icon={Container}
488+
index={6}
489+
>
490+
<div className="space-y-3">
491+
<div className="flex items-center justify-between rounded-lg border border-white/[0.06] bg-white/[0.02] px-4 py-3">
492+
<div>
493+
<p className="text-sm text-white/70">Container Token</p>
494+
<p className="text-[11px] text-white/30">
495+
JWT shared by all agents in the container. Auto-refreshed hourly (8h
496+
expiry). Force a refresh if agents are experiencing auth failures.
497+
</p>
498+
</div>
499+
<Button
500+
onClick={() => refreshToken.mutate({ townId })}
501+
disabled={refreshToken.isPending}
502+
variant="secondary"
503+
size="sm"
504+
className="ml-4 shrink-0 gap-1.5"
505+
>
506+
<RefreshCw
507+
className={`size-3 ${refreshToken.isPending ? 'animate-spin' : ''}`}
508+
/>
509+
{refreshToken.isPending ? 'Refreshing...' : 'Refresh Token'}
510+
</Button>
511+
</div>
512+
</div>
513+
</SettingsSection>
471514
</div>
472515
</div>
473516

src/lib/gastown/types/router.d.ts

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -523,6 +523,13 @@ export declare const gastownRouter: import('@trpc/server').TRPCBuiltRouter<
523523
};
524524
meta: object;
525525
}>;
526+
refreshContainerToken: import('@trpc/server').TRPCMutationProcedure<{
527+
input: {
528+
townId: string;
529+
};
530+
output: void;
531+
meta: object;
532+
}>;
526533
getBeadEvents: import('@trpc/server').TRPCQueryProcedure<{
527534
input: {
528535
rigId: string;
@@ -1457,6 +1464,13 @@ export declare const wrappedGastownRouter: import('@trpc/server').TRPCBuiltRoute
14571464
};
14581465
meta: object;
14591466
}>;
1467+
refreshContainerToken: import('@trpc/server').TRPCMutationProcedure<{
1468+
input: {
1469+
townId: string;
1470+
};
1471+
output: void;
1472+
meta: object;
1473+
}>;
14601474
getBeadEvents: import('@trpc/server').TRPCQueryProcedure<{
14611475
input: {
14621476
rigId: string;

0 commit comments

Comments
 (0)