Skip to content

Commit 5b149be

Browse files
authored
fix clickhouse flaky tests (#1196)
<!-- Make sure you've read the CONTRIBUTING.md guidelines: https://github.com/stack-auth/stack-auth/blob/dev/CONTRIBUTING.md -->
1 parent 331dd5d commit 5b149be

4 files changed

Lines changed: 105 additions & 6 deletions

File tree

apps/backend/scripts/clickhouse-migrations.ts

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ export async function runClickhouseMigrations() {
1717
await client.exec({ query: USERS_TABLE_BASE_SQL });
1818
await client.exec({ query: USERS_VIEW_SQL });
1919
await client.exec({ query: TOKEN_REFRESH_EVENT_ROW_FORMAT_MUTATION_SQL });
20+
await client.exec({ query: SIGN_UP_RULE_TRIGGER_EVENT_ROW_FORMAT_MUTATION_SQL });
2021
const queries = [
2122
"REVOKE ALL PRIVILEGES ON *.* FROM limited_user;",
2223
"REVOKE ALL FROM limited_user;",
@@ -93,6 +94,27 @@ WHERE event_type = '$token-refresh'
9394
AND JSONHas(toJSONString(data), 'refreshTokenId');
9495
`;
9596

97+
// Normalizes legacy $sign-up-rule-trigger rows (camelCase JSON) to the new format:
98+
// - Row identity stays in columns (project_id/branch_id)
99+
// - data JSON becomes { project_id, branch_id, rule_id, action, email, auth_method, oauth_provider } (snake_case)
100+
const SIGN_UP_RULE_TRIGGER_EVENT_ROW_FORMAT_MUTATION_SQL = `
101+
ALTER TABLE analytics_internal.events
102+
UPDATE
103+
data = CAST(concat(
104+
'{',
105+
'"project_id":', toJSONString(JSONExtractString(toJSONString(data), 'projectId')), ',',
106+
'"branch_id":', toJSONString(JSONExtractString(toJSONString(data), 'branchId')), ',',
107+
'"rule_id":', toJSONString(JSONExtractString(toJSONString(data), 'ruleId')), ',',
108+
'"action":', toJSONString(JSONExtractString(toJSONString(data), 'action')), ',',
109+
'"email":', toJSONString(JSONExtract(toJSONString(data), 'email', 'Nullable(String)')), ',',
110+
'"auth_method":', toJSONString(JSONExtract(toJSONString(data), 'authMethod', 'Nullable(String)')), ',',
111+
'"oauth_provider":', toJSONString(JSONExtract(toJSONString(data), 'oauthProvider', 'Nullable(String)')),
112+
'}'
113+
) AS JSON)
114+
WHERE event_type = '$sign-up-rule-trigger'
115+
AND JSONHas(toJSONString(data), 'ruleId');
116+
`;
117+
96118
const USERS_TABLE_BASE_SQL = `
97119
CREATE TABLE IF NOT EXISTS analytics_internal.users (
98120
project_id String,
@@ -103,9 +125,9 @@ CREATE TABLE IF NOT EXISTS analytics_internal.users (
103125
primary_email Nullable(String),
104126
primary_email_verified UInt8,
105127
signed_up_at DateTime64(3, 'UTC'),
106-
client_metadata JSON,
107-
client_read_only_metadata JSON,
108-
server_metadata JSON,
128+
client_metadata String,
129+
client_read_only_metadata String,
130+
server_metadata String,
109131
is_anonymous UInt8,
110132
restricted_by_admin UInt8,
111133
restricted_by_admin_reason Nullable(String),

apps/backend/src/app/api/latest/internal/metrics/route.tsx

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ async function loadUsersByCountry(tenancy: Tenancy, prisma: PrismaClientTransact
4646
const userIds = users.map((user) => user.projectUserId);
4747
const scalingFactor = totalUsers > users.length ? totalUsers / users.length : 1;
4848

49+
// Build ClickHouse array literal inline in the query body (sent via POST) instead of
50+
// passing as query_params (sent as URL params) to avoid the HTTP form field size limit
51+
// when there are many user IDs. UUIDs contain only hex chars and dashes, but we double
52+
// single quotes defensively. NOTE: backslashes are not escaped, so this relies on the IDs being UUIDs.
53+
const userIdsArrayLiteral = `[${userIds.map(id => `'${id.replace(/'/g, "''")}'`).join(',')}]`;
54+
4955
const clickhouseClient = getClickhouseAdminClient();
5056
const res = await clickhouseClient.query({
5157
query: `
@@ -67,7 +73,7 @@ async function loadUsersByCountry(tenancy: Tenancy, prisma: PrismaClientTransact
6773
AND project_id = {projectId:String}
6874
AND branch_id = {branchId:String}
6975
AND user_id IS NOT NULL
70-
AND has({userIds:Array(String)}, assumeNotNull(user_id))
76+
AND has(${userIdsArrayLiteral}, assumeNotNull(user_id))
7177
)
7278
WHERE cc IS NOT NULL
7379
AND ({includeAnonymous:UInt8} = 1 OR is_anonymous = 0)
@@ -80,7 +86,6 @@ async function loadUsersByCountry(tenancy: Tenancy, prisma: PrismaClientTransact
8086
query_params: {
8187
projectId: tenancy.project.id,
8288
branchId: tenancy.branchId,
83-
userIds,
8489
includeAnonymous: includeAnonymous ? 1 : 0,
8590
},
8691
format: "JSONEachRow",

apps/backend/src/lib/external-db-sync.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,9 @@ async function pushRowsToClickhouse(
430430
return {
431431
...rest,
432432
sync_sequence_id: sequenceId,
433+
client_metadata: JSON.stringify(rest.client_metadata),
434+
client_read_only_metadata: JSON.stringify(rest.client_read_only_metadata),
435+
server_metadata: JSON.stringify(rest.server_metadata),
433436
primary_email_verified: normalizeClickhouseBoolean(rest.primary_email_verified, "primary_email_verified"),
434437
is_anonymous: normalizeClickhouseBoolean(rest.is_anonymous, "is_anonymous"),
435438
restricted_by_admin: normalizeClickhouseBoolean(rest.restricted_by_admin, "restricted_by_admin"),

apps/e2e/tests/backend/endpoints/api/v1/external-db-sync-advanced.test.ts

Lines changed: 70 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
import { wait } from "@stackframe/stack-shared/dist/utils/promises";
2+
import { StackAssertionError } from "@stackframe/stack-shared/dist/utils/errors";
13
import { Client } from 'pg';
24
import { afterAll, beforeAll, describe, expect } from 'vitest';
35
import { test } from '../../../../helpers';
4-
import { InternalApiKey, User, backendContext, niceBackendFetch } from '../../../backend-helpers';
6+
import { InternalApiKey, Project, User, backendContext, niceBackendFetch } from '../../../backend-helpers';
57
import {
68
HIGH_VOLUME_TIMEOUT,
79
POSTGRES_HOST,
@@ -19,6 +21,38 @@ import {
1921

2022
const COMPLEX_SEQUENCE_TIMEOUT = TEST_TIMEOUT * 2 + 30_000;
2123

24+
async function runQueryForCurrentProject(body: { query: string, params?: Record<string, string>, timeout_ms?: number }) {
25+
return await niceBackendFetch("/api/v1/internal/analytics/query", {
26+
method: "POST",
27+
accessType: "admin",
28+
body,
29+
});
30+
}
31+
32+
async function waitForClickhouseUser(email: string, expectedDisplayName: string) {
33+
const timeoutMs = 180_000;
34+
const intervalMs = 2_000;
35+
const start = performance.now();
36+
37+
while (performance.now() - start < timeoutMs) {
38+
const response = await runQueryForCurrentProject({
39+
query: "SELECT primary_email, display_name FROM users WHERE primary_email = {email:String}",
40+
params: { email },
41+
});
42+
if (
43+
response.status === 200
44+
&& Array.isArray(response.body?.result)
45+
&& response.body.result.length === 1
46+
&& response.body.result[0]?.display_name === expectedDisplayName
47+
) {
48+
return response;
49+
}
50+
await wait(intervalMs);
51+
}
52+
53+
throw new StackAssertionError(`Timed out waiting for ClickHouse user ${email} to sync.`);
54+
}
55+
2256
describe.sequential('External DB Sync - Advanced Tests', () => {
2357
let dbManager: TestDbManager;
2458
const createProjectWithExternalDb = (
@@ -1126,4 +1160,39 @@ $$;`);
11261160
await internalClient.end();
11271161
}
11281162
}, HIGH_VOLUME_TIMEOUT);
1163+
1164+
/**
1165+
* What it does:
1166+
* - Configures a project with a bad postgres connection string (simulating postgres being down).
1167+
* - Creates a user and verifies it still syncs to ClickHouse despite the postgres failure.
1168+
* - Does not exercise the reverse direction (a project with a valid postgres DB); only the
1169+
* ClickHouse side is asserted here.
1170+
*
1171+
* Why it matters:
1172+
* - Proves that a failing postgres sync target does not block ClickHouse sync from
1173+
* completing successfully.
1174+
*/
1175+
test('Cross-DB resilience: postgres down does not block ClickHouse sync', async () => {
1176+
const badConnectionString = 'postgresql://invalid:invalid@invalid:5432/invalid';
1177+
1178+
// Create a project with only a bad postgres DB — ClickHouse syncs automatically via env var
1179+
await createProjectWithExternalDb({
1180+
bad_pg: {
1181+
type: 'postgres',
1182+
connectionString: badConnectionString,
1183+
},
1184+
});
1185+
1186+
const email = 'cross-db-resilience@example.com';
1187+
const user = await User.create({ primary_email: email });
1188+
await niceBackendFetch(`/api/v1/users/${user.userId}`, {
1189+
accessType: 'admin',
1190+
method: 'PATCH',
1191+
body: { display_name: 'Cross DB User' },
1192+
});
1193+
1194+
// ClickHouse should still receive the data even though postgres sync fails
1195+
await waitForClickhouseUser(email, 'Cross DB User');
1196+
1197+
}, TEST_TIMEOUT);
11291198
});

0 commit comments

Comments
 (0)