Skip to content

Commit 006aecb

Browse files
authored
fix: add backfill migration for routing response denormalized tables (calcom#21474)
* fix: add backfill migration for routing response denormalized tables * update script * implement select type * improve number handling * clean up queries * rename * handle zero record case * revert * rename * fix division problem * log clean up * force adding new rows after removing existing rows * backfill only missing or wrong data * remove sleep
1 parent 0829929 commit 006aecb

2 files changed

Lines changed: 322 additions & 0 deletions

File tree

  • packages/prisma/migrations
    • 20250618093846_routing_form_response_denormalize_backfill
    • 20250618093923_routing_form_response_field_backfill
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
-- Backfill / repair of "RoutingFormResponseDenormalized".
--
-- Walks "App_RoutingForms_FormResponse" in ID windows of `chunk_size`, computes
-- the row each response is expected to have in the denormalized table, and
-- upserts only the rows that are missing or differ from the expectation.
-- Re-running the script is therefore cheap: rows that already match are skipped.
DO $$
DECLARE
    chunk_size          INTEGER := 1000; -- width of each ID window
    start_id            INTEGER := 1;    -- first response ID to scan
    end_id              INTEGER;         -- highest response ID, resolved below
    chunk_end           INTEGER;         -- inclusive upper bound of the current window
    processed_count     INTEGER := 0;    -- rows inserted/updated so far (all chunks)
    chunk_updated_count INTEGER := 0;    -- rows inserted/updated by the current chunk
    total_count         INTEGER;         -- number of responses in the source table
BEGIN
    -- One scan gives both the upper bound of the ID range and the row count.
    SELECT COALESCE(MAX(id), 0), COUNT(*)
    INTO end_id, total_count
    FROM "App_RoutingForms_FormResponse";

    -- Handle case where there are no records to process
    IF total_count = 0 THEN
        RAISE NOTICE 'No records found in App_RoutingForms_FormResponse table. Migration completed.';
        RETURN;
    END IF;

    RAISE NOTICE 'Starting migration: processing up to ID % (total responses: %)', end_id, total_count;

    -- Integer FOR loop: current_id takes start_id, start_id + chunk_size, ... up to end_id.
    FOR current_id IN start_id..end_id BY chunk_size
    LOOP
        chunk_end := current_id + chunk_size - 1;

        WITH expected_data AS (
            -- The row every response in this window *should* have in the denormalized table.
            SELECT
                r.id,
                r."formId"       AS expected_form_id,
                f.name           AS expected_form_name,
                f."teamId"       AS expected_form_team_id,
                f."userId"       AS expected_form_user_id,
                b.uid            AS expected_booking_uid,
                b.id             AS expected_booking_id,
                b.status         AS expected_booking_status,
                calculate_booking_status_order(b.status::text) AS expected_booking_status_order,
                b."createdAt"    AS expected_booking_created_at,
                b."startTime"    AS expected_booking_start_time,
                b."endTime"      AS expected_booking_end_time,
                b."userId"       AS expected_booking_user_id,
                u.name           AS expected_booking_user_name,
                u.email          AS expected_booking_user_email,
                u."avatarUrl"    AS expected_booking_user_avatar_url,
                -- NOTE(review): LIMIT 1 without ORDER BY picks an arbitrary reason when a
                -- booking has several; confirm whether at most one row per booking exists.
                COALESCE(
                    (
                        SELECT ar."reasonString"
                        FROM "AssignmentReason" ar
                        WHERE ar."bookingId" = b.id
                        LIMIT 1
                    ),
                    ''
                )                AS expected_booking_assignment_reason,
                et.id            AS expected_event_type_id,
                et."parentId"    AS expected_event_type_parent_id,
                et."schedulingType"::text AS expected_event_type_scheduling_type,
                r."createdAt"    AS expected_created_at,
                t.utm_source     AS expected_utm_source,
                t.utm_medium     AS expected_utm_medium,
                t.utm_campaign   AS expected_utm_campaign,
                t.utm_term       AS expected_utm_term,
                t.utm_content    AS expected_utm_content
            FROM "App_RoutingForms_FormResponse" r
            INNER JOIN "App_RoutingForms_Form" f ON f.id = r."formId"
            LEFT JOIN "Booking" b ON b.uid = r."routedToBookingUid"
            -- The bookingUser* columns describe the user of the booking, so the user
            -- row is resolved through the booking (same source as "bookingUserId"),
            -- not through the owner of the form. Without a booking they stay NULL.
            LEFT JOIN "users" u ON u.id = b."userId"
            LEFT JOIN "EventType" et ON et.id = b."eventTypeId"
            LEFT JOIN "Tracking" t ON t."bookingId" = b.id
            WHERE r.id BETWEEN current_id AND chunk_end
        ),
        records_to_update AS (
            -- Keep only rows that are absent from the denormalized table or differ in
            -- at least one column. IS DISTINCT FROM treats NULLs as comparable values,
            -- so every branch evaluates to true/false, never NULL.
            SELECT e.*
            FROM expected_data e
            LEFT JOIN "RoutingFormResponseDenormalized" d ON d.id = e.id
            WHERE d.id IS NULL
               OR d."formId" IS DISTINCT FROM e.expected_form_id
               OR d."formName" IS DISTINCT FROM e.expected_form_name
               OR d."formTeamId" IS DISTINCT FROM e.expected_form_team_id
               OR d."formUserId" IS DISTINCT FROM e.expected_form_user_id
               OR d."bookingUid" IS DISTINCT FROM e.expected_booking_uid
               OR d."bookingId" IS DISTINCT FROM e.expected_booking_id
               OR d."bookingStatus" IS DISTINCT FROM e.expected_booking_status
               OR d."bookingStatusOrder" IS DISTINCT FROM e.expected_booking_status_order
               OR d."bookingCreatedAt" IS DISTINCT FROM e.expected_booking_created_at
               OR d."bookingStartTime" IS DISTINCT FROM e.expected_booking_start_time
               OR d."bookingEndTime" IS DISTINCT FROM e.expected_booking_end_time
               OR d."bookingUserId" IS DISTINCT FROM e.expected_booking_user_id
               OR d."bookingUserName" IS DISTINCT FROM e.expected_booking_user_name
               OR d."bookingUserEmail" IS DISTINCT FROM e.expected_booking_user_email
               OR d."bookingUserAvatarUrl" IS DISTINCT FROM e.expected_booking_user_avatar_url
               OR d."bookingAssignmentReason" IS DISTINCT FROM e.expected_booking_assignment_reason
               OR d."eventTypeId" IS DISTINCT FROM e.expected_event_type_id
               OR d."eventTypeParentId" IS DISTINCT FROM e.expected_event_type_parent_id
               OR d."eventTypeSchedulingType"::text IS DISTINCT FROM e.expected_event_type_scheduling_type
               OR d."createdAt" IS DISTINCT FROM e.expected_created_at
               OR d."utm_source" IS DISTINCT FROM e.expected_utm_source
               OR d."utm_medium" IS DISTINCT FROM e.expected_utm_medium
               OR d."utm_campaign" IS DISTINCT FROM e.expected_utm_campaign
               OR d."utm_term" IS DISTINCT FROM e.expected_utm_term
               OR d."utm_content" IS DISTINCT FROM e.expected_utm_content
        )
        -- Upsert: insert missing rows, overwrite every column of rows that differ.
        INSERT INTO "RoutingFormResponseDenormalized" (
            id,
            "formId",
            "formName",
            "formTeamId",
            "formUserId",
            "bookingUid",
            "bookingId",
            "bookingStatus",
            "bookingStatusOrder",
            "bookingCreatedAt",
            "bookingStartTime",
            "bookingEndTime",
            "bookingUserId",
            "bookingUserName",
            "bookingUserEmail",
            "bookingUserAvatarUrl",
            "bookingAssignmentReason",
            "eventTypeId",
            "eventTypeParentId",
            "eventTypeSchedulingType",
            "createdAt",
            "utm_source",
            "utm_medium",
            "utm_campaign",
            "utm_term",
            "utm_content"
        )
        SELECT
            r.id,
            r.expected_form_id,
            r.expected_form_name,
            r.expected_form_team_id,
            r.expected_form_user_id,
            r.expected_booking_uid,
            r.expected_booking_id,
            r.expected_booking_status,
            r.expected_booking_status_order,
            r.expected_booking_created_at,
            r.expected_booking_start_time,
            r.expected_booking_end_time,
            r.expected_booking_user_id,
            r.expected_booking_user_name,
            r.expected_booking_user_email,
            r.expected_booking_user_avatar_url,
            r.expected_booking_assignment_reason,
            r.expected_event_type_id,
            r.expected_event_type_parent_id,
            r.expected_event_type_scheduling_type::text,
            r.expected_created_at,
            r.expected_utm_source,
            r.expected_utm_medium,
            r.expected_utm_campaign,
            r.expected_utm_term,
            r.expected_utm_content
        FROM records_to_update r
        ON CONFLICT (id) DO UPDATE SET
            "formId" = EXCLUDED."formId",
            "formName" = EXCLUDED."formName",
            "formTeamId" = EXCLUDED."formTeamId",
            "formUserId" = EXCLUDED."formUserId",
            "bookingUid" = EXCLUDED."bookingUid",
            "bookingId" = EXCLUDED."bookingId",
            "bookingStatus" = EXCLUDED."bookingStatus",
            "bookingStatusOrder" = EXCLUDED."bookingStatusOrder",
            "bookingCreatedAt" = EXCLUDED."bookingCreatedAt",
            "bookingStartTime" = EXCLUDED."bookingStartTime",
            "bookingEndTime" = EXCLUDED."bookingEndTime",
            "bookingUserId" = EXCLUDED."bookingUserId",
            "bookingUserName" = EXCLUDED."bookingUserName",
            "bookingUserEmail" = EXCLUDED."bookingUserEmail",
            "bookingUserAvatarUrl" = EXCLUDED."bookingUserAvatarUrl",
            "bookingAssignmentReason" = EXCLUDED."bookingAssignmentReason",
            "eventTypeId" = EXCLUDED."eventTypeId",
            "eventTypeParentId" = EXCLUDED."eventTypeParentId",
            "eventTypeSchedulingType" = EXCLUDED."eventTypeSchedulingType",
            "createdAt" = EXCLUDED."createdAt",
            "utm_source" = EXCLUDED."utm_source",
            "utm_medium" = EXCLUDED."utm_medium",
            "utm_campaign" = EXCLUDED."utm_campaign",
            "utm_term" = EXCLUDED."utm_term",
            "utm_content" = EXCLUDED."utm_content";

        -- ROW_COUNT of the upsert = rows inserted or updated in this window.
        GET DIAGNOSTICS chunk_updated_count = ROW_COUNT;
        processed_count := processed_count + chunk_updated_count;

        RAISE NOTICE 'Chunk processed: IDs %-% (updated/inserted: % records, total updated: %)',
            current_id, chunk_end, chunk_updated_count, processed_count;
    END LOOP;

    RAISE NOTICE 'Migration completed: processed up to ID % (total updated/inserted: % records out of % total records)',
        end_id, processed_count, total_count;
END $$;
Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
-- Backfill / repair of "RoutingFormResponseField".
--
-- Walks "App_RoutingForms_FormResponse" in ID windows of `chunk_size`. For each
-- window it expands every response into (response, form field) pairs, compares
-- them with the rows stored in "RoutingFormResponseField", and calls
-- reprocess_routing_form_response_fields(id) for each response that has at least
-- one missing or mismatching field. A failure on one response is logged as a
-- WARNING and does not stop the remaining responses.
DO $$
DECLARE
    chunk_size          INTEGER := 1000; -- width of each ID window
    start_id            INTEGER := 1;    -- first response ID to scan
    end_id              INTEGER;         -- highest response ID, resolved below
    chunk_end           INTEGER;         -- inclusive upper bound of the current window
    response_record     RECORD;          -- one response that needs reprocessing
    processed_count     INTEGER := 0;    -- responses reprocessed so far (all chunks)
    chunk_updated_count INTEGER := 0;    -- responses reprocessed in the current chunk
    total_count         INTEGER;         -- number of responses in the source table
BEGIN
    -- One scan gives both the upper bound of the ID range and the row count.
    SELECT COALESCE(MAX(id), 0), COUNT(*)
    INTO end_id, total_count
    FROM "App_RoutingForms_FormResponse";

    -- Handle case where there are no records to process
    IF total_count = 0 THEN
        RAISE NOTICE 'No records found in App_RoutingForms_FormResponse table. Migration completed.';
        RETURN;
    END IF;

    RAISE NOTICE 'Starting migration: processing up to ID % (total responses: %)', end_id, total_count;

    -- Integer FOR loop: current_id takes start_id, start_id + chunk_size, ... up to end_id.
    FOR current_id IN start_id..end_id BY chunk_size
    LOOP
        chunk_end := current_id + chunk_size - 1;
        chunk_updated_count := 0;

        -- Process only responses that have missing or incorrect field data
        FOR response_record IN
            WITH form_response_fields AS (
                -- One row per (response, form field) for fields present in the response JSON.
                SELECT
                    r.id AS "responseId",
                    r."formId",
                    field->>'id' AS "fieldId",
                    field->>'type' AS "fieldType",
                    response_value->>'value' AS raw_value,
                    jsonb_typeof(response_value->'value') AS value_type,
                    -- Multiselect: the JSON array flattened to text[]
                    CASE
                        WHEN field->>'type' = 'multiselect'
                             AND jsonb_typeof(response_value->'value') = 'array'
                        THEN ARRAY(SELECT jsonb_array_elements_text(response_value->'value'))
                    END AS expected_array,
                    -- Select: a plain string, or the first element when stored as an array.
                    -- ->> returns the element as unquoted text; casting the jsonb element
                    -- with ::text would keep the JSON double quotes ("opt" instead of opt).
                    CASE
                        WHEN field->>'type' = 'select'
                             AND jsonb_typeof(response_value->'value') = 'array'
                        THEN response_value->'value'->>0
                        WHEN field->>'type' = 'select'
                             AND jsonb_typeof(response_value->'value') = 'string'
                        THEN response_value->>'value'
                    END AS expected_select_value
                FROM "App_RoutingForms_FormResponse" r
                INNER JOIN "App_RoutingForms_Form" frm ON frm.id = r."formId"
                CROSS JOIN LATERAL jsonb_array_elements(frm.fields::jsonb) AS field
                CROSS JOIN LATERAL (
                    SELECT r.response::jsonb->(field->>'id') AS response_value
                ) AS rv
                WHERE r.id BETWEEN current_id AND chunk_end
                  AND r.response::jsonb ? (field->>'id') -- Only include fields that exist in response
            ),
            responses_needing_update AS (
                SELECT DISTINCT frf."responseId"
                FROM form_response_fields frf
                LEFT JOIN "RoutingFormResponseField" rf
                    ON rf."responseId" = frf."responseId"
                   AND rf."fieldId" = frf."fieldId"
                WHERE
                    -- Case 1: Field doesn't exist in denormalized table
                    rf.id IS NULL
                    -- Case 2: Multiselect field stored array differs from the response array
                    -- (array inequality already covers a difference in length)
                    OR (frf."fieldType" = 'multiselect' AND frf.value_type = 'array' AND (
                        rf."valueStringArray" IS NULL
                        OR rf."valueStringArray" != frf.expected_array
                    ))
                    -- Case 3: Number field validation - compare as numbers
                    OR (frf."fieldType" = 'number' AND frf.value_type = 'number' AND (
                        rf."valueNumber" IS NULL
                        OR rf."valueNumber" != (frf.raw_value)::decimal
                    ))
                    -- Case 4: Select field validation - compare expected select value
                    OR (frf."fieldType" = 'select' AND (
                        rf."valueString" IS NULL
                        OR rf."valueString" != frf.expected_select_value
                    ))
                    -- Case 5: Other string field validation
                    OR (frf."fieldType" NOT IN ('multiselect', 'number', 'select') AND (
                        rf."valueString" IS NULL
                        OR rf."valueString" != frf.raw_value
                    ))
            )
            SELECT rnu."responseId" AS id
            FROM responses_needing_update rnu
            ORDER BY rnu."responseId"
        LOOP
            -- The nested block scopes the exception handler to a single response.
            BEGIN
                -- Use the reprocess_routing_form_response_fields function
                PERFORM reprocess_routing_form_response_fields(response_record.id);
                chunk_updated_count := chunk_updated_count + 1;
            EXCEPTION WHEN OTHERS THEN
                RAISE WARNING 'Failed to process responseId %: %', response_record.id, SQLERRM;
                CONTINUE;
            END;
        END LOOP;

        processed_count := processed_count + chunk_updated_count;

        RAISE NOTICE 'Chunk processed: IDs %-% (updated: % records, total updated: %)',
            current_id, chunk_end, chunk_updated_count, processed_count;

        -- No pg_sleep between chunks: the whole DO block executes in one transaction,
        -- so pausing would release no locks and only make the migration run longer.
    END LOOP;

    RAISE NOTICE 'Migration completed: processed up to ID % (total updated: % records out of % total records)',
        end_id, processed_count, total_count;
END $$;

0 commit comments

Comments
 (0)