Skip to content

Commit f2d01fe

Browse files
authored
feat: add migration model for edX.org to MITx Online program certificates (#2197)
* feat: add migration model for edX.org to MITx Online program certificates
* improve user ID matching
* add certificate_page_revision_id; feedback
* feat: update migration model for program certificates with new fields
* feat: normalize program titles in migration model and update certificate page revision logic
1 parent ff69dbe commit f2d01fe

2 files changed

Lines changed: 144 additions & 0 deletions

File tree

src/ol_dbt/models/migration/_migration__models.yml

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,47 @@ models:
107107
- name: user_country
108108
description: string, user address country
109109

110+
# edX.org program certificates that are candidates for migration to MITx Online.
- name: edxorg_to_mitxonline_program_certificates
  description: edX.org MITx program certificates not yet migrated to MITx Online
  columns:
  # --- learner identity ---
  - name: user_edxorg_id
    data_type: integer
    description: user ID on the edX.org platform.
  - name: user_mitxonline_id
    data_type: integer
    description: user ID on the MITx Online platform, if the user has a MITx Online
      account.
  - name: user_email
    data_type: varchar
    description: user email, preferring the MITx Online email when available, otherwise
      the edX.org email.
  # --- program ---
  - name: program_title
    data_type: varchar
    description: title of the program for which the certificate was awarded on edX.org.
    tests:
    - not_null
  - name: program_type
    data_type: varchar
    description: type of program (e.g. MicroMasters).
  - name: program_id
    data_type: integer
    description: program ID on MITx Online, populated when the program title matches
      an existing MITx Online program.
  - name: program_readable_id
    data_type: varchar
    description: readable ID of the program on MITx Online.
  # --- certificate ---
  - name: program_certificate_issued_on
    data_type: timestamp
    description: date and time when the program certificate was awarded on edX.org.
  - name: certificate_page_revision_id
    data_type: integer
    description: the live wagtail page revision ID for the program certificate page
      on MITx Online, used when creating the migrated certificate.
  tests:
  # Each (user_email, program_title) pair may appear at most once.
  - dbt_expectations.expect_compound_columns_to_be_unique:
      arguments:
        column_list:
        - user_email
        - program_title
150+
110151
- name: edxorg_to_mitxonline_program_entitlements
111152
description: Program entitlements to program orders to be migrated to MITx Online
112153
columns:
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
-- edX.org MITx program certificates for non-retired users, with an extra
-- normalized_program_title column used further down to match MITx Online
-- programs by title.
with edxorg_program_certificates_raw as (
    select
        *
        -- Rewrite the edX.org titles that are known to differ from the title
        -- they are matched against; every other title passes through unchanged.
        , case
            when program_title = 'MIT Finance' then 'Finance'
            when program_title = 'Supply Chain Management' then 'MITx MicroMasters® Program in Supply Chain Management'
            else program_title
        end as normalized_program_title
    from {{ ref('int__edxorg__mitx_program_certificates') }}
    -- `_` is a single-character wildcard in LIKE, so the underscores are
    -- escaped to exclude only usernames that literally start with
    -- "retired__user". Rows with a NULL username are still filtered out,
    -- exactly as before.
    where user_username not like 'retired!_!_user%' escape '!'
)
12+
13+
-- One certificate per edX.org user and normalized program title, keeping the
-- earliest award. The helper column row_num (always 1) is retained in the output.
, edxorg_program_certificates as (
    select * from (
        select
            *
            , row_number() over (
                partition by user_id, normalized_program_title
                -- program_title is a tie-breaker: when several certificates in
                -- a partition share the same award timestamp, the surviving
                -- row (and its raw title) is now chosen deterministically.
                order by program_certificate_awarded_on, program_title
            ) as row_num
        from edxorg_program_certificates_raw
    )
    where row_num = 1
)
25+
26+
-- MITx Online programs: the IDs and title needed to match edX.org programs by title.
, mitxonline_programs as (
    select
        programs.program_id
        , programs.program_title
        , programs.program_readable_id
    from {{ ref('int__mitxonline__programs') }} as programs
)
33+
34+
-- Combined MITx user records; the final select reads the edX.org and
-- MITx Online IDs and emails from here.
, mitx__users as (
    select *
    from {{ ref('int__mitx__users') }}
)
37+
38+
-- Program certificates that already exist on MITx Online, used to exclude
-- certificates that do not need to be migrated.
, mitxonline_program_certificates as (
    select
        certificates.user_id
        , certificates.program_id
        , certificates.user_email
    from {{ ref('int__mitxonline__program_certificates') }} as certificates
)
45+
46+
-- MITx Online CMS program pages (links a program_id to its wagtail page).
, program_pages as (
    select *
    from {{ ref('stg__mitxonline__app__postgres__cms_programpage') }}
)
49+
50+
-- MITx Online wagtail pages (path, slug and live revision of every CMS page).
, wagtail_page as (
    select *
    from {{ ref('stg__mitxonline__app__postgres__cms_wagtail_page') }}
)
53+
54+
-- For each MITx Online program, a single live page revision ID taken from the
-- certificate page(s) found beneath the program's wagtail page.
, mitxonline_program_certificate_page as (
    select
        program_pages.program_id
        -- min() reduces several matching pages to one revision per program.
        , min(certificate_page.wagtail_page_live_pagerevision_id) as certificate_page_revision_id
    from program_pages
    inner join wagtail_page as program_page
        on program_pages.wagtail_page_id = program_page.wagtail_page_id
    -- A certificate page is any other page whose path extends the program
    -- page's path and whose slug starts with "certificate".
    inner join wagtail_page as certificate_page
        on
            certificate_page.wagtail_page_slug like 'certificate%'
            and certificate_page.wagtail_page_path <> program_page.wagtail_page_path
            and certificate_page.wagtail_page_path like program_page.wagtail_page_path || '%'
    group by program_pages.program_id
)
67+
68+
-- Final result: edX.org program certificates, enriched with the matching
-- MITx Online user, program and certificate page revision, restricted to
-- certificates that have no counterpart on MITx Online yet.
select
    edx_certificates.user_id as user_edxorg_id
    -- The account found by email takes precedence over the one linked by edX.org user ID.
    , coalesce(users_by_email.user_mitxonline_id, users_by_edxorg_id.user_mitxonline_id) as user_mitxonline_id
    , coalesce(
        users_by_email.user_mitxonline_email
        , users_by_edxorg_id.user_mitxonline_email
        , users_by_edxorg_id.user_edxorg_email
    ) as user_email
    , edx_certificates.program_title
    , edx_certificates.program_type
    , programs.program_id
    , programs.program_readable_id
    , edx_certificates.program_certificate_awarded_on as program_certificate_issued_on
    , certificate_pages.certificate_page_revision_id
from edxorg_program_certificates as edx_certificates
-- Programs are matched on the normalized title, case-insensitively.
left join mitxonline_programs as programs
    on lower(edx_certificates.normalized_program_title) = lower(programs.program_title)
-- User record linked to the certificate by edX.org user ID.
left join mitx__users as users_by_edxorg_id
    on edx_certificates.user_id = users_by_edxorg_id.user_edxorg_id
-- User record whose MITx Online email equals that user's edX.org email.
left join mitx__users as users_by_email
    on lower(users_by_edxorg_id.user_edxorg_email) = lower(users_by_email.user_mitxonline_email)
-- Existing MITx Online certificate for the same program, found by user ID.
left join mitxonline_program_certificates as existing_by_user_id
    on
        existing_by_user_id.user_id
        = coalesce(users_by_email.user_mitxonline_id, users_by_edxorg_id.user_mitxonline_id)
        and existing_by_user_id.program_id = programs.program_id
-- Existing MITx Online certificate for the same program, found by email.
left join mitxonline_program_certificates as existing_by_email
    on
        lower(existing_by_email.user_email)
        = lower(coalesce(users_by_email.user_mitxonline_email, users_by_edxorg_id.user_mitxonline_email))
        and existing_by_email.program_id = programs.program_id
left join mitxonline_program_certificate_page as certificate_pages
    on programs.program_id = certificate_pages.program_id
where
    -- Exclude certificates already present on MITx Online (matched by user ID or email)
    existing_by_user_id.user_id is null
    and existing_by_email.user_email is null

0 commit comments

Comments
 (0)