Skip to content

Commit 2f8e760

Browse files
authored
feat: add program certificates to tfact_certificate and improve watermark (#2209)
* feat: enhance certificate model to support program and course distinctions * feat: add updated_on timestamp to certificate model for incremental processing * feat: update program_id handling in certificate SQL and improve documentation for issued date key * feat: rename certificate_type to certificate_scope for clarity in SQL queries * yml file update
1 parent 8f6148a commit 2f8e760

2 files changed

Lines changed: 124 additions & 13 deletions

File tree

src/ol_dbt/models/dimensional/_fact_tables.yml

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -257,12 +257,13 @@ models:
257257
- name: tfact_certificate
258258
description: >
259259
Transaction fact table for certificate issuance events.
260-
Grain: One row per certificate issued.
260+
Grain: One row per certificate issued (course or program).
261261
tests:
262262
- dbt_utils.unique_combination_of_columns:
263263
combination_of_columns:
264264
- certificate_id
265265
- platform
266+
- certificate_scope
266267
columns:
267268
- name: certificate_key
268269
description: Surrogate key for certificate
@@ -274,9 +275,9 @@ models:
274275
tests:
275276
- not_null
276277
- name: certificate_issued_date_key
277-
description: Foreign key to dim_date for certificate issuance date
278+
description: Foreign key to dim_date for certificate issuance date. A very small
279+
number of program certificates have null issued dates in the edx.org source data, so this key may be null
278280
tests:
279-
- not_null
280281
- relationships:
281282
to: ref('dim_date')
282283
field: date_key
@@ -290,11 +291,23 @@ models:
290291
config:
291292
severity: warn
292293
- name: courserun_fk
293-
description: Foreign key to dim_course_run
294+
description: Foreign key to dim_course_run (null for program certificates)
294295
tests:
295296
- relationships:
296297
to: ref('dim_course_run')
297298
field: courserun_pk
299+
where: "courserun_fk is not null"
300+
config:
301+
severity: warn
302+
- name: program_fk
303+
description: Foreign key to dim_program (null for course certificates)
304+
tests:
305+
- relationships:
306+
to: ref('dim_program')
307+
field: program_pk
308+
where: "program_fk is not null"
309+
config:
310+
severity: warn
298311
- name: platform_fk
299312
description: Foreign key to dim_platform
300313
tests:
@@ -312,6 +325,13 @@ models:
312325
description: Platform identifier string
313326
tests:
314327
- not_null
328+
- name: certificate_scope
329+
description: Scope of certificate - 'course' for course run certificates, 'program'
330+
for program certificates
331+
tests:
332+
- not_null
333+
- accepted_values:
334+
values: ['course', 'program']
315335
- name: certificate_uuid
316336
description: Universally unique identifier for certificate
317337
- name: certificate_is_revoked
@@ -320,3 +340,5 @@ models:
320340
- not_null
321341
- name: certificate_created_on
322342
description: Certificate creation timestamp (for incremental processing)
343+
- name: certificate_updated_on
344+
description: Certificate last update timestamp (for incremental processing)

src/ol_dbt/models/dimensional/tfact_certificate.sql

Lines changed: 98 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@ with mitxonline_certificates as (
1515
, courseruncertificate_uuid as certificate_uuid
1616
, courseruncertificate_is_revoked as certificate_is_revoked
1717
, courseruncertificate_created_on as certificate_created_on
18+
, courseruncertificate_updated_on as certificate_updated_on
1819
, 'verified' as certificate_type_code
1920
, 'mitxonline' as platform
2021
, cast(null as varchar) as user_email -- micromasters join key only
22+
, cast(null as varchar) as program_id
2123
from {{ ref('int__mitxonline__courserun_certificates') }}
2224
)
2325

@@ -30,9 +32,11 @@ with mitxonline_certificates as (
3032
, courseruncertificate_uuid as certificate_uuid
3133
, courseruncertificate_is_revoked as certificate_is_revoked
3234
, courseruncertificate_created_on as certificate_created_on
35+
, courseruncertificate_updated_on as certificate_updated_on
3336
, 'professional' as certificate_type_code
3437
, 'mitxpro' as platform
3538
, cast(null as varchar) as user_email -- micromasters join key only
39+
, cast(null as varchar) as program_id
3640
from {{ ref('int__mitxpro__courserun_certificates') }}
3741
)
3842

@@ -45,9 +49,11 @@ with mitxonline_certificates as (
4549
, cast(null as varchar) as certificate_uuid
4650
, false as certificate_is_revoked -- edxorg doesn't track revocations
4751
, courseruncertificate_created_on as certificate_created_on
52+
, courseruncertificate_updated_on as certificate_updated_on
4853
, courseruncertificate_mode as certificate_type_code
4954
, 'edxorg' as platform
5055
, cast(null as varchar) as user_email -- micromasters join key only
56+
, cast(null as varchar) as program_id
5157
from {{ ref('int__edxorg__mitx_courserun_certificates') }}
5258
)
5359

@@ -60,12 +66,65 @@ with mitxonline_certificates as (
6066
, courseruncertificate_uuid as certificate_uuid
6167
, false as certificate_is_revoked
6268
, courseruncertificate_created_on as certificate_created_on
63-
, 'honor' as certificate_type_code -- MicroMasters edxorg certs are honor mode
69+
, courseruncertificate_created_on as certificate_updated_on --- micromasters only has created_on timestamp
70+
, 'verified' as certificate_type_code
6471
, 'micromasters' as platform
6572
, user_email
73+
, cast(null as varchar) as program_id
6674
from {{ ref('int__micromasters__course_certificates') }}
6775
)
6876

77+
, mitxonline_program_certificates as (
78+
select
79+
cast(programcertificate_id as varchar) as certificate_id
80+
, user_id
81+
, cast(null as integer) as courserun_id
82+
, cast(null as varchar) as courserun_readable_id
83+
, programcertificate_uuid as certificate_uuid
84+
, programcertificate_is_revoked as certificate_is_revoked
85+
, programcertificate_created_on as certificate_created_on
86+
, programcertificate_updated_on as certificate_updated_on
87+
, 'verified' as certificate_type_code
88+
, 'mitxonline' as platform
89+
, user_email
90+
, cast(program_id as varchar) as program_id
91+
from {{ ref('int__mitxonline__program_certificates') }}
92+
)
93+
94+
, mitxpro_program_certificates as (
95+
select
96+
cast(programcertificate_id as varchar) as certificate_id
97+
, user_id
98+
, cast(null as integer) as courserun_id
99+
, cast(null as varchar) as courserun_readable_id
100+
, programcertificate_uuid as certificate_uuid
101+
, programcertificate_is_revoked as certificate_is_revoked
102+
, programcertificate_created_on as certificate_created_on
103+
, programcertificate_updated_on as certificate_updated_on
104+
, 'professional' as certificate_type_code
105+
, 'mitxpro' as platform
106+
, user_email
107+
, cast(program_id as varchar) as program_id
108+
from {{ ref('int__mitxpro__program_certificates') }}
109+
)
110+
111+
, edxorg_program_certificates as (
112+
select
113+
program_certificate_hashed_id as certificate_id
114+
, user_id
115+
, cast(null as integer) as courserun_id
116+
, cast(null as varchar) as courserun_readable_id
117+
, program_certificate_hashed_id as certificate_uuid
118+
, false as certificate_is_revoked
119+
, program_certificate_awarded_on as certificate_created_on
120+
, program_certificate_awarded_on as certificate_updated_on -- edxorg only has awarded_on timestamp
121+
, 'verified' as certificate_type_code
122+
, 'edxorg' as platform
123+
, cast(null as varchar) as user_email -- micromasters join key only
124+
, program_uuid as program_id -- matches dim_program.source_id as edxorg doesn't have integer program IDs
125+
from {{ ref('int__edxorg__mitx_program_certificates') }}
126+
)
127+
69128
, combined_certificates as (
70129
select * from mitxonline_certificates
71130
union all
@@ -74,6 +133,12 @@ with mitxonline_certificates as (
74133
select * from edxorg_certificates
75134
union all
76135
select * from micromasters_certificates
136+
union all
137+
select * from mitxonline_program_certificates
138+
union all
139+
select * from mitxpro_program_certificates
140+
union all
141+
select * from edxorg_program_certificates
77142
)
78143

79144
, user_lookup as (
@@ -104,6 +169,11 @@ with mitxonline_certificates as (
104169
from {{ ref('dim_certificate_type') }}
105170
)
106171

172+
, dim_program as (
173+
select program_pk, source_id, platform_code
174+
from {{ ref('dim_program') }}
175+
)
176+
107177
, certificates_with_fks as (
108178
select
109179
combined_certificates.*
@@ -124,6 +194,8 @@ with mitxonline_certificates as (
124194
, dim_course_run.courserun_pk as courserun_fk
125195
, dim_platform_lookup.platform_pk as platform_fk
126196
, dim_certificate_type.certificate_type_pk as certificate_type_fk
197+
, dim_program.program_pk as program_fk
198+
, case when combined_certificates.program_id is not null then 'program' else 'course' end as certificate_scope
127199
, {{ iso8601_to_date_key('certificate_created_on') }} as certificate_issued_date_key
128200
from combined_certificates
129201
left join user_lookup as ul_mitxonline
@@ -151,6 +223,9 @@ with mitxonline_certificates as (
151223
on combined_certificates.platform = dim_platform_lookup.platform_readable_id
152224
left join dim_certificate_type
153225
on dim_certificate_type.certificate_type_code = combined_certificates.certificate_type_code
226+
left join dim_program
227+
on combined_certificates.program_id = dim_program.source_id
228+
and combined_certificates.platform = dim_program.platform_code
154229
)
155230

156231
{% if is_incremental() %}
@@ -160,36 +235,44 @@ with mitxonline_certificates as (
160235
-- StreamingAggregate, producing a 25B-row intermediate at 7TB.
161236
-- A pre-computed CTE + regular equijoin eliminates that fan-out entirely.
162237
, incremental_watermarks as (
163-
select platform as watermark_platform, max(certificate_created_on) as max_created_on
238+
select
239+
platform as watermark_platform
240+
, certificate_scope as watermark_certificate_type
241+
, max(coalesce(certificate_updated_on, certificate_created_on)) as max_activity_on
164242
from {{ this }}
165-
group by platform
166-
)
243+
group by platform, certificate_scope)
167244
{% endif %}
168245

169246
, final as (
170247
select
171248
{{ dbt_utils.generate_surrogate_key([
172249
'cast(certificate_id as varchar)',
173-
'platform'
250+
'platform',
251+
'certificate_scope'
174252
]) }} as certificate_key
175253
, certificate_id
176254
, certificate_issued_date_key
177255
, user_fk
178256
, courserun_fk
257+
, program_fk
179258
, platform_fk
180259
, certificate_type_fk
181260
, cwf.platform
261+
, cwf.certificate_scope
182262
, certificate_uuid
183263
, certificate_is_revoked
184264
, certificate_created_on
265+
, certificate_updated_on
185266
from certificates_with_fks as cwf
186267

187268
{% if is_incremental() %}
188269
-- left join preserves certificates from platform/scope combinations not yet in the target table
189-
left join incremental_watermarks w on w.watermark_platform = cwf.platform
270+
left join incremental_watermarks w
271+
on w.watermark_platform = cwf.platform
272+
and w.watermark_certificate_type = cwf.certificate_scope
190273
where (
191-
w.max_created_on is null -- platform not yet in target, include all
192-
or cwf.certificate_created_on > w.max_created_on
274+
w.max_activity_on is null -- platform/scope not yet in target, include all
275+
or coalesce(cwf.certificate_updated_on, cwf.certificate_created_on) >= w.max_activity_on
193276
or cwf.certificate_created_on is null
194277
)
195278
{% endif %}
@@ -206,15 +289,18 @@ with mitxonline_certificates as (
206289
, certificate_issued_date_key
207290
, user_fk
208291
, courserun_fk
292+
, program_fk
209293
, platform_fk
210294
, certificate_type_fk
211295
, platform
296+
, certificate_scope
212297
, certificate_uuid
213298
, certificate_is_revoked
214299
, certificate_created_on
300+
, certificate_updated_on
215301
, row_number() over (
216302
partition by certificate_key
217-
order by certificate_created_on desc nulls last
303+
order by coalesce(certificate_updated_on, certificate_created_on) desc nulls last
218304
) as _row_num
219305
from final
220306
)
@@ -225,11 +311,14 @@ select
225311
, certificate_issued_date_key
226312
, user_fk
227313
, courserun_fk
314+
, program_fk
228315
, platform_fk
229316
, certificate_type_fk
230317
, platform
318+
, certificate_scope
231319
, certificate_uuid
232320
, certificate_is_revoked
233321
, certificate_created_on
322+
, certificate_updated_on
234323
from final_deduped
235324
where _row_num = 1

0 commit comments

Comments
 (0)