|
11 | 11 | OpenLineageJobFacets, |
12 | 12 | OpenLineageJobProcessingType, |
13 | 13 | OpenLineageJobTypeJobFacet, |
| 14 | + OpenLineageSqlJobFacet, |
14 | 15 | ) |
15 | 16 | from data_rentgen.consumer.openlineage.run import OpenLineageRun |
16 | 17 | from data_rentgen.consumer.openlineage.run_event import ( |
|
29 | 30 | from data_rentgen.dto.location import LocationDTO |
30 | 31 | from data_rentgen.dto.operation import OperationTypeDTO |
31 | 32 | from data_rentgen.dto.run import RunDTO |
| 33 | +from data_rentgen.dto.sql_query import SQLQueryDTO |
32 | 34 |
|
33 | 35 |
|
34 | 36 | def test_extractors_extract_operation_spark_job_no_details(): |
@@ -302,3 +304,62 @@ def test_extractors_extract_operation_spark_job_finished( |
302 | 304 | started_at=None, |
303 | 305 | ended_at=ended_at, |
304 | 306 | ) |
| 307 | + |
| 308 | + |
def test_extractors_extract_operation_spark_job_sql_query():
    """Check that a ``sql`` job facet ends up in ``OperationDTO.sql_query``.

    A START event for the Spark ``SQL_JOB`` job ``mysession.execute_some_command``
    is built with a parent run facet pointing at the job ``mysession``. The
    expected DTO carries the operation name without the ``mysession.`` prefix,
    a run/job taken from the parent facet values, and the facet's query text
    wrapped in ``SQLQueryDTO``.
    """
    event_time = datetime(2024, 7, 5, 9, 6, 29, 462000, tzinfo=timezone.utc)
    parent_run_id = UUID("01908224-8410-79a2-8de6-a769ad6944c9")
    spark_operation_id = UUID("01908225-1fd7-746b-910c-70d24f2898b1")
    # Single source of truth for the statement: it is fed into the event's
    # facet and must come back unchanged in the extracted DTO.
    sql_text = "select id, name from schema.table where id = 1"

    job_facets = OpenLineageJobFacets(
        jobType=OpenLineageJobTypeJobFacet(
            processingType=OpenLineageJobProcessingType.BATCH,
            integration="SPARK",
            jobType="SQL_JOB",
        ),
        sql=OpenLineageSqlJobFacet(query=sql_text),
    )
    sql_job = OpenLineageJob(
        namespace="anything",
        name="mysession.execute_some_command",
        facets=job_facets,
    )

    parent_facet = OpenLineageParentRunFacet(
        job=OpenLineageParentJob(namespace="anything", name="mysession"),
        run=OpenLineageParentRun(runId=parent_run_id),
    )
    operation_run = OpenLineageRun(
        runId=spark_operation_id,
        facets=OpenLineageRunFacets(parent=parent_facet),
    )

    start_event = OpenLineageRunEvent(
        eventType=OpenLineageRunEventType.START,
        eventTime=event_time,
        job=sql_job,
        run=operation_run,
    )

    actual = extract_operation(start_event)

    expected_location = LocationDTO(
        type="unknown",
        name="anything",
        addresses={"unknown://anything"},
    )
    expected_run = RunDTO(
        id=parent_run_id,
        job=JobDTO(name="mysession", location=expected_location),
    )
    expected = OperationDTO(
        id=spark_operation_id,
        run=expected_run,
        name="execute_some_command",
        type=OperationTypeDTO.BATCH,
        position=None,
        description=None,
        status=OperationStatusDTO.STARTED,
        sql_query=SQLQueryDTO(query=sql_text),
        started_at=event_time,
        ended_at=None,
    )
    assert actual == expected
0 commit comments