Skip to content

Commit 6f851c1

Browse files
authored
[DOP-23676] Add external_id and external_url for Datasets (#432)
* [DOP-23676] add external_id and external_url for Dataset * [DOP-23676] add external_id and external_url for Dataset * [DOP-23676] fixes
1 parent 77d1818 commit 6f851c1

10 files changed

Lines changed: 79 additions & 0 deletions

File tree

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# SPDX-FileCopyrightText: 2024-present MTS PJSC
2+
# SPDX-License-Identifier: Apache-2.0
3+
"""Add external_url and external_id for datasets
4+
5+
Revision ID: 947c82ba59ba
6+
Revises: 4e119cb7481e
7+
Create Date: 2026-04-07 14:16:26.411705
8+
9+
"""
10+
11+
import sqlalchemy as sa
12+
from alembic import op
13+
14+
# revision identifiers, used by Alembic.
15+
revision = "947c82ba59ba"
16+
down_revision = "4e119cb7481e"
17+
branch_labels = None
18+
depends_on = None
19+
20+
21+
def upgrade() -> None:
    """Add the nullable ``external_id`` and ``external_url`` string columns to ``dataset``."""
    # Order matters only for parity with downgrade(), which drops them in reverse.
    for column_name in ("external_id", "external_url"):
        op.add_column("dataset", sa.Column(column_name, sa.String(), nullable=True))
24+
25+
26+
def downgrade() -> None:
    """Drop ``external_url`` and ``external_id`` from ``dataset``, undoing ``upgrade``."""
    # Columns are removed in the reverse of the order upgrade() adds them.
    for column_name in ("external_url", "external_id"):
        op.drop_column("dataset", column_name)

data_rentgen/db/models/dataset.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,16 @@ class Dataset(Base):
5050
lazy="noload",
5151
doc="Dataset tag values",
5252
)
53+
external_id: Mapped[str | None] = mapped_column(
54+
String,
55+
nullable=True,
56+
doc="External ID for integration with other systems",
57+
)
58+
external_url: Mapped[str | None] = mapped_column(
59+
String,
60+
nullable=True,
61+
doc="External link to other systems",
62+
)
5363

5464
search_vector: Mapped[str] = mapped_column(
5565
TSVECTOR,

data_rentgen/server/schemas/v1/dataset.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ class DatasetResponseV1(BaseModel):
3434
id: str = Field(description="Dataset id", coerce_numbers_to_str=True)
3535
location: LocationResponseV1 = Field(description="Corresponding Location")
3636
name: str = Field(description="Dataset name")
37+
external_id: str | None = Field(description="External ID for integration with other systems")
38+
external_url: str | None = Field(description="Link to dataset in an external system")
3739
schema: DatasetSchemaV1 | None = Field( # type: ignore[assignment]
3840
description="Schema",
3941
default=None,

data_rentgen/server/services/dataset.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ class DatasetData:
1818
id: int
1919
name: str
2020
location: Location
21+
external_id: str | None
22+
external_url: str | None
2123

2224

2325
@dataclass
@@ -66,6 +68,8 @@ async def paginate(
6668
id=dataset.id,
6769
name=dataset.name,
6870
location=dataset.location,
71+
external_id=dataset.external_id,
72+
external_url=dataset.external_url,
6973
),
7074
tags=[
7175
TagData(

data_rentgen/server/utils/lineage_response.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,8 @@ def _get_datasets(
268268
id=str(dataset.id),
269269
location=LocationResponseV1.model_validate(dataset.location),
270270
name=dataset.name,
271+
external_id=dataset.external_id,
272+
external_url=dataset.external_url,
271273
schema=schema,
272274
)
273275
return datasets
@@ -330,6 +332,8 @@ def _get_datasets_with_dataset_granularity(
330332
id=str(dataset.id),
331333
location=LocationResponseV1.model_validate(dataset.location),
332334
name=dataset.name,
335+
external_id=dataset.external_id,
336+
external_url=dataset.external_url,
333337
schema=schema,
334338
)
335339
return datasets
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added optional ``external_id`` and ``external_url`` fields on datasets (database, API responses) for linking datasets to external systems.

tests/test_server/fixtures/factories/dataset.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ def dataset_factory(**kwargs):
2424
"id": randint(0, 10000000),
2525
"location_id": randint(0, 10000000),
2626
"name": random_string(32),
27+
"external_id": random_string(),
28+
"external_url": None,
2729
}
2830

2931
data.update(kwargs)

tests/test_server/test_lineage/test_column_lineage.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1342,6 +1342,8 @@ async def test_get_dataset_lineage_with_granularity_dataset_and_column_lineage(
13421342
"name": dataset.name,
13431343
"location": location_to_json(dataset.location),
13441344
"schema": schema_to_json(lineage.outputs[0].schema, "EXACT_MATCH"),
1345+
"external_id": dataset.external_id,
1346+
"external_url": dataset.external_url,
13451347
}
13461348
for dataset in datasets
13471349
},
@@ -1482,6 +1484,8 @@ async def test_get_dataset_lineage_with_granularity_dataset_and_column_lineage_f
14821484
"name": dataset.name,
14831485
"location": location_to_json(dataset.location),
14841486
"schema": schema_to_json(lineage.outputs[0].schema, "EXACT_MATCH"),
1487+
"external_id": dataset.external_id,
1488+
"external_url": dataset.external_url,
14851489
}
14861490
for dataset in datasets
14871491
},

tests/test_server/test_lineage/test_dataset_lineage.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,8 @@ async def test_get_dataset_lineage_with_granularity_dataset(
341341
"name": dataset.name,
342342
"location": location_to_json(dataset.location),
343343
"schema": schema_to_json(lineage.inputs[0].schema, "EXACT_MATCH"),
344+
"external_id": dataset.external_id,
345+
"external_url": dataset.external_url,
344346
}
345347
for dataset in datasets
346348
},
@@ -417,6 +419,8 @@ async def test_get_dataset_lineage_with_granularity_dataset_and_direction(
417419
"name": dataset.name,
418420
"location": location_to_json(dataset.location),
419421
"schema": schema_to_json(lineage.inputs[0].schema, "EXACT_MATCH"),
422+
"external_id": dataset.external_id,
423+
"external_url": dataset.external_url,
420424
}
421425
for dataset in datasets
422426
},
@@ -485,6 +489,8 @@ async def test_get_dataset_lineage_with_granularity_dataset_and_depth(
485489
"name": dataset.name,
486490
"location": location_to_json(dataset.location),
487491
"schema": schema_to_json(lineage.inputs[0].schema, "EXACT_MATCH"),
492+
"external_id": dataset.external_id,
493+
"external_url": dataset.external_url,
488494
}
489495
for dataset in datasets
490496
},
@@ -563,6 +569,8 @@ async def test_get_dataset_lineage_with_granularity_dataset_and_symlinks(
563569
if dataset.id in inputs_by_dataset_id or dataset.id in outputs_by_dataset_id
564570
else None
565571
),
572+
"external_id": dataset.external_id,
573+
"external_url": dataset.external_url,
566574
}
567575
for dataset in datasets
568576
},
@@ -633,6 +641,8 @@ async def test_get_dataset_lineage_with_granularity_dataset_and_until(
633641
"name": dataset.name,
634642
"location": location_to_json(dataset.location),
635643
"schema": schema_to_json(lineage.inputs[0].schema, "EXACT_MATCH"),
644+
"external_id": dataset.external_id,
645+
"external_url": dataset.external_url,
636646
}
637647
for dataset in datasets
638648
},
@@ -1495,6 +1505,8 @@ async def test_get_dataset_lineage_unmergeable_schema_and_output_type(
14951505
"name": dataset.name,
14961506
"location": location_to_json(dataset.location),
14971507
"schema": schema_to_json(response_schema, "LATEST_KNOWN"),
1508+
"external_id": dataset.external_id,
1509+
"external_url": dataset.external_url,
14981510
},
14991511
},
15001512
"jobs": jobs_to_json(jobs),
@@ -1685,18 +1697,24 @@ async def test_get_dataset_lineage_with_granularity_dataset_without_output_schem
16851697
"name": lineage_dataset.name,
16861698
"location": location_to_json(lineage_dataset.location),
16871699
"schema": schema_to_json(response_schema, "EXACT_MATCH"),
1700+
"external_id": lineage_dataset.external_id,
1701+
"external_url": lineage_dataset.external_url,
16881702
},
16891703
str(datasets[0].id): {
16901704
"id": str(datasets[0].id),
16911705
"name": datasets[0].name,
16921706
"location": location_to_json(datasets[0].location),
16931707
"schema": schema_to_json(lineage.inputs[0].schema, "EXACT_MATCH"),
1708+
"external_id": datasets[0].external_id,
1709+
"external_url": datasets[0].external_url,
16941710
},
16951711
str(datasets[2].id): {
16961712
"id": str(datasets[2].id),
16971713
"name": datasets[2].name,
16981714
"location": location_to_json(datasets[2].location),
16991715
"schema": schema_to_json(lineage.inputs[0].schema, "EXACT_MATCH"),
1716+
"external_id": datasets[2].external_id,
1717+
"external_url": datasets[2].external_url,
17001718
},
17011719
},
17021720
"jobs": {},
@@ -1749,6 +1767,8 @@ async def test_get_dataset_lineage_with_granularity_dataset_ignore_self_referenc
17491767
"name": dataset.name,
17501768
"location": location_to_json(dataset.location),
17511769
"schema": None,
1770+
"external_id": dataset.external_id,
1771+
"external_url": dataset.external_url,
17521772
},
17531773
},
17541774
"jobs": {},
@@ -1802,6 +1822,8 @@ async def test_get_dataset_lineage_with_granularity_dataset_ignore_not_connected
18021822
"name": dataset.name,
18031823
"location": location_to_json(dataset.location),
18041824
"schema": None,
1825+
"external_id": dataset.external_id,
1826+
"external_url": dataset.external_url,
18051827
},
18061828
},
18071829
"jobs": {},

tests/test_server/utils/convert_to_json.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,8 @@ def dataset_to_json(
232232
"name": dataset.name,
233233
"location": location_to_json(dataset.location),
234234
"schema": schema,
235+
"external_id": dataset.external_id,
236+
"external_url": dataset.external_url,
235237
}
236238

237239

0 commit comments

Comments
 (0)