Skip to content

Commit 89f78f2

Browse files
authored
Filter schemas by current catalog in Glue (#26908)
1 parent 8d23a35 commit 89f78f2

2 files changed

Lines changed: 32 additions & 1 deletion

File tree

ingestion/src/metadata/ingestion/source/database/glue/metadata.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -185,9 +185,12 @@ def get_database_schema_names(self) -> Iterable[str]:
185185
"""
186186
return schema names
187187
"""
188+
database_name = self.context.get().database
188189
for page in self._get_glue_database_and_schemas() or []:
189190
for schema in page.DatabaseList:
190191
try:
192+
if schema.CatalogId != database_name:
193+
continue
191194
schema_fqn = fqn.build(
192195
self.metadata,
193196
entity_type=DatabaseSchema,

ingestion/tests/unit/topology/database/test_glue.py

Lines changed: 29 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,8 +31,13 @@
3131
OpenMetadataWorkflowConfig,
3232
)
3333
from metadata.generated.schema.type.entityReference import EntityReference
34+
from metadata.generated.schema.type.filterPattern import FilterPattern
3435
from metadata.ingestion.source.database.glue.metadata import GlueSource
35-
from metadata.ingestion.source.database.glue.models import DatabasePage, TablePage
36+
from metadata.ingestion.source.database.glue.models import (
37+
DatabasePage,
38+
GlueSchema,
39+
TablePage,
40+
)
3641

3742
mock_file_path = (
3843
Path(__file__).parent.parent.parent / "resources/datasets/glue_db_dataset.json"
@@ -188,6 +193,29 @@ def test_database_schema_names(self):
188193
self.glue_source.get_database_schema_names()
189194
)
190195

196+
def test_database_schema_names_filters_other_catalogs_before_schema_filter(self):
197+
self.glue_source.source_config.schemaFilterPattern = FilterPattern(
198+
includes=["default"]
199+
)
200+
self.glue_source._get_glue_database_and_schemas = lambda: [
201+
DatabasePage(
202+
DatabaseList=[
203+
GlueSchema(
204+
CatalogId=MOCK_DATABASE.name.root,
205+
Name="default",
206+
Description="current catalog schema",
207+
),
208+
GlueSchema(
209+
CatalogId="different-catalog",
210+
Name="default",
211+
Description="other catalog schema",
212+
),
213+
]
214+
)
215+
]
216+
217+
assert ["default"] == list(self.glue_source.get_database_schema_names())
218+
191219
@patch("metadata.ingestion.source.database.glue.metadata.fqn")
192220
def test_table_names(self, fqn):
193221
fqn.build = mock_fqn_build

0 commit comments

Comments
 (0)