Skip to content

Commit e873c9d

Browse files
ulixius9 and claude authored
MINOR: Address review comments from PR #27236 (#27255)
- Extract URL credential sanitization to generic `sanitize_url_credentials` in logger utils
- Fix misleading log prefix `GitHubCloneReader::_clone` → `_clone_repo`
- Add BigQuery INFORMATION_SCHEMA context to `split_table_name` comment
- Add unit tests for `sanitize_url_credentials`

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 5e14164 commit e873c9d

4 files changed

Lines changed: 33 additions & 7 deletions

File tree

ingestion/src/metadata/ingestion/source/dashboard/looker/utils.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
"""
1515

1616
import os
17-
import re
1817
import shutil
1918
from typing import Optional, Union
2019

@@ -32,7 +31,7 @@
3231
from metadata.generated.schema.security.credentials.gitlabCredentials import (
3332
GitlabCredentials,
3433
)
35-
from metadata.utils.logger import ingestion_logger
34+
from metadata.utils.logger import ingestion_logger, sanitize_url_credentials
3635

3736
logger = ingestion_logger()
3837

@@ -102,5 +101,5 @@ def _clone_repo(
102101

103102
logger.info(f"repo {repo_name} cloned to {path}")
104103
except Exception as exc:
105-
sanitized_msg = re.sub(r"https://[^@]+@", "https://****@", str(exc))
106-
logger.error(f"GitHubCloneReader::_clone: ERROR {sanitized_msg}")
104+
sanitized_msg = sanitize_url_credentials(str(exc))
105+
logger.error(f"_clone_repo: ERROR {sanitized_msg}")

ingestion/src/metadata/utils/fqn.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -699,8 +699,9 @@ def split_table_name(table_name: str) -> Dict[str, Optional[str]]:
699699
# Revisit: Check the ANTLR grammar for issues when the string has double quotes
700700
# Issue Link: https://github.com/open-metadata/OpenMetadata/issues/8874
701701
details: List[str] = split(table_name.replace('"', ""))
702-
# Pad None to the left until size of list is 3
703-
# If more than 3 parts, take only the last 3 (database, schema, table)
702+
# Handles table names with 4+ parts (e.g., BigQuery INFORMATION_SCHEMA:
703+
# `project-name.region-name.INFORMATION_SCHEMA.table_name`) by taking only
704+
# the last 3 segments (database, schema, table). Pads with None if fewer than 3.
704705
full_details: List[Optional[str]] = ([None] * max(0, 3 - len(details))) + details[
705706
-3:
706707
]

ingestion/src/metadata/utils/logger.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
"""
1414

1515
import logging
16+
import re
1617
from copy import deepcopy
1718
from enum import Enum
1819
from functools import singledispatch
@@ -354,6 +355,11 @@ def emit(self, record: logging.LogRecord) -> None:
354355
self.handleError(record)
355356

356357

358+
def sanitize_url_credentials(message: str) -> str:
359+
"""Mask credentials embedded in URLs (e.g., https://token@host)"""
360+
return re.sub(r"https://[^@]+@", "https://****@", message)
361+
362+
357363
def redacted_config(config: Dict[str, Union[str, dict]]) -> Dict[str, Union[str, dict]]:
358364
config_copy = deepcopy(config)
359365

ingestion/tests/unit/utils/test_logger.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from metadata.utils.logger import redacted_config
1+
from metadata.utils.logger import redacted_config, sanitize_url_credentials
22

33

44
def test_safe_config_logger():
@@ -29,3 +29,23 @@ def test_safe_config_logger():
2929
},
3030
}
3131
assert result == expected
32+
33+
34+
def test_sanitize_url_credentials():
35+
assert (
36+
sanitize_url_credentials("https://my_secret_pat@dev.azure.com/org/repo")
37+
== "https://****@dev.azure.com/org/repo"
38+
)
39+
assert (
40+
sanitize_url_credentials(
41+
"https://x-oauth-basic:token123@github.com/owner/repo.git"
42+
)
43+
== "https://****@github.com/owner/repo.git"
44+
)
45+
assert (
46+
sanitize_url_credentials(
47+
"https://x-token-auth:secret@gitlab.com/owner/repo.git"
48+
)
49+
== "https://****@gitlab.com/owner/repo.git"
50+
)
51+
assert sanitize_url_credentials("no url here") == "no url here"

0 commit comments

Comments
 (0)