Skip to content

Commit 2a95020

Browse files
fix(ingestion): make strip_hostport_scheme public and validate JDBC port
- Renames `_strip_hostport_scheme` to the public `strip_hostport_scheme` so that cross-module imports (db_utils.py) no longer depend on a private symbol; a private alias is kept for backward compatibility.
- Validates the port in the JDBC-style fallback branch so that inputs like 'jdbc:postgresql://host:abc/db' raise ValueError with a clear message, matching the docstring contract instead of silently passing a broken hostPort downstream.
- Extends test_clean_host_port with the new fallback raise cases.
1 parent 1623135 commit 2a95020

3 files changed

Lines changed: 33 additions & 5 deletions

File tree

ingestion/src/metadata/ingestion/models/custom_pydantic.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
JSON_ENCODERS = "json_encoders"
3636

3737

38-
def _strip_hostport_scheme(raw: str) -> str:
38+
def strip_hostport_scheme(raw: str) -> str:
3939
"""
4040
Strip an accidental URL scheme from a hostPort string.
4141
@@ -44,7 +44,9 @@ def _strip_hostport_scheme(raw: str) -> str:
4444
bootstrap path of every generated Connection class.
4545
4646
Raises ValueError if the scheme carries a non-numeric port so the user
47-
gets a clear error instead of a silently broken hostPort.
47+
gets a clear error instead of a silently broken hostPort. This applies to
48+
both standard URLs handled by ``urlparse`` and to JDBC-style URLs handled
49+
by the fallback branch (e.g. ``jdbc:postgresql://host:abc/db``).
4850
"""
4951
value = raw.strip()
5052
if "://" not in value:
@@ -78,12 +80,32 @@ def _strip_hostport_scheme(raw: str) -> str:
7880
tail = tail.split(sep, 1)[0]
7981
if "@" in tail:
8082
tail = tail.rsplit("@", 1)[-1]
83+
84+
# Validate the port in the fallback path so the same ValueError
85+
# contract holds for JDBC-style URLs (e.g. 'jdbc:postgresql://host:abc').
86+
fallback_port: Optional[str] = None
87+
if tail.startswith("["):
88+
closing = tail.find("]")
89+
if closing != -1 and len(tail) > closing + 1 and tail[closing + 1] == ":":
90+
fallback_port = tail[closing + 2 :]
91+
elif ":" in tail:
92+
fallback_port = tail.rsplit(":", 1)[1]
93+
if fallback_port and not fallback_port.isdigit():
94+
raise ValueError(
95+
f"Invalid hostPort '{safe_label}'. Expected format is "
96+
"'hostname[:port]' (e.g. 'localhost:3306')."
97+
)
8198
return tail
8299

83100
host = f"[{hostname}]" if ":" in hostname else hostname
84101
return f"{host}:{port}" if port is not None else host
85102

86103

104+
# Backwards-compatible private alias retained for any internal callers that
105+
# pinned to the original underscored symbol while the helper was private.
106+
_strip_hostport_scheme = strip_hostport_scheme
107+
108+
87109
class BaseModel(PydanticBaseModel):
88110
"""
89111
Base model for OpenMetadata generated models.
@@ -108,7 +130,7 @@ def model_post_init(self, context: Any, /):
108130
# Let ValueError propagate: if the hostPort cannot be parsed
109131
# (e.g. non-numeric port), the user must fix their config
110132
# rather than silently getting a broken hostPort.
111-
object.__setattr__(self, "hostPort", _strip_hostport_scheme(raw))
133+
object.__setattr__(self, "hostPort", strip_hostport_scheme(raw))
112134

113135
try:
114136
for field in self.__pydantic_fields__:

ingestion/src/metadata/utils/db_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
get_lineage_by_query,
3434
get_lineage_via_table_entity,
3535
)
36-
from metadata.ingestion.models.custom_pydantic import _strip_hostport_scheme
36+
from metadata.ingestion.models.custom_pydantic import strip_hostport_scheme
3737
from metadata.ingestion.ometa.ometa_api import OpenMetadata
3838
from metadata.ingestion.source.models import TableView
3939
from metadata.utils import fqn
@@ -59,7 +59,7 @@ def clean_host_port(host_port: str) -> str:
5959
value = host_port.strip()
6060
if "://" not in value:
6161
return value.rstrip("/")
62-
return _strip_hostport_scheme(value)
62+
return strip_hostport_scheme(value)
6363

6464

6565
def get_host_from_host_port(uri: str) -> str:

ingestion/tests/unit/test_db_utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,12 @@ def test_clean_host_port(self):
201201
with self.assertRaises(ValueError):
202202
clean_host_port("http://localhost:abc")
203203

204+
# Non-numeric port in JDBC-style fallback also raises ValueError
205+
with self.assertRaises(ValueError):
206+
clean_host_port("jdbc:postgresql://host:abc/db")
207+
with self.assertRaises(ValueError):
208+
clean_host_port("jdbc:postgresql://[::1]:abc")
209+
204210
@patch("metadata.utils.db_utils.ConnectionTypeDialectMapper")
205211
@patch("metadata.utils.db_utils.fqn")
206212
def test_get_view_lineage_success_with_lineage_parser(

0 commit comments

Comments
 (0)