|
12 | 12 | """ |
13 | 13 | Unit tests for db_utils module |
14 | 14 | """ |
| 15 | + |
15 | 16 | import uuid |
16 | 17 | from copy import deepcopy |
17 | 18 | from unittest import TestCase |
|
36 | 37 | from metadata.ingestion.lineage.models import Dialect |
37 | 38 | from metadata.ingestion.lineage.sql_lineage import search_cache |
38 | 39 | from metadata.ingestion.source.models import TableView |
39 | | -from metadata.utils.db_utils import get_host_from_host_port, get_view_lineage |
| 40 | +from metadata.utils.db_utils import ( |
| 41 | + clean_host_port, |
| 42 | + get_host_from_host_port, |
| 43 | + get_view_lineage, |
| 44 | +) |
40 | 45 |
|
41 | 46 |
|
42 | 47 | # Mock LineageTable class to simulate collate_sqllineage.core.models.Table |
@@ -118,6 +123,84 @@ def test_get_host_from_host_port(self): |
118 | 123 | self.assertEqual(get_host_from_host_port("localhost"), "localhost") |
119 | 124 | self.assertEqual(get_host_from_host_port("example.com"), "example.com") |
120 | 125 |
|
| 126 | + # Test with URL scheme prefixes |
| 127 | + self.assertEqual(get_host_from_host_port("http://localhost:3306"), "localhost") |
| 128 | + self.assertEqual( |
| 129 | + get_host_from_host_port("https://example.com:5432"), "example.com" |
| 130 | + ) |
| 131 | + self.assertEqual(get_host_from_host_port("http://localhost"), "localhost") |
| 132 | + |
| 133 | + # Test with IPv6 addresses |
| 134 | + self.assertEqual(get_host_from_host_port("http://[::1]:3306"), "[::1]") |
| 135 | + self.assertEqual(get_host_from_host_port("[::1]:3306"), "[::1]") |
| 136 | + |
| 137 | + def test_clean_host_port(self): |
| 138 | + """Test clean_host_port reduces URLs and host strings to bare host[:port]""" |
| 139 | + # Already-clean values pass through unchanged |
| 140 | + self.assertEqual(clean_host_port("localhost:3306"), "localhost:3306") |
| 141 | + self.assertEqual(clean_host_port("127.0.0.1:5432"), "127.0.0.1:5432") |
| 142 | + self.assertEqual(clean_host_port("example.com"), "example.com") |
| 143 | + |
| 144 | + # HTTP prefix is stripped |
| 145 | + self.assertEqual(clean_host_port("http://localhost:3306"), "localhost:3306") |
| 146 | + self.assertEqual(clean_host_port("http://example.com:8080"), "example.com:8080") |
| 147 | + |
| 148 | + # HTTPS prefix is stripped |
| 149 | + self.assertEqual(clean_host_port("https://localhost:5432"), "localhost:5432") |
| 150 | + self.assertEqual( |
| 151 | + clean_host_port("https://mydb.example.com:3306"), "mydb.example.com:3306" |
| 152 | + ) |
| 153 | + |
| 154 | + # Trailing slash is stripped |
| 155 | + self.assertEqual(clean_host_port("http://localhost:3306/"), "localhost:3306") |
| 156 | + |
| 157 | + # Host only with scheme |
| 158 | + self.assertEqual(clean_host_port("http://localhost"), "localhost") |
| 159 | + self.assertEqual(clean_host_port("https://example.com"), "example.com") |
| 160 | + |
| 161 | + # URL with a path or query string — everything after host:port is discarded |
| 162 | + self.assertEqual(clean_host_port("http://localhost:3306/db"), "localhost:3306") |
| 163 | + self.assertEqual( |
| 164 | + clean_host_port("https://example.com:5432/mydb?ssl=true"), |
| 165 | + "example.com:5432", |
| 166 | + ) |
| 167 | + |
| 168 | + # Whitespace is stripped |
| 169 | + self.assertEqual(clean_host_port(" localhost:3306 "), "localhost:3306") |
| 170 | + self.assertEqual(clean_host_port(" http://localhost:3306 "), "localhost:3306") |
| 171 | + |
| 172 | + # JDBC-style URLs (jdbc:<driver>://...) — path, query and fragment are discarded |
| 173 | + self.assertEqual(clean_host_port("jdbc:postgresql://host:5432"), "host:5432") |
| 174 | + self.assertEqual(clean_host_port("jdbc:postgresql://host:5432/db"), "host:5432") |
| 175 | + self.assertEqual( |
| 176 | + clean_host_port("jdbc:postgresql://host:5432?ssl=true"), "host:5432" |
| 177 | + ) |
| 178 | + self.assertEqual( |
| 179 | + clean_host_port("jdbc:postgresql://host:5432/db?ssl=true#ref"), |
| 180 | + "host:5432", |
| 181 | + ) |
| 182 | + |
| 183 | + # IPv6 addresses — brackets are preserved |
| 184 | + self.assertEqual(clean_host_port("http://[::1]:3306"), "[::1]:3306") |
| 185 | + self.assertEqual(clean_host_port("https://[::1]:5432"), "[::1]:5432") |
| 186 | + self.assertEqual(clean_host_port("http://[::1]"), "[::1]") |
| 187 | + self.assertEqual( |
| 188 | + clean_host_port("http://[2001:db8::1]:3306"), "[2001:db8::1]:3306" |
| 189 | + ) |
| 190 | + |
| 191 | + # Plain IPv6 without scheme passes through unchanged |
| 192 | + self.assertEqual(clean_host_port("[::1]:3306"), "[::1]:3306") |
| 193 | + |
| 194 | + # JDBC with userinfo — credentials are stripped |
| 195 | + self.assertEqual( |
| 196 | + clean_host_port("jdbc:postgresql://user:pass@host:5432/db"), |
| 197 | + "host:5432", |
| 198 | + ) |
| 199 | + |
| 200 | + # Non-numeric port raises ValueError |
| 201 | + with self.assertRaises(ValueError): |
| 202 | + clean_host_port("http://localhost:abc") |
| 203 | + |
121 | 204 | @patch("metadata.utils.db_utils.ConnectionTypeDialectMapper") |
122 | 205 | @patch("metadata.utils.db_utils.fqn") |
123 | 206 | def test_get_view_lineage_success_with_lineage_parser( |
|
0 commit comments