Skip to content

Commit c35ecc3

Browse files
committed
wip
1 parent 6447f41 commit c35ecc3

10 files changed

Lines changed: 97 additions & 155 deletions

File tree

.vscode/settings.json

Lines changed: 0 additions & 7 deletions
This file was deleted.

requirements.txt

Lines changed: 30 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -1,48 +1,47 @@
11
babel==2.17.0
2-
backrefs==5.8
3-
certifi==2025.1.31
4-
charset-normalizer==3.4.1
5-
click==8.1.8
2+
backrefs==5.9
3+
certifi==2025.10.5
4+
charset-normalizer==3.4.3
5+
click==8.3.0
66
colorama==0.4.6
7-
duckdb==1.2.1
7+
duckdb==1.4.1
88
duckdb_engine==0.17.0
9-
elementpath==4.8.0
9+
elementpath==5.0.4
1010
ghp-import==2.1.0
11-
greenlet==3.1.1
12-
griffe==1.7.2
11+
greenlet==3.2.4
12+
griffe==1.14.0
1313
idna==3.10
1414
iniconfig==2.1.0
1515
Jinja2==3.1.6
16-
lxml==5.3.2
17-
Markdown==3.7
18-
MarkupSafe==3.0.2
16+
lxml==6.0.2
17+
Markdown==3.9
18+
MarkupSafe==3.0.3
1919
mergedeep==1.3.4
2020
mkdocs==1.6.1
21-
mkdocs-autorefs==1.4.1
21+
mkdocs-autorefs==1.4.3
2222
mkdocs-get-deps==0.2.0
23-
mkdocs-material==9.6.11
23+
mkdocs-material==9.6.21
2424
mkdocs-material-extensions==1.3.1
25-
mkdocstrings==0.29.1
26-
mkdocstrings-python==1.16.10
27-
packaging==24.2
25+
mkdocstrings==0.30.1
26+
mkdocstrings-python==1.18.2
27+
packaging==25.0
2828
paginate==0.5.7
2929
pathspec==0.12.1
30-
platformdirs==4.3.7
31-
pluggy==1.5.0
32-
psycopg2-binary==2.9.10
33-
Pygments==2.19.1
34-
pymdown-extensions==10.14.3
35-
PyMySQL==1.1.1
30+
platformdirs==4.4.0
31+
pluggy==1.6.0
32+
psycopg2==2.9.10
33+
Pygments==2.19.2
34+
pymdown-extensions==10.16.1
35+
PyMySQL==1.1.2
3636
pyodbc==5.2.0
37-
pytest==8.3.5
37+
pytest==8.4.2
3838
python-dateutil==2.9.0.post0
39-
PyYAML==6.0.2
40-
pyyaml_env_tag==0.1
41-
requests==2.32.3
39+
PyYAML==6.0.3
40+
pyyaml_env_tag==1.1
41+
requests==2.32.5
4242
six==1.17.0
43-
SQLAlchemy==2.0.40
44-
typing_extensions==4.13.1
45-
tzlocal==5.3.1
46-
urllib3==2.3.0
43+
SQLAlchemy==2.0.43
44+
typing_extensions==4.15.0
45+
urllib3==2.5.0
4746
watchdog==6.0.0
48-
xmlschema==3.4.5
47+
xmlschema==4.1.0

src/xml2db/model.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -196,8 +196,12 @@ def _create_table_model(
196196
A data model instance.
197197
"""
198198
table_config = self.tables_config.get(table_name, {})
199-
table_config["shorten_temp_table_names"] = self.model_config["shorten_temp_table_names"]
200-
table_config["shorten_rel_table_names"] = self.model_config["shorten_rel_table_names"]
199+
table_config["shorten_temp_table_names"] = self.model_config[
200+
"shorten_temp_table_names"
201+
]
202+
table_config["shorten_rel_table_names"] = self.model_config[
203+
"shorten_rel_table_names"
204+
]
201205
if table_config.get("reuse", True):
202206
return DataModelTableReused(
203207
table_name,

src/xml2db/table/duplicated_table.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -119,8 +119,12 @@ def get_col(temp=False) -> Iterable[Column]:
119119
)
120120
)
121121

122-
temp_table_name = f"{prefix}{self.name}"
123-
temp_table_name = self.truncate_long_name(temp_table_name) if self.config.get("shorten_temp_table_names") else temp_table_name
122+
temp_table_name = f"{prefix}{self.name}"
123+
temp_table_name = (
124+
self.truncate_long_name(temp_table_name)
125+
if self.config.get("shorten_table_names")
126+
else temp_table_name
127+
)
124128

125129
# build temporary table
126130
self.temp_table = Table(

src/xml2db/table/relations.py

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -101,9 +101,13 @@ def build_relation_tables(self) -> None:
101101
)
102102
prefix = f"temp_{self.table.temp_prefix}_"
103103
if self.other_table.is_reused:
104-
temp_table_name = f"{prefix}{self.rel_table_name}"
105-
temp_table_name = self.table.truncate_long_name(temp_table_name) if self.table.config.get("shorten_temp_table_names") else temp_table_name
106-
104+
temp_table_name = f"{prefix}{self.rel_table_name}"
105+
temp_table_name = (
106+
self.table.truncate_long_name(temp_table_name)
107+
if self.table.config.get("shorten_temp_table_names")
108+
else temp_table_name
109+
)
110+
107111
self.temp_rel_table = Table(
108112
temp_table_name,
109113
self.table.metadata,
@@ -135,7 +139,11 @@ def build_relation_tables(self) -> None:
135139
),
136140
)
137141

138-
table_name = self.table.truncate_long_name(self.rel_table_name) if self.table.config.get("shorten_rel_table_names") else self.rel_table_name
142+
table_name = (
143+
self.table.truncate_long_name(self.rel_table_name)
144+
if self.table.config.get("shorten_rel_table_names")
145+
else self.rel_table_name
146+
)
139147
self.rel_table = Table(
140148
table_name,
141149
self.table.metadata,

src/xml2db/table/reused_table.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -15,12 +15,14 @@
1515
from .column import DataModelColumn
1616
from .transformed_table import DataModelTableTransformed
1717

18+
1819
def shorten_str(x: str, max_len: int = 30) -> str:
1920
if len(x) > max_len:
2021
h = sha1(x.encode("utf8"))
2122
return f"{x[:(max_len - 7)]}_{h.hexdigest()[1:6]}"
2223
return x
2324

25+
2426
class DataModelTableReused(DataModelTableTransformed):
2527
"""A table data model which de-duplicates records in the database based on their hash value.
2628
@@ -132,8 +134,12 @@ def get_col(temp=False):
132134
)
133135
)
134136

135-
temp_table_name = f"{prefix}{self.name}"
136-
temp_table_name = self.truncate_long_name(temp_table_name) if self.config.get("shorten_temp_table_names") else temp_table_name
137+
temp_table_name = f"{prefix}{self.name}"
138+
temp_table_name = (
139+
self.truncate_long_name(temp_table_name)
140+
if self.config.get("shorten_temp_table_names")
141+
else temp_table_name
142+
)
137143

138144
# build temporary table
139145
self.temp_table = Table(

src/xml2db/table/table.py

Lines changed: 23 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import hashlib
2-
import base64
2+
import re
33
from typing import Iterable, List, Any, Union, TYPE_CHECKING
44
import logging
55
import sqlalchemy
@@ -15,6 +15,7 @@
1515

1616
logger = logging.getLogger(__name__)
1717

18+
1819
class DataModelTable:
1920
"""A class representing a database table translated from an XML schema complex type
2021
@@ -100,8 +101,9 @@ def _validate_config(self, cfg, db_type):
100101
config = {
101102
"reuse": check_type(cfg, "reuse", bool, True),
102103
"as_columnstore": check_type(cfg, "as_columnstore", bool, False),
103-
"shorten_temp_table_names": check_type(cfg, "shorten_temp_table_names", bool, False),
104-
"shorten_rel_table_names": check_type(cfg, "shorten_rel_table_names", bool, False)
104+
"shorten_table_names": check_type(
105+
cfg, "shorten_table_names", bool, db_type == "postgresql"
106+
),
105107
}
106108
if "extra_args" in cfg and not (
107109
isinstance(cfg["extra_args"], list)
@@ -120,7 +122,7 @@ def _validate_config(self, cfg, db_type):
120122
logger.warning(
121123
"Clustered columnstore indexes are only supported with MS SQL Server database"
122124
)
123-
125+
124126
config["fields"] = cfg.get("fields", {})
125127

126128
return config
@@ -409,28 +411,33 @@ def get_entity_rel_diagram(self) -> List:
409411
+ ["}"]
410412
)
411413
return [f" {line}" for line in out]
412-
414+
413415
def truncate_long_name(self, table_name: str) -> str:
414-
max_len = 63 #both postgres and mysql safe table name len
416+
max_len = 63 # both postgres and mysql safe table name len
415417
new_name = table_name
416-
417-
short_name = ""
418-
shorter_name = ""
418+
419419
is_tmp = "temp" in table_name
420420
suffix = f"_{hashlib.md5(table_name.encode('utf-8')).hexdigest()}"
421421

422422
if len(table_name) > max_len:
423-
words = table_name.split("_")
423+
# extract words for camelCase and snake_case identifiers
424+
s = re.sub(r"(?<=[a-z0-9])([A-Z])", r"_\1", table_name)
425+
s = re.sub(r"([A-Z]+)([A-Z][a-z])", r"\1_\2", s)
426+
words = [word for word in s.split("_") if word]
424427

428+
short_name = ""
429+
shorter_name = ""
425430
for word in words:
426-
if len(short_name) + len(word)<= (max_len - 1):
427-
if len(short_name) > 0: short_name += "_"
431+
if len(short_name) + len(word) <= (max_len - 1):
432+
if len(short_name) > 0:
433+
short_name += "_"
428434
short_name += f"{word}"
429435
if len(shorter_name) + len(word) <= (max_len - 10):
430-
if len(shorter_name) > 0: shorter_name += "_"
436+
if len(shorter_name) > 0:
437+
shorter_name += "_"
431438
shorter_name += f"{word}"
432439

433-
#check if sliced name already exists:
440+
# check if sliced name already exists:
434441
sentinel = False
435442
if is_tmp:
436443
# just cut the name up and append the full suffix
@@ -446,8 +453,8 @@ def truncate_long_name(self, table_name: str) -> str:
446453
if relation.rel_table_name == short_name:
447454
sentinel = True
448455
break
449-
450-
# an existing table or relation was found: append a
456+
457+
# an existing table or relation was found: append a
451458
# random-ish suffix to help prevent name collisions
452459
if sentinel:
453460
# create a more useable/legible short table name

src/xml2db/table/transformed_table.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
from .relations import DataModelRelation1, DataModelRelationN
66
from .table import DataModelTable
77

8+
89
class DataModelTableTransformed(DataModelTable):
910
"""A class extending DataModelTable with transformations
1011

tests/conftest.py

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -37,8 +37,4 @@ def setup_db_model(conn_string, model_config):
3737

3838
yield model
3939

40-
try:
41-
model.drop_all_tables()
42-
except Exception as e:
43-
print(f"Unable to drop all tables: {e}")
44-
pass
40+
model.drop_all_tables()

0 commit comments

Comments
 (0)