Skip to content

Commit 36ab82e

Browse files
committed
Temporarily remove the different SchemaFeatureSupport levels for simplicity. Also introduce caching of attempted conversion sub-paths.
1 parent e907f97 commit 36ab82e

9 files changed

Lines changed: 4010 additions & 69 deletions

.gitignore

Lines changed: 3938 additions & 0 deletions
Large diffs are not rendered by default.

schema-conversion-orchestrator/ConvertersLinkMl.py

Lines changed: 5 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -23,32 +23,6 @@
2323
from linkml.generators.sqlalchemygen import SQLAlchemyGenerator
2424

2525

26-
class ConverterLinkMlToJsonSchema(ConverterInternal):
27-
def __init__(self):
28-
super().__init__(
29-
name="LinkML JsonSchemaGenerator",
30-
service_address="internal",
31-
service_name="FlaskApp",
32-
source_format=SchemaLanguage.LinkMl,
33-
target_format=SchemaLanguage.JsonSchema,
34-
supported_features=None
35-
)
36-
37-
def converter_logic(self, schema: str) -> str:
38-
with tempfile.NamedTemporaryFile("w+", suffix=".yaml") as f:
39-
f.write(schema)
40-
f.flush()
41-
json_schema = JsonSchemaGenerator(f.name).serialize()
42-
return json_schema
43-
44-
def validate_input(self, schema: str) -> bool:
45-
# Implement validation logic for LinkML schema
46-
return True
47-
48-
def validate_output(self, schema: str) -> bool:
49-
# Implement validation logic for JSON Schema
50-
return True
51-
5226
class ConverterFromLinkMl(ConverterInternal):
5327
def __init__(self, target_format: SchemaLanguage):
5428
super().__init__(
@@ -57,7 +31,7 @@ def __init__(self, target_format: SchemaLanguage):
5731
service_name="FlaskApp",
5832
source_format=SchemaLanguage.LinkMl,
5933
target_format=target_format,
60-
supported_features=None
34+
supported_features=set()
6135
)
6236

6337
def converter_logic(self, schema: str) -> str:
@@ -102,6 +76,7 @@ def validate_output(self, schema: str) -> bool:
10276
# Implement validation logic for LinkML schema
10377
return True
10478

79+
10580
class ConverterJsonSchemaToLinkMl(ConverterInternal):
10681
def __init__(self):
10782
super().__init__(
@@ -110,7 +85,7 @@ def __init__(self):
11085
service_name="FlaskApp",
11186
source_format=SchemaLanguage.JsonSchema,
11287
target_format=SchemaLanguage.LinkMl,
113-
supported_features=None
88+
supported_features=set()
11489
)
11590

11691
def converter_logic(self, schema: str) -> str:
@@ -150,7 +125,7 @@ def __init__(self):
150125
service_name="FlaskApp",
151126
source_format=SchemaLanguage.Owl,
152127
target_format=SchemaLanguage.LinkMl,
153-
supported_features=None
128+
supported_features=set()
154129
)
155130

156131
def converter_logic(self, schema: str) -> str:
@@ -181,4 +156,4 @@ def validate_input(self, schema: str) -> bool:
181156

182157
def validate_output(self, schema: str) -> bool:
183158
# Implement validation logic for LinkML schema
184-
return True
159+
return True

schema-conversion-orchestrator/ConvertersMdModels.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def __init__(self, target_format: SchemaLanguage):
1414
service_name="FlaskApp",
1515
source_format=SchemaLanguage.MdModels,
1616
target_format=target_format,
17-
supported_features=None
17+
supported_features=set()
1818
)
1919

2020
def converter_logic(self, schema: str) -> str:
@@ -64,7 +64,7 @@ def __init__(self):
6464
service_name="FlaskApp",
6565
source_format=SchemaLanguage.JsonSchema,
6666
target_format=SchemaLanguage.MdModels,
67-
supported_features=None
67+
supported_features=set()
6868
)
6969

7070
def converter_logic(self, schema: str) -> str:

schema-conversion-orchestrator/ConvertersXsd.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def __init__(self):
1313
service_name="FlaskApp",
1414
source_format=SchemaLanguage.Xsd,
1515
target_format=SchemaLanguage.JsonSchema,
16-
supported_features=None
16+
supported_features=set()
1717
)
1818

1919
def converter_logic(self, schema: str) -> str:

schema-conversion-orchestrator/conversion_strategies.py

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@
22
from typing import List
33
from strenum import StrEnum
44

5-
from converter import (ConversionResult, ConversionResults, ConversionPaths, Converter)
5+
from converter import (ConversionResult, ConversionResults, ConversionPaths, Converter, conversion_path_to_string, ConversionsCache)
66
from schema_types import SchemaLanguage
77
from logic import identify_schema_features, rank_paths
8-
from app import DETAILED_ERROR_OUTPUT, schema_languages_features
8+
from app import DETAILED_ERROR_OUTPUT
99

1010

1111
class ConversionStrategy(StrEnum):
@@ -20,8 +20,9 @@ def convert_with_strategy_most_features_preserved(source: SchemaLanguage, target
2020
doc_features = set(identify_schema_features(schema, source))
2121
if not doc_features:
2222
print("Warning: No schema feature identification available for the given schema format '" + source + "'.")
23-
ranked_paths = rank_paths(paths, doc_features, schema_languages_features)
23+
ranked_paths = rank_paths(paths, doc_features)
2424
all_attempts: List[ConversionResult] = []
25+
conversions_cache = {} # cache for all conversion sub-paths
2526
result_schema = None
2627

2728
# attempt conversion via best path and if it fails, try remaining paths and print error message only to console
@@ -30,7 +31,9 @@ def convert_with_strategy_most_features_preserved(source: SchemaLanguage, target
3031
while result_schema is None and len(ranked_paths) > 0:
3132
best_path, unsupported_features = ranked_paths[0]
3233
try:
33-
result_schema = attempt_conversion_path(source, target, best_path, schema)
34+
result_schema, conversions_cache_update = attempt_conversion_path(source, target, best_path, schema,
35+
conversions_cache)
36+
conversions_cache = conversions_cache_update
3437

3538
if not result_schema:
3639
all_attempts.append((False, "Conversion resulted in 'None' schema.", best_path))
@@ -56,10 +59,13 @@ def convert_with_strategy_least_character_loss(source: SchemaLanguage, target: S
5659
Does not stop at the first success but explores all paths. Trivial, character-based feature-loss strategy:
5760
much less effort than a proper feature-loss analysis, yet still effective."""
5861
all_attempts: List[ConversionResult] = []
62+
conversions_cache = {} # cache for all conversion sub-paths
5963
for path in paths:
6064
result_schema = None
6165
try:
62-
result_schema = attempt_conversion_path(source, target, path, schema)
66+
result_schema, conversions_cache_update = attempt_conversion_path(source, target, path, schema,
67+
conversions_cache)
68+
conversions_cache = conversions_cache_update
6369

6470
if not result_schema:
6571
all_attempts.append((False, "Conversion resulted in 'None' schema.", path))
@@ -78,17 +84,41 @@ def convert_with_strategy_least_character_loss(source: SchemaLanguage, target: S
7884
return all_attempts
7985

8086

81-
def attempt_conversion_path(source: str, target: str, path: List[Converter], schema: str) -> str:
87+
# The conversions cache maps each previously attempted conversion sub-path to its result; a cached None is treated as a previously failed attempt.
88+
def attempt_conversion_path(source: str, target: str, path: List[Converter], schema: str,
89+
conversions_cache: ConversionsCache) -> tuple[str, ConversionsCache]:
8290
print_conversion_path(source, target, path)
8391
current_schema = schema
8492
current_converter = None
93+
conversion_sub_path = []
8594
try:
8695
for conv in path:
8796
current_converter = conv
88-
current_schema = conv.convert(current_schema)
89-
print(
90-
"Intermediate schema of format " + conv.target_format + " after conversion via " + conv.service_name + ": " + current_schema)
91-
return current_schema
97+
conversion_sub_path.append(conv)
98+
99+
# check cache
100+
conversion_sub_path_hash = conversion_path_to_string(conversion_sub_path)
101+
if conversion_sub_path_hash in conversions_cache:
102+
# cache hit
103+
cached_result = conversions_cache[conversion_sub_path_hash]
104+
if cached_result is None:
105+
# previously failed conversion for this sub-path
106+
raise Exception("Previously failed conversion for this sub-path.")
107+
else:
108+
# use cached result in case of cache hit and good previous conversion
109+
current_schema = cached_result
110+
print(
111+
"Using cached intermediate schema of format " + conv.target_format + " after conversion via " + conv.service_name + ": " + current_schema)
112+
continue
113+
else:
114+
# cache miss - perform conversion
115+
current_schema = conv.convert(current_schema)
116+
print(
117+
"Intermediate schema of format " + conv.target_format + " after conversion via " + conv.service_name + ": " + current_schema)
118+
# store in cache
119+
conversions_cache[conversion_sub_path_hash] = current_schema
120+
121+
return current_schema, conversions_cache
92122
except Exception as e:
93123
print(
94124
"Conversion failed at step from " + current_converter.source_format + " to " + current_converter.target_format + " via " + current_converter.service_name + " because of error: " + str(

schema-conversion-orchestrator/converter.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,8 @@
22
import os
33
import subprocess
44
import tempfile
5-
from typing import List, Dict, Tuple
6-
from schema_types import SchemaLanguage, SchemaFeature, SchemaFeatureSupport
7-
5+
from typing import List, Tuple
6+
from schema_types import SchemaLanguage, SchemaFeature
87

98

109
class Converter:
@@ -19,7 +18,7 @@ class Converter:
1918
"""
2019

2120
def __init__(self, name: str, service_address: str, service_name: str, source_format: SchemaLanguage,
22-
target_format: SchemaLanguage, supported_features: Dict[SchemaFeature, SchemaFeatureSupport] | None):
21+
target_format: SchemaLanguage, supported_features: set[SchemaFeature]):
2322
self.name = name
2423
self.service_address = service_address
2524
self.service_name = service_name
@@ -33,7 +32,7 @@ def convert(self, schema: str) -> str:
3332

3433
class ConverterExternal(Converter):
3534
def __init__(self, name: str, executable_path: str, service_name: str, source_format: SchemaLanguage,
36-
target_format: SchemaLanguage, supported_features: Dict[SchemaFeature, SchemaFeatureSupport] | None):
35+
target_format: SchemaLanguage, supported_features: set[SchemaFeature]):
3736
super().__init__(name, executable_path, service_name, source_format, target_format, supported_features)
3837
self.executable_path = executable_path
3938

@@ -80,7 +79,7 @@ def convert(self, schema: str) -> str:
8079

8180
class ConverterInternal(Converter):
8281
def __init__(self, name: str, service_address: str, service_name: str, source_format: SchemaLanguage,
83-
target_format: SchemaLanguage, supported_features: Dict[SchemaFeature, SchemaFeatureSupport] | None):
82+
target_format: SchemaLanguage, supported_features: set[SchemaFeature]):
8483
super().__init__(name, service_address, service_name, source_format, target_format, supported_features)
8584

8685
def convert(self, schema: str) -> str:
@@ -111,7 +110,7 @@ class ConverterExternalGeneric(ConverterExternal):
111110
"""Generic external converter that can handle multiple conversion types"""
112111

113112
def __init__(self, name: str, executable_path: str, source_format: SchemaLanguage,
114-
target_format: SchemaLanguage, supported_features: Dict[SchemaFeature, SchemaFeatureSupport] | None,
113+
target_format: SchemaLanguage, supported_features: set[SchemaFeature],
115114
converter_type: str):
116115
super().__init__(name, executable_path, converter_type, source_format, target_format, supported_features)
117116
self.converter_type = converter_type
@@ -177,3 +176,9 @@ def convert(self, schema: str) -> str:
177176

178177
# List of results ranked by success
179178
ConversionResults = List[ConversionResult]
179+
180+
ConversionsCache = dict[str, str | None]
181+
182+
183+
def conversion_path_to_string(path: ConversionPath) -> str:
184+
return " -> ".join([f"{conv.source_format.value} to {conv.target_format.value} via {conv.service_name}" for conv in path])

schema-conversion-orchestrator/logic.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from typing import List, Set, Tuple, Dict
22
from converter import Converter, ConversionGraph, ConversionPath, ConversionPaths
3-
from schema_types import SchemaLanguage, SchemaFeature, SchemaLanguagesFeatures, SchemaFeatureSupport
3+
from schema_types import SchemaLanguage, SchemaFeature
44
from identify_schema_features_json_schema import identify_schema_features_json_schema
55

66

@@ -56,13 +56,8 @@ def find_paths(source: SchemaLanguage, target: SchemaLanguage, conversion_graph:
5656

5757
# Ranks the paths by how many of the document schema's features they support and also returns, for each path,
5858
# the set of features it does not support.
59-
# Feature support is not a yes or no but an enum of preserved, approximated, weakened, lost.
60-
# We start with the state of features of the user document. Then for each path step we remove the unsupported features and apply
61-
# downgrades to the others, depending on 1. feature support by the schema language of this step and 2. feature support
62-
# by the conversion step itself.
63-
def rank_paths(paths: ConversionPaths, doc_features: set[SchemaFeature] | None,
64-
schema_languages_features: SchemaLanguagesFeatures) -> List[
65-
Tuple[ConversionPath, Dict[SchemaFeature, SchemaFeatureSupport]]]:
59+
def rank_paths(paths: ConversionPaths, doc_features: set[SchemaFeature] | None) -> List[
60+
Tuple[ConversionPath, set[SchemaFeature]]]:
6661
if doc_features is None:
6762
# todo: assume ALL features in case of no known list of features
6863
doc_features = set()

schema-conversion-orchestrator/register_external_converters.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,10 @@ def register_external_converters() -> List[Converter]:
5353
for info in converter_infos:
5454
try:
5555
# Parse supported features
56-
features = {}
57-
for feature_str in info.get('supportedFeatures', {}):
56+
features = set()
57+
for feature_str in info.get('supportedFeatures', []):
5858
try:
59-
features[SchemaFeature(feature_str)] = info['supportedFeatures'][feature_str]
59+
features.add(SchemaFeature(feature_str))
6060
except ValueError:
6161
print(f"Unknown feature: {feature_str}")
6262

@@ -85,10 +85,10 @@ def register_external_converters() -> List[Converter]:
8585
for info in converter_infos:
8686
try:
8787
# Parse supported features
88-
features = {}
89-
for feature_str in info.get('supportedFeatures', {}):
88+
features = set()
89+
for feature_str in info.get('supportedFeatures', []):
9090
try:
91-
features[SchemaFeature(feature_str)] = info['supportedFeatures'][feature_str]
91+
features.add(SchemaFeature(feature_str))
9292
except ValueError:
9393
print(f"Unknown feature: {feature_str}")
9494

@@ -107,4 +107,4 @@ def register_external_converters() -> List[Converter]:
107107
else:
108108
print(f"Java converter not found at: {java_jar}")
109109

110-
return converters
110+
return converters

schema-conversion-orchestrator/register_python_converters.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from typing import List
2-
from ConvertersLinkMl import ConverterFromLinkMl, ConverterJsonSchemaToLinkMl, ConverterLinkMlToJsonSchema, ConverterOwlToLinkMl
2+
from ConvertersLinkMl import ConverterFromLinkMl, ConverterJsonSchemaToLinkMl, ConverterOwlToLinkMl
33
from ConvertersMdModels import ConverterFromMdModels, ConverterJsonSchemaToMdModels
44

55
from converter import Converter
@@ -38,6 +38,4 @@ def register_python_converters() -> List[Converter]:
3838

3939
ConverterJsonSchemaToMdModels(),
4040

41-
ConverterLinkMlToJsonSchema() # redundant
42-
43-
]
41+
]

0 commit comments

Comments
 (0)