Skip to content

Commit f5f0254

Browse files
committed
different improvements and remove old code
1 parent d954e0d commit f5f0254

9 files changed

Lines changed: 159 additions & 53 deletions

generate_diagrams.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#!/bin/bash
# Run the diagram generation script of the schema conversion orchestrator.
# Fail fast on errors, unset variables and failing pipeline stages.
set -euo pipefail

# Resolve the directory containing this script so it can be invoked from any working directory.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

python3 "${script_dir}/schema-conversion-orchestrator/output_generate_diagrams.py"

schema-conversion-orchestrator/app.py

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from conversion_strategies import ConversionStrategy, \
22
convert_with_strategy_least_character_loss
3-
from converter import (Converter, ConverterExternal, prepare_conversion_results_for_serializing)
3+
from converter import (Converter, prepare_conversion_results_for_serializing)
44
from schema_types import schema_language_from_string
55
from logic import build_conversion_graph, find_paths
66
from register_converters import register_converters
@@ -25,23 +25,6 @@ def health():
2525
return {"status": "ok"}, 200
2626

2727

28-
@app.route("/registerConversion", methods=["POST"])
29-
def register_conversion():
30-
data = request.json
31-
conv = ConverterExternal(
32-
data["name"],
33-
data["serviceAddress"],
34-
data["sourceFormat"],
35-
data["targetFormat"],
36-
)
37-
converters.append(conv)
38-
global conversion_graph
39-
conversion_graph = build_conversion_graph(converters)
40-
print(
41-
f"Registered new converter: {conv.name} from {conv.source_format} to {conv.target_format} at {conv.service_address}.")
42-
return {"status": "registered"}, 200
43-
44-
4528
@app.route("/convert", methods=["POST"])
4629
def convert():
4730
data = request.json

schema-conversion-orchestrator/conversion_strategies.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@
22
from typing import List
33
from strenum import StrEnum
44

5-
from converter import (ConversionResult, ConversionResults, ConversionPaths, Converter, conversion_path_to_string, ConversionsCache)
5+
from converter import (ConversionResult, ConversionResults, ConversionPaths, Converter, conversion_path_to_string,
6+
ConversionsCache)
67
from schema_types import SchemaLanguage
78

89
DETAILED_ERROR_OUTPUT = False
10+
DETAILED_RESULT_OUTPUT = True
911

1012

1113
class ConversionStrategy(StrEnum):
@@ -20,7 +22,6 @@ def convert_with_strategy_least_character_loss(source: SchemaLanguage, target: S
2022
all_attempts: List[ConversionResult] = []
2123
conversions_cache = {} # cache for all conversion sub-paths
2224
for path in paths:
23-
result_schema = None
2425
try:
2526
result_schema, conversions_cache_update = attempt_conversion_path(source, target, path, schema,
2627
conversions_cache)
@@ -40,6 +41,18 @@ def convert_with_strategy_least_character_loss(source: SchemaLanguage, target: S
4041
# sort all attempts by success: success first. Then by length of resulting schema (descending)
4142
all_attempts.sort(key=lambda x: (not x[0], -len(x[1]) if x[0] else float('inf')))
4243

44+
# print overall result: how many succeeded/failed and their character lengths
45+
success_count = sum(1 for attempt in all_attempts if attempt[0])
46+
failure_count = len(all_attempts) - success_count
47+
print(f"Conversion attempts completed: {success_count} succeeded, {failure_count} failed.")
48+
for i, attempt in enumerate(all_attempts):
49+
success, result_schema_or_error, path = attempt
50+
if success:
51+
print(
52+
f"- Attempt {i + 1} ({conversion_path_to_string(path)}): Success, Resulting schema length: {len(result_schema_or_error)} characters.")
53+
else:
54+
print(f"- Attempt {i + 1} ({conversion_path_to_string(path)}): Failure, Error: {result_schema_or_error}")
55+
4356
return all_attempts
4457

4558

@@ -72,8 +85,9 @@ def attempt_conversion_path(source: str, target: str, path: List[Converter], sch
7285
else:
7386
# cache miss - perform conversion
7487
current_schema = conv.convert(current_schema)
75-
print(
76-
"Intermediate schema of format " + conv.target_format + " after conversion via " + conv.service_name + ": " + current_schema)
88+
if DETAILED_RESULT_OUTPUT:
89+
print(
90+
"\n\nIntermediate schema of format " + conv.target_format + " after conversion via " + conv.service_name + ": \n" + current_schema + "\n\n\n\n")
7791
# store in cache
7892
conversions_cache[conversion_sub_path_hash] = current_schema
7993

@@ -88,6 +102,6 @@ def attempt_conversion_path(source: str, target: str, path: List[Converter], sch
88102

89103

90104
def print_conversion_path(source: str, target: str, path: List[Converter]) -> None:
    """
    Print a conversion path, one line per conversion step.

    :param source: Source format the path starts from
    :param target: Target format the path ends at
    :param path: Converters that are applied in order along the path
    """
    print("Conversion path for source format '" + source + "' and target format '" + target + "':")
    for conv in path:
        # the closing parenthesis belongs to the converter label, not to the target format
        print(f"- {conv.source_format} --({conv.name} ({conv.service_address}))--> {conv.target_format}")

schema-conversion-orchestrator/converter.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,14 @@ def convert(self, schema: str) -> str:
3030

3131

3232
class ConverterExternal(Converter):
33+
"""
34+
External converter that calls a subprocess to perform conversion
35+
:param name: Name of the converter
36+
:param executable_path: Path to the converter executable
37+
:param service_name: Name of the converter service
38+
:param source_format: Source schema language
39+
:param target_format: Target schema language
40+
"""
3341
def __init__(self, name: str, executable_path: str, service_name: str, source_format: SchemaLanguage,
3442
target_format: SchemaLanguage):
3543
super().__init__(name, executable_path, service_name, source_format, target_format)
@@ -77,6 +85,14 @@ def convert(self, schema: str) -> str:
7785

7886

7987
class ConverterInternal(Converter):
88+
"""
89+
Internal converter that performs conversion using built-in logic, in Python
90+
:param name: Name of the converter
91+
:param service_address: Address of the converter service or path to executable
92+
:param service_name: Name of the converter service
93+
:param source_format: Source schema language
94+
:param target_format: Target schema language
95+
"""
8096
def __init__(self, name: str, service_address: str, service_name: str, source_format: SchemaLanguage,
8197
target_format: SchemaLanguage):
8298
super().__init__(name, service_address, service_name, source_format, target_format)

schema-conversion-orchestrator/build_conversion_matrix.py renamed to schema-conversion-orchestrator/output_build_conversion_matrix.py

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def build_conversion_matrix(conversion_graph: ConversionGraph) -> pd.DataFrame:
3939
return matrix
4040

4141

42-
def plot_conversion_matrix(matrix: pd.DataFrame):
42+
def plot_conversion_matrix(matrix: pd.DataFrame, output_path: str = None) -> None:
4343
# Build weighted numeric matrix for coloring
4444
numeric_matrix = matrix.copy()
4545

@@ -76,20 +76,8 @@ def plot_conversion_matrix(matrix: pd.DataFrame):
7676
plt.xticks(rotation=45, ha="right")
7777
plt.yticks(rotation=0)
7878
plt.tight_layout()
79-
plt.show()
80-
81-
82-
if __name__ == "__main__":
83-
from register_converters import register_converters
84-
from logic import build_conversion_graph
85-
86-
converters: List[Converter] = register_converters()
87-
conversion_graph: ConversionGraph = build_conversion_graph(converters)
88-
89-
df = build_conversion_matrix(conversion_graph)
90-
91-
# Print in console
92-
print(df.to_string())
93-
94-
# Plot nicely
95-
plot_conversion_matrix(df)
79+
if output_path:
80+
plt.savefig(output_path)
81+
else:
82+
plt.show()
83+
plt.close()
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
from rdflib import Graph, Namespace, Literal
2+
from rdflib.namespace import RDF, RDFS
3+
from converter import ConversionGraph
4+
5+
6+
def conversion_graph_to_rdf(conversion_graph: ConversionGraph, output_path) -> None:
    """
    Export the conversion graph as RDF and write it to a file in Turtle syntax.

    Every converter becomes a node of type ``ex:Conversion`` (a reified edge) that carries the
    converter name and points to its source and target format nodes. Every format node gets an
    ``rdfs:label``.

    :param conversion_graph: Mapping whose values are iterables of converters
    :param output_path: Destination handed to ``Graph.serialize`` for the Turtle output
    """
    EX = Namespace("http://example.org/conversions/")

    g = Graph()
    g.bind("ex", EX)
    g.bind("rdfs", RDFS)

    # class and properties used to describe conversions
    CONVERSION = EX.Conversion
    HAS_NAME = EX.name
    HAS_SOURCE = EX.sourceFormat
    HAS_TARGET = EX.targetFormat

    # the mapping keys are not needed: each converter carries its own source format
    for converters in conversion_graph.values():
        for converter in converters:
            src = string_normalize(str(converter.source_format))
            tgt = string_normalize(str(converter.target_format))

            # create URIs for formats
            src_uri = EX[f"format/{src}"]
            tgt_uri = EX[f"format/{tgt}"]

            # add label for readability
            g.add((src_uri, RDFS.label, Literal(src)))
            g.add((tgt_uri, RDFS.label, Literal(tgt)))

            # create a conversion node (reified edge)
            # NOTE(review): the URI is derived from the converter name only, so converters that
            # share a name end up on the same node - confirm that names are unique
            conv_uri = EX[f"conversion/{string_normalize(converter.name)}"]

            g.add((conv_uri, RDF.type, CONVERSION))
            g.add((conv_uri, HAS_NAME, Literal(converter.name)))
            g.add((conv_uri, HAS_SOURCE, src_uri))
            g.add((conv_uri, HAS_TARGET, tgt_uri))

    # serialize as Turtle to file
    g.serialize(output_path, format="turtle")
42+
43+
44+
def string_normalize(s: str) -> str:
    """
    Turn an arbitrary string into text that is safe to embed in a URI.

    Leading and trailing whitespace is dropped and every inner run of whitespace becomes a single
    underscore. Characters that are not allowed in a URI fragment (for example ``<``, ``>``,
    ``"``, ``#`` or ``%``) are percent-encoded; characters that are valid in a fragment are kept
    unchanged.

    :param s: String to normalize
    :return: Normalized string
    """
    from urllib.parse import quote

    # str.split() without arguments already ignores leading and trailing whitespace
    collapsed = "_".join(s.split())
    # keep the sub-delimiters plus ":", "@", "/" and "?", which RFC 3986 allows inside a fragment
    return quote(collapsed, safe="!$&'()*+,;=:@/?")
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import os
2+
from typing import List
3+
from converter import ConversionGraph, Converter
4+
from output_visualize_conversion_graph import visualize_conversion_graph
5+
from output_conversion_graph_to_rdf import conversion_graph_to_rdf
6+
from output_build_conversion_matrix import build_conversion_matrix, plot_conversion_matrix
7+
from register_converters import register_converters
8+
from logic import build_conversion_graph
9+
10+
11+
def generate_diagrams(conversion_graph: ConversionGraph, outputs_dir: str):
    """
    Write all diagram artifacts for one conversion graph into a directory.

    Produces ``conversion_graph.png``, ``conversion_graph.ttl`` and ``conversion_matrix.png``.

    :param conversion_graph: Conversion graph to render
    :param outputs_dir: Directory the output files are written to
    """
    def output_file(filename: str) -> str:
        # all artifacts are placed directly inside outputs_dir
        return os.path.join(outputs_dir, filename)

    # graph image and Turtle export
    visualize_conversion_graph(conversion_graph, output_path=output_file("conversion_graph.png"))
    conversion_graph_to_rdf(conversion_graph, output_path=output_file("conversion_graph.ttl"))

    # conversion matrix image
    matrix = build_conversion_matrix(conversion_graph)
    plot_conversion_matrix(matrix, output_path=output_file("conversion_matrix.png"))
21+
22+
23+
24+
if __name__ == "__main__":
    # one graph over all registered converters, one with the boolean flag of register_converters set
    conversion_graph_full: ConversionGraph = build_conversion_graph(register_converters())
    conversion_graph_core: ConversionGraph = build_conversion_graph(register_converters(True))

    # outputs go to "outputs/<variant>" under the parent of the directory containing this file
    this_dir = os.path.dirname(os.path.abspath(__file__))
    outputs_root = os.path.join(os.path.dirname(this_dir), "outputs")
    outputs_dir_full = os.path.join(outputs_root, "full")
    outputs_dir_core = os.path.join(outputs_root, "core")

    # make sure both output directories exist before anything is written
    for directory in (outputs_dir_full, outputs_dir_core):
        os.makedirs(directory, exist_ok=True)

    generate_diagrams(conversion_graph_full, outputs_dir_full)
    generate_diagrams(conversion_graph_core, outputs_dir_core)

schema-conversion-orchestrator/visualize_conversion_graph.py renamed to schema-conversion-orchestrator/output_visualize_conversion_graph.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,10 @@
22
matplotlib.use("TkAgg") # or "Qt5Agg"
33
import matplotlib.pyplot as plt
44
import networkx as nx
5-
from typing import List
6-
from converter import Converter, ConversionGraph
5+
from converter import ConversionGraph
76

87

9-
def visualize_conversion_graph(conversion_graph: ConversionGraph):
8+
def visualize_conversion_graph(conversion_graph: ConversionGraph, output_path):
109
# Build directed graph
1110
G = nx.DiGraph()
1211
edge_labels = {}
@@ -73,12 +72,7 @@ def visualize_conversion_graph(conversion_graph: ConversionGraph):
7372
plt.title("Schema Conversion Graph", fontsize=16)
7473
plt.axis("off")
7574
plt.tight_layout()
76-
plt.show()
7775

76+
# save plot to file
77+
plt.savefig(output_path)
7878

79-
if __name__ == "__main__":
80-
from register_converters import register_converters
81-
from logic import build_conversion_graph
82-
converters: List[Converter] = register_converters()
83-
conversion_graph: ConversionGraph = build_conversion_graph(converters)
84-
visualize_conversion_graph(conversion_graph)
Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,30 @@
11
from typing import List
22
from converter import Converter
3+
from schema_types import SchemaLanguage
34
from register_python_converters import register_python_converters
45
from register_external_converters import register_external_converters
56

67

7-
def register_converters():
8+
CORE_SCHEMA_LANGUAGES: List[SchemaLanguage] = [
9+
SchemaLanguage.JsonSchema,
10+
SchemaLanguage.Owl,
11+
SchemaLanguage.Xsd,
12+
SchemaLanguage.SHACL,
13+
SchemaLanguage.MdModels,
14+
SchemaLanguage.LinkMl,
15+
SchemaLanguage.JsonLD
16+
]
17+
18+
19+
def register_converters(only_core_languages: bool = False) -> List[Converter]:
    """
    Collect all available converters, Python-based ones first, then external ones.

    :param only_core_languages: If True, keep only converters whose source format and target
        format are both contained in CORE_SCHEMA_LANGUAGES
    :return: List of registered converters
    """
    # build a fresh list instead of extending the one returned by register_python_converters(),
    # so repeated calls cannot grow a list that the callee may still hold on to
    converters: List[Converter] = [*register_python_converters(), *register_external_converters()]

    if only_core_languages:
        # drop every converter that involves a non-core schema language
        converters = [
            conv for conv in converters
            if conv.source_format in CORE_SCHEMA_LANGUAGES and conv.target_format in CORE_SCHEMA_LANGUAGES
        ]

    return converters

0 commit comments

Comments
 (0)