Skip to content

Commit 7be220f

Browse files
committed
WIP more connections and started eval
1 parent 9417c42 commit 7be220f

24 files changed

Lines changed: 5148 additions & 418 deletions

eval/evaluate.py

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
import os
from typing import Any, Tuple

import matplotlib.pyplot as plt
import pandas as pd
import requests
import seaborn as sns
8+
9+
# Endpoint of the conversion service; send_conversion_request POSTs every request here.
SERVER_URL = "http://localhost:5002/convert"
# Schema languages under evaluation. Each name is used as the sub-folder name when
# loading golden schemas and as the sourceLanguage/targetLanguage value in requests.
SCHEMA_LANGUAGES = ["JsonSchema", "Xsd", "Dtd", "Shacl_TTL", "Owl_TTL"]
11+
12+
13+
def send_conversion_request(
    source_language: str,
    target_language: str,
    schema: str,
    timeout: float | None = None,
) -> Any | Tuple[int, str]:
    """
    Posts one schema conversion request to SERVER_URL and returns the outcome.

    Exceptions raised by ``requests.post`` itself (for example when the server cannot be
    reached or the timeout expires) are not caught here and propagate to the caller.

    :param source_language: Language of the schema being sent ("sourceLanguage" in the payload).
    :param target_language: Language the schema should be converted to ("targetLanguage").
    :param schema: The schema text to convert.
    :param timeout: Seconds to wait for the server, forwarded to ``requests.post``.
        ``None`` (the default) waits indefinitely, which is the previous behaviour.
    :return: The parsed JSON body when the server answers with HTTP 200, otherwise a
        ``(status_code, response_text)`` tuple describing the failure.
    """
    payload = {
        "sourceLanguage": source_language,
        "targetLanguage": target_language,
        "schema": schema,
    }

    # Separate the two language names so the log line is unambiguous.
    print(f"Sending conversion request {source_language} -> {target_language} ...")
    resp = requests.post(SERVER_URL, json=payload, timeout=timeout)

    if resp.status_code == 200:
        return resp.json()

    # Callers tell failures apart from results by checking for a tuple.
    print(f"Error {resp.status_code}: {resp.text}")
    return resp.status_code, resp.text
32+
33+
34+
def load_golden_schemas(schema_languages: list[str], base_folder: str = "golden_schemas") -> dict[str, str]:
    """
    Loads the golden schema for each schema language from ``<base_folder>/<language>/``.

    Exactly one schema file per language is expected. If a folder holds several files,
    the alphabetically first one is used so that the choice is deterministic;
    sub-directories are ignored. A language whose folder contains no file gets no entry
    in the returned map.

    :param schema_languages: List of schema languages to load.
    :param base_folder: Folder containing one sub-folder per schema language.
    :return: A map of schema language to its corresponding golden schema string.
    :raises OSError: If a language folder does not exist or a file cannot be read.
    """
    golden_schemas = {}
    for language in schema_languages:
        source_folder = os.path.join(base_folder, language)
        # os.listdir returns entries in arbitrary order; sort for a stable pick.
        for filename in sorted(os.listdir(source_folder)):
            schema_path = os.path.join(source_folder, filename)
            if not os.path.isfile(schema_path):
                continue
            # Read as UTF-8 explicitly instead of relying on the platform default.
            with open(schema_path, "r", encoding="utf-8") as f:
                golden_schemas[language] = f.read()
            break  # There is only one schema file per language
    return golden_schemas
49+
50+
51+
def request_conversion_results(schema_languages: list[str], golden_schemas: dict[str, str]) -> dict[str, dict[str, Any]]:
    """
    Requests a conversion of every golden schema into every other schema language.

    :param schema_languages: Languages to use both as conversion sources and as targets.
    :param golden_schemas: Map of schema language to the schema text sent as input.
        Must contain an entry for every language in ``schema_languages``.
    :return: Nested map ``results[source_language][target_language]`` holding a list of
        records, one per entry in the response's "results" list. Each record has the keys
        "attempt", "success", "result_schema", "conversion_path" and "conversion_path_full".
        A target whose request failed is recorded with an empty list.
    """
    results = {}
    # Both counters are per HTTP request, not per conversion attempt in a response.
    successes = 0
    errors = 0

    for source_language in schema_languages:
        input_schema = golden_schemas[source_language]
        source_language_results = {}

        for target_language in schema_languages:
            if source_language == target_language:
                continue

            request_result = send_conversion_request(source_language, target_language, input_schema)

            # A tuple is the (status_code, message) failure form of send_conversion_request.
            if isinstance(request_result, tuple):
                error_code, error_message = request_result
                errors += 1
                print("Conversion request error code " + str(error_code) + ": " + error_message)
                # Always record the failed pair so every source maps to the same set of
                # targets, whether one or all of its requests failed.
                source_language_results[target_language] = []
                continue

            successes += 1
            source_language_results[target_language] = [
                _format_result_entry(attempt, result_entry)
                for attempt, result_entry in enumerate(request_result["results"], start=1)
            ]

        results[source_language] = source_language_results

    print(f"Conversions completed: {successes} successful conversions, {errors} errors.")
    return results


def _format_result_entry(attempt: int, result_entry: dict[str, Any]) -> dict[str, Any]:
    """Flattens one entry of a response's "results" list into a storable record."""
    path_steps = result_entry["conversionPath"]
    # Short form is used in output file names, full form in the metadata files.
    conversion_path_short = "__".join(
        f"to_{step['targetLanguage']}_via_{step['serviceName']}" for step in path_steps
    )
    conversion_path_full = " -> ".join(
        f"{step['sourceLanguage']} to {step['targetLanguage']} via {step['serviceName']}[{step['converterName']}]"
        for step in path_steps
    )
    return {
        "attempt": attempt,
        "success": result_entry["success"],
        "result_schema": result_entry.get("result", None),
        "conversion_path": conversion_path_short,
        "conversion_path_full": conversion_path_full,
    }
101+
102+
103+
def store_conversion_results(results: dict[str, dict[str, Any]], output_base_folder: str = "output_schemas") -> None:
    """
    Writes every conversion attempt to disk, one schema file plus one metadata file each.

    Files are stored as
    ``<output_base_folder>/<source_language>/<target_language>/attempt_<attempt>_success_<success>_path_<conversion_path>.txt``
    with the full conversion path in a sibling ``..._metadata.txt`` file. The folder for a
    source/target pair is created even when the pair has no attempts.

    :param results: Nested map ``results[source_language][target_language]`` of attempt
        records with the keys "attempt", "success", "result_schema", "conversion_path"
        and "conversion_path_full".
    :param output_base_folder: Root folder for the output tree.
    """
    for source_language, target_language_results in results.items():
        for target_language, result_entries in target_language_results.items():
            output_folder = os.path.join(output_base_folder, source_language, target_language)
            os.makedirs(output_folder, exist_ok=True)

            for entry in result_entries:
                attempt = entry["attempt"]
                success = entry["success"]
                result_schema = entry["result_schema"]
                conversion_path = entry["conversion_path"]
                conversion_path_full = entry["conversion_path_full"]

                # Shared stem keeps the schema file and its metadata file paired.
                file_stem = f"attempt_{attempt}_success_{success}_path_{conversion_path}"

                # Write UTF-8 explicitly: schema text may contain characters the
                # platform default encoding cannot represent.
                with open(os.path.join(output_folder, f"{file_stem}.txt"), "w", encoding="utf-8") as f:
                    if result_schema is not None:
                        f.write(result_schema)
                    else:
                        f.write("No result schema.")

                # Additionally store full conversion path in a separate metadata file
                with open(os.path.join(output_folder, f"{file_stem}_metadata.txt"), "w", encoding="utf-8") as f:
                    f.write(f"Full Conversion Path:\n{conversion_path_full}\n")
132+
133+
134+
def evaluate():
    """
    Evaluates the Schema Conversion Orchestrator on the golden schemas.

    Each golden schema is converted from its own language into every other language in
    SCHEMA_LANGUAGES. The orchestrator under evaluation is expected to be running
    locally already.

    Inputs are read from the 'golden_schemas/' folder structure, converted schemas are
    written to 'output_schemas/', and a heatmap of the resulting schema lengths is saved
    as 'conversion_matrix.png'.
    """
    print("Running evaluation...")

    # Source schemas, one per language.
    golden_schemas = load_golden_schemas(SCHEMA_LANGUAGES)

    # Convert every source/target combination and persist what came back.
    results = request_conversion_results(SCHEMA_LANGUAGES, golden_schemas)
    store_conversion_results(results)

    # Summarise the outcome as a matrix and render it to an image file.
    plot_conversion_matrix(
        compute_conversion_matrix(golden_schemas, results),
        output_path="conversion_matrix.png",
    )
161+
162+
163+
def compute_conversion_matrix(golden_schemas: dict[str, str], results: dict[str, dict[str, Any]]) -> pd.DataFrame:
    """
    Computes a matrix that shows the character lengths of the converted schemas for each source-target language pair.
    For a source-target pair with multiple conversion attempts, the lengths are comma-separated.
    For unconvertible pairs, a dash ("—") is used.
    For paths where source and target language are the same, the length of the original schema is used.

    :param golden_schemas: Map of schema language to its original schema text; needs an
        entry for every source language in ``results``.
    :param results: Nested map ``results[source_language][target_language]`` of attempt
        records carrying at least the keys "success" and "result_schema".
    :return: DataFrame of strings with source languages as index ("Source Language") and
        target languages as columns ("Target Language"), both taken from the keys of
        ``results``.
    """
    languages = list(results)
    # Start from an empty frame and fill it cell by cell.
    matrix = pd.DataFrame(index=languages, columns=languages)

    for source_language, target_language_results in results.items():
        for target_language in languages:
            if source_language == target_language:
                # Diagonal: length of original schema
                cell = str(len(golden_schemas[source_language]))
            else:
                cell = _format_attempt_lengths(target_language_results.get(target_language, []))
            matrix.loc[source_language, target_language] = cell

    matrix.index.name = "Source Language"
    matrix.columns.name = "Target Language"
    return matrix


def _format_attempt_lengths(result_entries: list[dict[str, Any]]) -> str:
    """Joins the schema lengths of all successful attempts, or returns "—" if there are none."""
    lengths = [
        str(len(entry["result_schema"]))
        for entry in result_entries
        # Failed attempts and attempts without a schema contribute nothing.
        if entry["success"] and entry["result_schema"] is not None
    ]
    return ", ".join(lengths) if lengths else "—"
204+
205+
206+
def plot_conversion_matrix(matrix: pd.DataFrame, output_path: str | None = None) -> None:
    """
    Renders the conversion length matrix as an annotated heatmap.

    Cell colour is driven by a numeric value derived from each cell's text: "—" counts
    as 0, otherwise the smallest of the comma-separated lengths is used. The original
    cell text is shown as the annotation.

    :param matrix: Matrix of strings in which every cell is either "—" or one or more
        integers separated by ", ".
    :param output_path: File to save the figure to. If empty or ``None``, the figure is
        shown interactively instead.
    """
    numeric_matrix = matrix.copy()

    for i in numeric_matrix.index:
        for j in numeric_matrix.columns:
            val = matrix.loc[i, j]

            if val == "—":
                numeric_matrix.loc[i, j] = 0
            else:
                # take the minimum length if multiple attempts
                numeric_matrix.loc[i, j] = min(map(int, val.split(", ")))

    fig = plt.figure(figsize=(8, 6))
    sns.heatmap(numeric_matrix.astype(float), annot=matrix, fmt='', cmap="YlGnBu", cbar_kws={'label': 'Schema Length'})
    plt.title("Schema Conversion Length Matrix")
    plt.tight_layout()

    if output_path:
        plt.savefig(output_path)
        # pyplot keeps figures registered until they are closed; release this one
        # once it has been written so repeated calls do not accumulate figures.
        plt.close(fig)
    else:
        plt.show()
229+
230+
231+
# Run the evaluation only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    evaluate()
File renamed without changes.

0 commit comments

Comments
 (0)