|
| 1 | +import os |
| 2 | +from typing import Tuple |
| 3 | +import pandas as pd |
| 4 | +import matplotlib.pyplot as plt |
| 5 | +import seaborn as sns |
| 6 | + |
| 7 | +import requests |
| 8 | + |
# Endpoint that send_conversion_request() posts conversion requests to.
SERVER_URL = "http://localhost:5002/convert"
# Schema languages used both as golden-schema folder names and as the
# source/target languages of every conversion pair that evaluate() runs.
SCHEMA_LANGUAGES = ["JsonSchema", "Xsd", "Dtd", "Shacl_TTL", "Owl_TTL"]
| 11 | + |
| 12 | + |
def send_conversion_request(
    source_language: str,
    target_language: str,
    schema: str,
    timeout: float | None = 300.0,
) -> dict | Tuple[int, str]:
    """
    Sends one conversion request to the server at SERVER_URL.

    :param source_language: Language of the given schema; sent as "sourceLanguage".
    :param target_language: Language to convert into; sent as "targetLanguage".
    :param schema: Schema text to convert; sent as "schema".
    :param timeout: Seconds to wait for the server before giving up. None waits indefinitely.
    :return: The decoded JSON body when the server answers with HTTP 200, otherwise a tuple
        (status code, response text). A client-side timeout is reported as (0, message)
        because no HTTP status exists in that case.
    :raises requests.RequestException: For transport failures other than a timeout,
        e.g. when the server is not reachable at all.
    """
    payload = {
        "sourceLanguage": source_language,
        "targetLanguage": target_language,
        "schema": schema
    }

    print(f"Sending conversion request {source_language} → {target_language} ...")
    try:
        # Without a timeout a stalled server would block the whole evaluation forever.
        resp = requests.post(SERVER_URL, json=payload, timeout=timeout)
    except requests.Timeout as exc:
        # Report the timeout in the same shape as an HTTP error so that callers can
        # record it and carry on with the remaining conversions.
        print(f"Error: request timed out: {exc}")
        return 0, str(exc)

    if resp.status_code == 200:
        return resp.json()

    print(f"Error {resp.status_code}: {resp.text}")
    return resp.status_code, resp.text
| 32 | + |
| 33 | + |
def load_golden_schemas(schema_languages: list[str]) -> dict[str, str]:
    """
    Loads the golden schema for each schema language from the 'golden_schemas/' folder.

    Each language is expected to have its own sub-folder containing a single schema file.
    Sub-directories and hidden files (names starting with '.') are ignored; if several
    regular files remain, the alphabetically first one is used.

    :param schema_languages: List of schema languages to load.
    :return: A map of schema language to its golden schema text. Languages whose folder
        contains no usable file are left out (a warning is printed).
    :raises FileNotFoundError: If the folder of a language does not exist.
    """
    golden_schemas = {}
    for language in schema_languages:
        source_folder = os.path.join("golden_schemas", language)

        # os.listdir() returns entries in arbitrary order, so sort to make the choice
        # reproducible, and skip anything that cannot be the schema file.
        candidates = [
            os.path.join(source_folder, filename)
            for filename in sorted(os.listdir(source_folder))
            if not filename.startswith(".")
            and os.path.isfile(os.path.join(source_folder, filename))
        ]
        if not candidates:
            print(f"Warning: no golden schema file found in '{source_folder}'.")
            continue

        # Read as UTF-8 explicitly instead of relying on the platform's default encoding.
        with open(candidates[0], "r", encoding="utf-8") as f:
            golden_schemas[language] = f.read()
    return golden_schemas
| 49 | + |
| 50 | + |
def _format_conversion_attempts(response: dict) -> list[dict]:
    """Turns the "results" list of a successful response into one summary dict per attempt."""
    formatted_results = []
    for attempt, result_entry in enumerate(response["results"], start=1):
        steps = result_entry["conversionPath"]
        # Short form is used in file names, full form is kept as human-readable metadata.
        conversion_path_short = "__".join(
            f"to_{step['targetLanguage']}_via_{step['serviceName']}" for step in steps
        )
        conversion_path_full = " -> ".join(
            f"{step['sourceLanguage']} to {step['targetLanguage']} via {step['serviceName']}[{step['converterName']}]"
            for step in steps
        )
        formatted_results.append({
            "attempt": attempt,
            "success": result_entry["success"],
            "result_schema": result_entry.get("result", None),
            "conversion_path": conversion_path_short,
            "conversion_path_full": conversion_path_full
        })
    return formatted_results


def request_conversion_results(schema_languages: list[str], golden_schemas: dict[str, str]) -> dict[str, dict[str, list[dict]]]:
    """
    Requests a conversion for every ordered pair of distinct source and target languages.

    :param schema_languages: Languages to use as both sources and targets.
    :param golden_schemas: Map of schema language to the schema text used as conversion input.
    :return: Nested map [source_language][target_language] -> list of attempt dicts with the
        keys "attempt", "success", "result_schema", "conversion_path" and
        "conversion_path_full". A target whose request failed always maps to an empty list,
        so every attempted pair is present in the result.
    :raises KeyError: If a language has no entry in golden_schemas.
    """
    results = {}
    # These counters track requests, not individual conversion attempts: a request counts as
    # a success whenever the server answered without an error, even if attempts inside failed.
    successes = 0
    errors = 0
    for source_language in schema_languages:
        input_schema = golden_schemas[source_language]

        source_language_results = {}
        for target_language in schema_languages:
            if source_language == target_language:
                continue

            request_result = send_conversion_request(source_language, target_language, input_schema)
            if isinstance(request_result, tuple):
                # Error responses come back as (status code, message).
                error_code, error_message = request_result
                errors += 1
                print("Conversion request error code " + str(error_code) + ": " + error_message)
                # Record the failed pair explicitly so the result shape does not depend on
                # whether other targets of the same source happened to succeed.
                source_language_results[target_language] = []
                continue

            successes += 1
            source_language_results[target_language] = _format_conversion_attempts(request_result)

        results[source_language] = source_language_results
    print(f"Conversions completed: {successes} successful conversions, {errors} errors.")
    return results
| 101 | + |
| 102 | + |
def store_conversion_results(results: dict[str, dict[str, list[dict]]]) -> None:
    """
    Writes every conversion attempt to disk below 'output_schemas/'.

    For each attempt two files are written to
    output_schemas/<source_language>/<target_language>/:
      - attempt_<attempt>_success_<success>_path_<conversion_path>.txt with the result schema
        (or the text "No result schema." when there is none), and
      - the same name with a '_metadata.txt' suffix holding the full conversion path.
    The target folder is created even when there are no attempts for a pair.

    :param results: Nested map [source_language][target_language] -> list of attempt dicts
        with the keys "attempt", "success", "result_schema", "conversion_path" and
        "conversion_path_full".
    """
    output_base_folder = "output_schemas"
    for source_language, target_language_results in results.items():
        for target_language, result_entries in target_language_results.items():
            output_folder = os.path.join(output_base_folder, source_language, target_language)
            os.makedirs(output_folder, exist_ok=True)

            for entry in result_entries:
                result_schema = entry["result_schema"]
                # Both files share this stem; build it once so they cannot drift apart.
                file_stem = (
                    f"attempt_{entry['attempt']}_success_{entry['success']}"
                    f"_path_{entry['conversion_path']}"
                )

                # Write UTF-8 explicitly: schemas may contain characters that the platform's
                # default encoding cannot represent.
                output_filepath = os.path.join(output_folder, f"{file_stem}.txt")
                with open(output_filepath, "w", encoding="utf-8") as f:
                    f.write(result_schema if result_schema is not None else "No result schema.")

                # Additionally store the full conversion path in a separate metadata file.
                output_metadata_filepath = os.path.join(output_folder, f"{file_stem}_metadata.txt")
                with open(output_metadata_filepath, "w", encoding="utf-8") as f:
                    f.write(f"Full Conversion Path:\n{entry['conversion_path_full']}\n")
| 132 | + |
| 133 | + |
def evaluate():
    """
    Converts the golden schema of every schema language into every other schema language
    and summarises the outcome.

    The Schema Conversion Orchestrator under evaluation is expected to be running locally
    already. Golden schemas are read from 'golden_schemas/', the converted schemas are
    written to 'output_schemas/', and the resulting length matrix is saved as
    'conversion_matrix.png'.
    """
    print("Running evaluation...")

    # One source schema per language, read from disk.
    source_schemas = load_golden_schemas(SCHEMA_LANGUAGES)

    # Ask for every source/target combination, then persist whatever came back.
    conversion_results = request_conversion_results(SCHEMA_LANGUAGES, source_schemas)
    store_conversion_results(conversion_results)

    # Summarise the results as a matrix and render it to an image file.
    plot_conversion_matrix(
        compute_conversion_matrix(source_schemas, conversion_results),
        output_path="conversion_matrix.png",
    )
| 161 | + |
| 162 | + |
def compute_conversion_matrix(golden_schemas: dict[str, str], results: dict[str, dict[str, any]]) -> pd.DataFrame:
    """
    Builds a source-by-target table of converted schema lengths (in characters).

    Cell contents:
      - diagonal (source == target): length of the original golden schema,
      - otherwise: the lengths of all successful attempts that carry a result schema,
        joined with ", ",
      - a dash ("—") when no such attempt exists for the pair.

    :param golden_schemas: Map of schema language to its golden schema text.
    :param results: Nested map [source_language][target_language] -> list of attempt dicts;
        the keys "success" and "result_schema" of each attempt are read.
    :return: DataFrame of strings whose rows (sources) and columns (targets) are the keys
        of results.
    """
    def cell_text(source: str, target: str, attempts_by_target: dict) -> str:
        # The diagonal shows the size of the untouched input schema.
        if source == target:
            return str(len(golden_schemas[source]))

        successful_lengths = [
            str(len(attempt["result_schema"]))
            for attempt in attempts_by_target.get(target, [])
            if attempt["success"] and attempt["result_schema"] is not None
        ]
        return ", ".join(successful_lengths) if successful_lengths else "—"

    languages = results.keys()
    matrix = pd.DataFrame(index=languages, columns=languages)

    for source, attempts_by_target in results.items():
        for target in matrix.columns:
            matrix.loc[source, target] = cell_text(source, target, attempts_by_target)

    matrix.index.name = "Source Language"
    matrix.columns.name = "Target Language"
    return matrix
| 204 | + |
| 205 | + |
def plot_conversion_matrix(matrix: pd.DataFrame, output_path: str | None = None) -> None:
    """
    Renders the conversion length matrix as an annotated heatmap.

    The cell colour is driven by a single number per cell: 0 for a dash ("—"), otherwise
    the smallest of the comma-separated lengths. The original cell text is used as the
    annotation.

    :param matrix: Matrix of strings where each cell is either "—" or one or more integer
        lengths separated by ", ".
    :param output_path: File to save the figure to. When empty or None the figure is shown
        interactively instead.
    """
    def colour_value(cell: str) -> int:
        if cell == "—":
            return 0
        # Take the minimum length if there were multiple attempts.
        return min(int(length) for length in cell.split(", "))

    numeric_matrix = matrix.copy()
    for i in numeric_matrix.index:
        for j in numeric_matrix.columns:
            numeric_matrix.loc[i, j] = colour_value(matrix.loc[i, j])

    fig = plt.figure(figsize=(8, 6))
    try:
        sns.heatmap(numeric_matrix.astype(float), annot=matrix, fmt='', cmap="YlGnBu", cbar_kws={'label': 'Schema Length'})
        plt.title("Schema Conversion Length Matrix")
        plt.tight_layout()

        if output_path:
            plt.savefig(output_path)
        else:
            plt.show()
    finally:
        # pyplot keeps every figure alive until it is closed explicitly; release it so that
        # repeated calls do not accumulate open figures.
        plt.close(fig)
| 229 | + |
| 230 | + |
# Run the full evaluation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    evaluate()
0 commit comments