|
14 | 14 |
|
15 | 15 | import logging |
16 | 16 | import time |
| 17 | +import numpy as np |
17 | 18 | from abc import ABC |
18 | 19 | from rdflib import RDF, RDFS, OWL |
19 | 20 | from collections import defaultdict |
@@ -186,6 +187,56 @@ def compute_topology_metrics(ontology: BaseOntology) -> TopologyMetrics: |
186 | 187 |
|
187 | 188 | return metrics |
188 | 189 |
|
@staticmethod
def compute_complexity_score(
    topology_metrics: TopologyMetrics,
    dataset_metrics: DatasetMetrics,
    a: float = 0.4,
    b: float = 6.0,
    eps: float = 1e-12
) -> float:
    """
    Compute a single normalized complexity score for an ontology.

    Every known metric found on the two metric objects is log-normalized
    with ``log1p``, multiplied by the weight of its category, and the
    weighted values are averaged (divided by the sum of the weights that
    were actually used). The average is then squashed with a logistic
    function so the final score lies in [0, 1].

    Args:
        topology_metrics (TopologyMetrics): Precomputed structural metrics of the ontology graph.
            Only its instance attributes are read.
        dataset_metrics (DatasetMetrics): Precomputed metrics of extracted learning datasets.
            Only its instance attributes are read; on a name clash these
            take precedence over ``topology_metrics``.
        a (float, optional): Steepness of the logistic function. Default is 0.4.
        b (float, optional): Center of the logistic function; should be tuned to the
            scale of the aggregated (log-normalized) metrics. Default is 6.0.
        eps (float, optional): Tiny offset subtracted from the logistic argument.
            Its effect on the result is negligible; it is kept so scores stay
            identical to those of earlier versions. Default is 1e-12.

    Returns:
        float: Normalized complexity score in [0, 1], where higher values indicate
        more complex ontologies. When no known metric is available the
        weighted average is taken as 0.0 before the logistic is applied.

    Notes:
        - Category weights: graph metrics 0.30, coverage metrics 0.25,
          hierarchy (depth) metrics 0.10, breadth metrics 0.20, and
          dataset metrics 0.15.
        - Metrics that are absent, or present but ``None``, are skipped and
          do not contribute to the weight total.
        - The logistic is evaluated in a form that never exponentiates a
          positive number, so extreme ``a``/``b`` values cannot overflow.
    """
    # (weight, metric names) pairs. A sequence is used instead of a dict keyed
    # by weight so two categories may share a weight without one silently
    # replacing the other.
    weighted_categories = (
        (0.30, ("total_nodes", "total_edges", "num_root_nodes", "num_leaf_nodes")),
        (0.25, ("num_classes", "num_properties", "num_individuals")),
        (0.10, ("max_depth", "min_depth", "avg_depth", "depth_variance")),
        (0.20, ("max_breadth", "min_breadth", "avg_breadth", "breadth_variance")),
        (0.15, ("num_term_types", "num_taxonomic_relations",
                "num_non_taxonomic_relations", "avg_terms")),
    )

    onto_metrics = {**vars(topology_metrics), **vars(dataset_metrics)}

    weighted_total = 0.0
    total_weight = 0.0
    for weight, metric_names in weighted_categories:
        for metric_name in metric_names:
            value = onto_metrics.get(metric_name)
            if value is None:
                # Missing or unset metric: leave it out of both sums.
                continue
            weighted_total += float(np.log1p(value)) * weight
            total_weight += weight

    weighted_sum = weighted_total / total_weight if total_weight > 0 else 0.0

    # Logistic argument; equal to the negated exponent of the classic
    # 1 / (1 + exp(-a * (x - b) + eps)) formulation.
    z = a * (weighted_sum - b) - eps

    # Numerically stable sigmoid: only exponentiate non-positive values.
    if z >= 0:
        complexity_score = 1.0 / (1.0 + np.exp(-z))
    else:
        exp_z = np.exp(z)
        complexity_score = exp_z / (1.0 + exp_z)

    # Return a plain Python float, as the annotation promises.
    return float(complexity_score)
| 238 | + |
| 239 | + |
189 | 240 | @staticmethod |
190 | 241 | def compute_dataset_metrics(ontology: BaseOntology) -> DatasetMetrics: |
191 | 242 | """ |
|
0 commit comments