Skip to content

Commit 25b456b

Browse files
committed
✨ add complexity score to analyzer
1 parent 5c00677 commit 25b456b

1 file changed

Lines changed: 51 additions & 0 deletions

File tree

ontolearner/tools/analyzer.py

Lines changed: 51 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414

1515
import logging
1616
import time
17+
import numpy as np
1718
from abc import ABC
1819
from rdflib import RDF, RDFS, OWL
1920
from collections import defaultdict
@@ -186,6 +187,56 @@ def compute_topology_metrics(ontology: BaseOntology) -> TopologyMetrics:
186187

187188
return metrics
188189

190+
@staticmethod
def compute_complexity_score(
    topology_metrics: TopologyMetrics,
    dataset_metrics: DatasetMetrics,
    a: float = 0.4,
    b: float = 6.0,
    eps: float = 1e-12,
) -> float:
    """
    Compute a single normalized complexity score for an ontology.

    Every known metric is read from the attributes of ``topology_metrics`` and
    ``dataset_metrics``, transformed with ``log1p``, multiplied by the weight of
    its category, and the products are averaged (divided by the sum of the
    weights of the metrics that were actually used). The weighted mean ``x`` is
    then squashed with a logistic function::

        score = 1 / (1 + exp(-a * (x - b) + eps))

    Args:
        topology_metrics (TopologyMetrics): Object whose attributes hold the structural metrics
            (only its ``__dict__`` is read).
        dataset_metrics (DatasetMetrics): Object whose attributes hold the dataset metrics
            (only its ``__dict__`` is read). On a name clash its value takes precedence.
        a (float, optional): Steepness parameter of the logistic function. Default is 0.4.
        b (float, optional): Centering parameter of the logistic function; the score is ~0.5 when the
            weighted mean equals ``b``. Default is 6.0.
        eps (float, optional): Small offset added to the logistic exponent. Kept for backward
            compatibility; at the default value its effect is negligible. Default is 1e-12.

    Returns:
        float: Complexity score in [0, 1], where higher values indicate more complex ontologies.

    Notes:
        - Weights are assigned per category: graph metrics, coverage metrics, hierarchy metrics,
          breadth metrics, and dataset metrics (term-types, taxonomic, non-taxonomic relations).
        - Metrics that are absent from both inputs, or whose value is ``None``, are skipped and do
          not contribute to the weight total.
        - If no metric is available the weighted mean is taken to be 0.0.
    """
    # (weight, metric names) pairs. A sequence of pairs is used instead of a dict keyed by
    # weight so that two categories can share the same weight without overwriting each other.
    metric_categories = (
        (0.3, ("total_nodes", "total_edges", "num_root_nodes", "num_leaf_nodes")),
        (0.25, ("num_classes", "num_properties", "num_individuals")),
        (0.10, ("max_depth", "min_depth", "avg_depth", "depth_variance")),
        (0.20, ("max_breadth", "min_breadth", "avg_breadth", "breadth_variance")),
        (0.15, ("num_term_types", "num_taxonomic_relations", "num_non_taxonomic_relations", "avg_terms")),
    )
    onto_metrics = {**topology_metrics.__dict__, **dataset_metrics.__dict__}

    weighted_total = 0.0
    total_weight = 0.0
    for weight, names in metric_categories:
        for name in names:
            value = onto_metrics.get(name)
            if value is None:
                # Missing or unset metric: leave it out of both numerator and denominator.
                continue
            weighted_total += np.log1p(value) * weight
            total_weight += weight

    weighted_sum = weighted_total / total_weight if total_weight > 0 else 0.0

    # For extreme a/b the exponential may overflow to inf; the score then saturates to 0.0,
    # which is the correct limit, so the overflow warning is suppressed.
    with np.errstate(over="ignore"):
        complexity_score = 1.0 / (1.0 + np.exp(-a * (weighted_sum - b) + eps))
    # Return a plain Python float, as promised by the annotation, rather than np.float64.
    return float(complexity_score)
238+
239+
189240
@staticmethod
190241
def compute_dataset_metrics(ontology: BaseOntology) -> DatasetMetrics:
191242
"""

0 commit comments

Comments
 (0)