Skip to content

Commit 452cf48

Browse files
committed
Add structured AutoML report API for LLM-friendly output (#807)
1 parent 0edc04f commit 452cf48

7 files changed

Lines changed: 764 additions & 3 deletions

File tree

Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,43 @@
1+
import os
2+
3+
from sklearn.datasets import make_classification
4+
5+
from supervised import AutoML
6+
7+
8+
def main():
    """Fit AutoML on a synthetic classification task and show its structured report.

    The report is printed twice with the default output format (once without
    and once with model details), then fetched as a dict to list its
    top-level keys and the number of models it contains. Finally the script
    prints whether `report_structured.json` exists in the results directory.
    """
    output_dir = "AutoML_report_structured_classification"
    # Path where the structured report is expected to land; checked at the end.
    json_path = os.path.join(output_dir, "report_structured.json")

    X, y = make_classification(
        n_samples=300,
        n_features=12,
        n_informative=6,
        n_redundant=2,
        random_state=123,
    )

    model = AutoML(
        mode="Explain",
        total_time_limit=300,
        results_path=output_dir,
        random_state=123,
        verbose=0,
    )
    model.fit(X, y)

    print("\n=== report_structured(model_details=False) ===\n")
    summary_only = model.report_structured(model_details=False)
    print(summary_only)

    print("\n=== report_structured(model_details=True) ===\n")
    with_details = model.report_structured(model_details=True)
    print(with_details)

    report = model.report_structured(format="dict", model_details=False)
    top_level_keys = sorted(report)
    model_count = len(report.get("models", []))
    print("\nTop-level keys:", top_level_keys)
    print("Number of models in report:", model_count)

    print("Structured report JSON:", json_path)
    print("Exists:", os.path.exists(json_path))


if __name__ == "__main__":
    main()
Lines changed: 57 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,57 @@
1+
import os
2+
3+
import pandas as pd
4+
from sklearn.datasets import make_classification
5+
6+
from supervised import AutoML
7+
8+
9+
def main():
    """Fit a fairness-aware AutoML run and show its structured report.

    Synthetic classification data is paired with two made-up categorical
    sensitive features. After fitting, the report is printed twice with the
    default output format (without and with model details), then fetched as
    a dict to list its top-level keys, the number of models and whether a
    fairness summary is present. Finally the script prints whether
    `report_structured.json` exists in the results directory.
    """
    output_dir = "AutoML_report_structured_fairness"
    # Path where the structured report is expected to land; checked at the end.
    json_path = os.path.join(output_dir, "report_structured.json")

    X, y = make_classification(
        n_samples=300,
        n_features=10,
        n_informative=5,
        n_redundant=1,
        random_state=123,
    )

    # Two categorical sensitive features assigned purely by row position:
    # gender alternates every row, group "A" is every third row.
    row_count = len(y)
    gender_column = [
        "female" if row % 2 == 0 else "male" for row in range(row_count)
    ]
    group_column = ["A" if row % 3 == 0 else "B" for row in range(row_count)]
    sensitive = pd.DataFrame({"gender": gender_column, "group": group_column})

    model = AutoML(
        mode="Explain",
        total_time_limit=300,
        fairness_metric="demographic_parity_ratio",
        fairness_threshold=0.8,
        privileged_groups=[{"gender": "male"}],
        underprivileged_groups=[{"gender": "female"}],
        results_path=output_dir,
        random_state=123,
        verbose=0,
    )
    model.fit(X, y, sensitive_features=sensitive)

    print("\n=== report_structured(model_details=False) ===\n")
    summary_only = model.report_structured(model_details=False)
    print(summary_only)

    print("\n=== report_structured(model_details=True) ===\n")
    with_details = model.report_structured(model_details=True)
    print(with_details)

    report = model.report_structured(format="dict", model_details=False)
    top_level_keys = sorted(report)
    model_count = len(report.get("models", []))
    has_fairness_summary = report.get("fairness_summary") is not None
    print("\nTop-level keys:", top_level_keys)
    print("Number of models in report:", model_count)
    print("Fairness summary available:", has_fairness_summary)

    print("Structured report JSON:", json_path)
    print("Exists:", os.path.exists(json_path))


if __name__ == "__main__":
    main()
Lines changed: 43 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,43 @@
1+
import os
2+
3+
from sklearn.datasets import make_regression
4+
5+
from supervised import AutoML
6+
7+
8+
def main():
    """Fit AutoML on a synthetic regression task and show its structured report.

    The report is printed twice with the default output format (once without
    and once with model details), then fetched as a dict to list its
    top-level keys and the number of models it contains. Finally the script
    prints whether `report_structured.json` exists in the results directory.
    """
    output_dir = "AutoML_report_structured_regression"
    # Path where the structured report is expected to land; checked at the end.
    json_path = os.path.join(output_dir, "report_structured.json")

    X, y = make_regression(
        n_samples=300,
        n_features=12,
        n_informative=8,
        noise=2.0,
        random_state=123,
    )

    model = AutoML(
        mode="Explain",
        total_time_limit=300,
        results_path=output_dir,
        random_state=123,
        verbose=0,
    )
    model.fit(X, y)

    print("\n=== report_structured(model_details=False) ===\n")
    summary_only = model.report_structured(model_details=False)
    print(summary_only)

    print("\n=== report_structured(model_details=True) ===\n")
    with_details = model.report_structured(model_details=True)
    print(with_details)

    report = model.report_structured(format="dict", model_details=False)
    top_level_keys = sorted(report)
    model_count = len(report.get("models", []))
    print("\nTop-level keys:", top_level_keys)
    print("Number of models in report:", model_count)

    print("Structured report JSON:", json_path)
    print("Exists:", os.path.exists(json_path))


if __name__ == "__main__":
    main()

supervised/automl.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -535,6 +535,9 @@ def score(
535535
def report(self, width=900, height=1200):
536536
return self._report(width, height)
537537

538+
def report_structured(self, format="markdown", model_details=True):
    """Return the structured AutoML report.

    Public entry point that forwards both arguments unchanged to
    `_report_structured` and returns whatever it produces.

    Arguments:
        format (str): Requested output format, `"markdown"` by default.
        model_details (bool): Forwarded as-is; `True` by default.

    Returns:
        The value returned by `_report_structured`.
    """
    structured = self._report_structured(
        format=format, model_details=model_details
    )
    return structured
540+
538541
def need_retrain(
539542
self,
540543
X: Union[numpy.ndarray, pandas.DataFrame],

supervised/base_automl.py

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,11 @@
4646
from supervised.utils.jsonencoder import MLJSONEncoder
4747
from supervised.utils.leaderboard_plots import LeaderboardPlots
4848
from supervised.utils.metric import Metric, UserDefinedEvalMetric
49+
from supervised.utils.report_structured import (
50+
build_structured_report,
51+
save_structured_report,
52+
to_markdown,
53+
)
4954
from supervised.utils.utils import dump_data, load_data
5055

5156
logger = logging.getLogger(__name__)
@@ -2475,6 +2480,34 @@ def _report(self, width=900, height=1200):
24752480

24762481
return self._show_report(main_readme_html, width, height)
24772482

2483+
def _report_structured(self, format="markdown", model_details=True):
2484+
self._results_path = self._get_results_path()
2485+
if self._fit_level != "finished":
2486+
self.load(self._results_path)
2487+
elif self._models is None or len(self._models) == 0:
2488+
# Handle objects where fit() returned early because results already exist.
2489+
# In that case, fit_level can be "finished" but models might not be loaded.
2490+
self.load(self._results_path)
2491+
2492+
if self._models is None or len(self._models) == 0:
2493+
raise AutoMLException(
2494+
"This model has not been fitted yet. Please call `fit()` first."
2495+
)
2496+
2497+
if format not in ["markdown", "dict", "json"]:
2498+
raise ValueError(
2499+
f"Wrong format '{format}'. Allowed formats are: markdown, dict, json."
2500+
)
2501+
2502+
payload = build_structured_report(self)
2503+
save_structured_report(payload, self._results_path)
2504+
2505+
if format == "dict":
2506+
return payload
2507+
if format == "json":
2508+
return json.dumps(payload, indent=4)
2509+
return to_markdown(payload, model_details)
2510+
24782511
def _need_retrain(self, X, y, sample_weight, decrease):
24792512
metric = self._best_model.get_metric()
24802513

0 commit comments

Comments
 (0)