sbi-dev
diff --git a/‎docs/advanced_tutorials/13_diagnostics_lc2st.ipynb‎
Lines changed: 25 additions & 29 deletions b/‎docs/advanced_tutorials/13_diagnostics_lc2st.ipynb‎
Lines changed: 25 additions & 29 deletions
diff --git a/‎docs/how_to_guide/13_diagnostics_lc2st.ipynb‎
Lines changed: 8 additions & 6 deletions b/‎docs/how_to_guide/13_diagnostics_lc2st.ipynb‎
Lines changed: 8 additions & 6 deletions
diff --git a/‎sbi/diagnostics/__init__.py‎
Lines changed: 4 additions & 1 deletion b/‎sbi/diagnostics/__init__.py‎
Lines changed: 4 additions & 1 deletion
@@ -252,7 +252,7 @@
    "source": [
     "# set up the LC2ST: train the classifiers\n",
     "lc2st = LC2ST(\n",
-    "    thetas=theta_cal,\n",
+    "    prior_samples=theta_cal,\n",
     "    xs=x_cal,\n",
     "    posterior_samples=post_samples_cal,\n",
     "    classifier=\"mlp\",\n",
@@ -301,20 +301,21 @@
    "source": [
     "fig, axes = plt.subplots(1,len(thetas_star), figsize=(12,3))\n",
     "for i in range(len(thetas_star)):\n",
-    "    probs, scores = lc2st.get_scores(\n",
+    "    lc2st_scores = lc2st.get_scores(\n",
     "        theta_o=post_samples_star[i],\n",
     "        x_o=xs_star[i],\n",
-    "        return_probs=True,\n",
     "        trained_clfs=lc2st.trained_clfs\n",
     "    )\n",
+    "    probs = lc2st_scores.probabilities\n",
+    "    scores = lc2st_scores.scores\n",
     "    T_data = lc2st.get_statistic_on_observed_data(\n",
     "        theta_o=post_samples_star[i],\n",
     "        x_o=xs_star[i]\n",
     "    )\n",
     "    T_null = lc2st.get_statistics_under_null_hypothesis(\n",
     "        theta_o=post_samples_star[i],\n",
     "        x_o=xs_star[i]\n",
-    "    )\n",
+    "    ).scores\n",
     "    p_value = lc2st.p_value(post_samples_star[i], xs_star[i])\n",
     "    reject = lc2st.reject_test(post_samples_star[i], xs_star[i], alpha=conf_alpha)\n",
     "\n",
@@ -383,17 +384,15 @@
     "\n",
     "fig, axes = plt.subplots(1,len(thetas_star), figsize=(12,3))\n",
     "for i in range(len(thetas_star)):\n",
-    "    probs_data, _ = lc2st.get_scores(\n",
+    "    probs_data = lc2st.get_scores(\n",
     "        theta_o=post_samples_star[i],\n",
     "        x_o=xs_star[i],\n",
-    "        return_probs=True,\n",
     "        trained_clfs=lc2st.trained_clfs\n",
-    "    )\n",
-    "    probs_null, _ = lc2st.get_statistics_under_null_hypothesis(\n",
+    "    ).probabilities\n",
+    "    probs_null = lc2st.get_statistics_under_null_hypothesis(\n",
     "        theta_o=post_samples_star[i],\n",
-    "        x_o=xs_star[i],\n",
-    "        return_probs=True\n",
-    "    )\n",
+    "        x_o=xs_star[i]\n",
+    "    ).probabilities\n",
     "\n",
     "    pp_plot_lc2st(\n",
     "        probs=[probs_data],\n",
@@ -451,12 +450,11 @@
     "\n",
     "fig, axes = plt.subplots(len(thetas_star), 3, figsize=(9,6), constrained_layout=True)\n",
     "for i in range(len(thetas_star)):\n",
-    "    probs_data, _ = lc2st.get_scores(\n",
+    "    probs_data = lc2st.get_scores(\n",
     "        theta_o=post_samples_star[i][:1000],\n",
     "        x_o=xs_star[i],\n",
-    "        return_probs=True,\n",
     "        trained_clfs=lc2st.trained_clfs\n",
-    "    )\n",
+    "    ).probabilities\n",
     "    dict_probs_marginals = get_probs_per_marginal(\n",
     "        probs_data[0],\n",
     "        post_samples_star[i][:1000].numpy()\n",
@@ -543,7 +541,7 @@
     "For different classifier architectures, you should choose the one with the smallest variance. \n",
     "\n",
     "### Number of calibration samples\n",
-    "A similar check can also be performed via cross-validation: set the `num_folds` parameter of your `LC2ST` object, train on observed data and call `lc2st.get_scores(theta_o, x_o, lc2st.trained_clfs)`. This outputs the test statistics obtained for each cv-fold. You should choose the smallest calibration set size that gives you a small enough variance over the test statistics. \n",
+    "A similar check can also be performed via cross-validation: set the `num_folds` parameter of your `LC2ST` object, train on observed data and call `lc2st.get_scores(theta_o, x_o, lc2st.trained_clfs)`. This returns an `LC2STScores` object with the test statistics (`.scores`) for each cv-fold. You should choose the smallest calibration set size that gives you a small enough variance over the test statistics. \n",
     "\n",
     "> Note: Ideally, these checks should be performed in a **separable data setting**, i.e. for a dataset `theta_o, x_o` coming from a sub-optimal estimator: the classifier is supposed to be able to discriminate between the two classes; the test is supposed to be rejected; the variance is supposed to be small. In other words, we are ensuring a **high statistical power** (our true positive rate) of our test. If you want to be really rigorous, you should also check the type I error (or false positive rate), which should be controlled by the significance level of your test (cf. Figure 2 in [[Linhart et al., 2023]](https://arxiv.org/abs/2306.03580)).\n",
     "\n",
@@ -612,7 +610,7 @@
     ") # same as npe.net._distribution\n",
     "\n",
     "lc2st_nf = LC2ST_NF(\n",
-    "    thetas=theta_cal,\n",
+    "    prior_samples=theta_cal,\n",
     "    xs=x_cal,\n",
     "    posterior_samples=post_samples_cal,\n",
     "    flow_inverse_transform=flow_inverse_transform,\n",
@@ -660,13 +658,14 @@
    "source": [
     "fig, axes = plt.subplots(1,len(thetas_star), figsize=(12,3))\n",
     "for i in range(len(thetas_star)):\n",
-    "    probs, scores = lc2st_nf.get_scores(\n",
+    "    lc2st_scores = lc2st_nf.get_scores(\n",
     "        x_o=xs_star[i],\n",
-    "        return_probs=True,\n",
     "        trained_clfs=lc2st_nf.trained_clfs\n",
     "    )\n",
+    "    probs = lc2st_scores.probabilities\n",
+    "    scores = lc2st_scores.scores\n",
     "    T_data = lc2st_nf.get_statistic_on_observed_data(x_o=xs_star[i])\n",
-    "    T_null = lc2st_nf.get_statistics_under_null_hypothesis(x_o=xs_star[i])\n",
+    "    T_null = lc2st_nf.get_statistics_under_null_hypothesis(x_o=xs_star[i]).scores\n",
     "    p_value = lc2st_nf.p_value(xs_star[i])\n",
     "    reject = lc2st_nf.reject_test(xs_star[i], alpha=conf_alpha)\n",
     "\n",
@@ -731,15 +730,13 @@
     "\n",
     "fig, axes = plt.subplots(1,len(thetas_star), figsize=(12,3))\n",
     "for i in range(len(thetas_star)):\n",
-    "    probs_data, _ = lc2st_nf.get_scores(\n",
+    "    probs_data = lc2st_nf.get_scores(\n",
     "        x_o=xs_star[i],\n",
-    "        return_probs=True,\n",
     "        trained_clfs=lc2st_nf.trained_clfs\n",
-    "    )\n",
-    "    probs_null, _ = lc2st_nf.get_statistics_under_null_hypothesis(\n",
-    "        x_o=xs_star[i],\n",
-    "        return_probs=True\n",
-    "    )\n",
+    "    ).probabilities\n",
+    "    probs_null = lc2st_nf.get_statistics_under_null_hypothesis(\n",
+    "        x_o=xs_star[i]\n",
+    "    ).probabilities\n",
     "\n",
     "    pp_plot_lc2st(\n",
     "        probs=[probs_data],\n",
@@ -791,11 +788,10 @@
     "    inv_ref_samples = lc2st_nf.flow_inverse_transform(\n",
     "        ref_samples_star[i], xs_star[i]\n",
     "    ).detach()\n",
-    "    probs_data, _ = lc2st_nf.get_scores(\n",
+    "    probs_data = lc2st_nf.get_scores(\n",
     "        x_o=xs_star[i],\n",
-    "        return_probs=True,\n",
     "        trained_clfs=lc2st_nf.trained_clfs\n",
-    "    )\n",
+    "    ).probabilities\n",
     "    marginal_probs = get_probs_per_marginal(\n",
     "        probs_data[0],\n",
     "        lc2st_nf.theta_o.numpy()\n",
 
@@ -39,7 +39,7 @@
     "\n",
     "# Train the L-C2ST classifier.\n",
     "lc2st = LC2ST(\n",
-    "    thetas=prior_samples,\n",
+    "    prior_samples=prior_samples,\n",
     "    xs=prior_predictives,\n",
     "    posterior_samples=post_samples_cal,\n",
     "    classifier=\"mlp\",\n",
@@ -50,17 +50,19 @@
     "\n",
     "# Note: x_o must have a batch-dimension. I.e. `x_o.shape == (1, observation_shape)`.\n",
     "post_samples_star = posterior.sample((10_000,), x=x_o)\n",
-    "probs_data, scores_data = lc2st.get_scores(\n",
+    "scores_data = lc2st.get_scores(\n",
     "    theta_o=post_samples_star,\n",
     "    x_o=x_o,\n",
-    "    return_probs=True,\n",
     "    trained_clfs=lc2st.trained_clfs\n",
     ")\n",
-    "probs_null, scores_null = lc2st.get_statistics_under_null_hypothesis(\n",
+    "probs_data = scores_data.probabilities\n",
+    "scores_data = scores_data.scores\n",
+    "scores_null = lc2st.get_statistics_under_null_hypothesis(\n",
     "    theta_o=post_samples_star,\n",
     "    x_o=x_o,\n",
-    "    return_probs=True,\n",
     ")\n",
+    "probs_null = scores_null.probabilities\n",
+    "scores_null = scores_null.scores\n",
     "\n",
     "conf_alpha = 0.05\n",
     "p_value = lc2st.p_value(post_samples_star, torch.as_tensor(x_o).unsqueeze(0))\n",
@@ -94,7 +96,7 @@
    "source": [
     "If the red line is outside of the two dotted black lines (as above), then L-C2ST rejects the null-hypothesis that the approximate posterior matches the true posterior (i.e., your posterior is likely wrong).\n",
     "\n",
-    "If the posterior is wrong, then you can get insights into whether the posterior is under- or over-confident as follows:"
+    "If the posterior is wrong, then you can get insights into whether the posterior is under- or over-confident as follows. The call above returns an `LC2STScores` object; use `.probabilities` for the classifier probabilities and `.scores` for the test statistics."
    ]
   },
   {
 
@@ -1,7 +1,7 @@
 # This file is part of sbi, a toolkit for simulation-based inference. sbi is licensed
 # under the Apache License Version 2.0, see <https://www.apache.org/licenses/>
 
-from sbi.diagnostics.lc2st import LC2ST
+from sbi.diagnostics.lc2st import LC2ST, LC2ST_NF, LC2STScores, LC2STState
 from sbi.diagnostics.misspecification import (
     calc_misspecification_logprob,
     calc_misspecification_mmd,
@@ -16,6 +16,9 @@
     "check_tarp",
     "run_tarp",
     "LC2ST",
+    "LC2ST_NF",
+    "LC2STScores",
+    "LC2STState",
     "calc_misspecification_logprob",
     "calc_misspecification_mmd",
 ]