|
1 | 1 | import numpy as np |
| 2 | +import pandas as pd |
2 | 3 | import pytest |
3 | 4 | from sklearn.model_selection import train_test_split |
4 | 5 |
|
@@ -1578,6 +1579,29 @@ def test_cloglog_ordinal_bart_with_gfr(self): |
1578 | 1579 | assert bart_model.y_hat_test.shape == (n_test, num_mcmc) |
1579 | 1580 | assert bart_model.cloglog_cutpoint_samples.shape == (2, num_mcmc) |
1580 | 1581 |
|
| 1582 | + def test_categorical_covariates_mean_only(self): |
| 1583 | + """A mean-only BART model with categorical (one-hot expanded) covariates |
| 1584 | + must sample and predict without error. |
| 1585 | +
|
| 1586 | + Regression test: the "zero out excluded variable weights" step ran |
| 1587 | + outside the include_*_forest guards, so variable_weights_variance was |
| 1588 | + never expanded to the processed (one-hot) length for a model without a |
| 1589 | + variance forest, and indexing it raised an IndexError. |
| 1590 | + """ |
| 1591 | + rng = np.random.default_rng(0) |
| 1592 | + n = 100 |
| 1593 | + X_num = rng.uniform(0, 1, (n, 3)) |
| 1594 | + X = pd.DataFrame(X_num, columns=["a", "b", "c"]) |
| 1595 | + X["cat"] = pd.Categorical(rng.choice(["x", "y", "z"], size=n)) |
| 1596 | + y = X_num[:, 0] + rng.normal(scale=0.5, size=n) |
| 1597 | + |
| 1598 | + model = BARTModel() |
| 1599 | + # Mean forest only (no variance forest) is the failing configuration. |
| 1600 | + model.sample(X_train=X, y_train=y, num_gfr=0, num_burnin=0, num_mcmc=5) |
| 1601 | + |
| 1602 | + preds = model.predict(X) |
| 1603 | + assert preds["y_hat"].shape[0] == n |
| 1604 | + |
1581 | 1605 |
|
1582 | 1606 | class TestBARTFloat32: |
1583 | 1607 | """Tests that float32 inputs are accepted and produce valid results (GH #389).""" |
|
0 commit comments