|
| 1 | +""" |
| 2 | +test_Q1_quantiles_profile_adf.py — ADF ↔ dfdraw quantiles parity |
| 3 | +
|
| 4 | +Verifies ADF.draw() correctly forwards quantiles=, central=, quantile_mode= |
| 5 | +kwargs to dfdraw's profile(). No formal phase — test infrastructure only. |
| 6 | +
|
| 7 | +Q1_1: error_bars mode via ADF |
| 8 | +Q1_2: band mode via ADF |
| 9 | +Q1_3: parity — ADF path vs direct dfdraw path produce identical stats |
| 10 | +Q1_4: central='median' forwarded correctly |
| 11 | +Q1_5: group_by + quantiles via ADF |
| 12 | +""" |
| 13 | + |
| 14 | +import os |
| 15 | +import sys |
| 16 | +import pytest |
| 17 | +import numpy as np |
| 18 | +import pandas as pd |
| 19 | + |
| 20 | +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
| 21 | +from AliasDataFrame import AliasDataFrame |
| 22 | + |
| 23 | +try: |
| 24 | + import matplotlib |
| 25 | + matplotlib.use('Agg') |
| 26 | + _HAS_MPL = True |
| 27 | +except ImportError: |
| 28 | + _HAS_MPL = False |
| 29 | + |
| 30 | +try: |
| 31 | + from dfdraw import DFDraw |
| 32 | + _HAS_DFDRAW = True |
| 33 | +except ImportError: |
| 34 | + _HAS_DFDRAW = False |
| 35 | + |
| 36 | + |
@pytest.fixture
def adf_gaussian():
    """Return an AliasDataFrame with a noisy linear sample for profile tests.

    Columns:
        x   -- float32, uniform draws on [0, 10)
        y   -- 2 * x plus unit-width Gaussian noise
        grp -- int8 group label derived from x in steps of 2.5

    The alias ``y_shifted`` (``y - 5``) is registered on top of the frame.
    The generator is seeded, so every test sees the same sample.
    """
    n_rows = 2000
    generator = np.random.default_rng(2025)

    # Draw order is part of the fixture's contract: x first, then the noise.
    x_values = generator.uniform(0, 10, n_rows).astype(np.float32)
    noise = generator.normal(0, 1, n_rows).astype(np.float32)
    y_values = 2 * x_values + noise

    frame = pd.DataFrame({
        'x': x_values,
        'y': y_values,
        'grp': (x_values // 2.5).astype(np.int8),  # 4 groups
    })

    aliased = AliasDataFrame(frame)
    aliased.add_alias('y_shifted', 'y - 5', dtype=np.float32)
    return aliased
| 49 | + |
| 50 | + |
@pytest.mark.skipif(not _HAS_MPL or not _HAS_DFDRAW, reason="Requires matplotlib + dfdraw")
class TestQ1QuantilesADFPassthrough:
    """Smoke and parity tests for quantile kwargs passed through ADF.draw().

    Most tests only check that the call completes and returns something;
    Q1_3 additionally compares per-bin quantile arrays between the ADF path
    and a direct DFDraw call when both expose a stats mapping.
    """

    @pytest.fixture(autouse=True)
    def _close_figures(self):
        """Close all pyplot figures after each test so they do not pile up."""
        yield
        # Local import: the whole class is skipped when matplotlib is missing.
        import matplotlib.pyplot as plt
        plt.close('all')

    @staticmethod
    def _extract_stats(result):
        """Return the third element of a draw result tuple, else None.

        draw() returns fig or (fig, ax, stats) depending on version, so the
        stats mapping is only available for tuples longer than two items.
        """
        if isinstance(result, tuple) and len(result) > 2:
            return result[2]
        return None

    @pytest.mark.invariance
    def test_Q1_1_error_bars_via_adf(self, adf_gaussian):
        """ADF.draw with quantiles=[0.16, 0.84] produces error_bars mode stats."""
        result = adf_gaussian.draw(
            'y:x', type='profile', quantiles=[0.16, 0.84])

        # At minimum, the call should not raise
        assert result is not None, "Q1_1: draw with quantiles should not fail"

    @pytest.mark.invariance
    def test_Q1_2_band_via_adf(self, adf_gaussian):
        """ADF.draw with quantiles=[0.16, 0.5, 0.84] produces band mode."""
        result = adf_gaussian.draw(
            'y:x', type='profile', quantiles=[0.16, 0.5, 0.84])
        assert result is not None, "Q1_2: draw with band quantiles should not fail"

    @pytest.mark.invariance
    def test_Q1_3_parity_adf_vs_dfdraw(self, adf_gaussian):
        """ADF path and direct dfdraw path produce identical stats."""
        adf = adf_gaussian
        # Materialize the alias so the plain DataFrame handed to DFDraw
        # carries the same 'y_shifted' column the ADF path uses.
        adf.materialize_aliases(names=['y_shifted'])

        # ADF path
        result_adf = adf.draw(
            'y_shifted:x', type='profile', quantiles=[0.16, 0.84])

        # dfdraw direct path
        result_dd = DFDraw(adf.df).profile(
            'y_shifted:x', quantiles=[0.16, 0.84])

        # Both should succeed
        assert result_adf is not None, "Q1_3: ADF path failed"
        assert result_dd is not None, "Q1_3: dfdraw path failed"

        # The numeric comparison is best-effort: it only runs when both
        # paths return a stats mapping that carries the quantile arrays.
        stats_adf = self._extract_stats(result_adf)
        stats_dd = self._extract_stats(result_dd)
        if stats_adf is None or stats_dd is None:
            return
        if 'q_lower_per_bin' not in stats_adf or 'q_lower_per_bin' not in stats_dd:
            return

        np.testing.assert_allclose(
            stats_adf['q_lower_per_bin'],
            stats_dd['q_lower_per_bin'],
            rtol=1e-12,
            err_msg="Q1_3: q_lower parity broken"
        )
        np.testing.assert_allclose(
            stats_adf['q_upper_per_bin'],
            stats_dd['q_upper_per_bin'],
            rtol=1e-12,
            err_msg="Q1_3: q_upper parity broken"
        )

    @pytest.mark.invariance
    def test_Q1_4_central_median_forwarded(self, adf_gaussian):
        """central='median' kwarg forwarded to dfdraw."""
        result = adf_gaussian.draw(
            'y:x', type='profile',
            quantiles=[0.16, 0.5, 0.84], central='median')
        assert result is not None, "Q1_4: central='median' should not fail"

    @pytest.mark.invariance
    def test_Q1_5_groupby_with_quantiles(self, adf_gaussian):
        """group_by + quantiles via ADF — production-typical pattern."""
        result = adf_gaussian.draw(
            'y:x', type='profile',
            group_by='grp', quantiles=[0.16, 0.84])
        assert result is not None, "Q1_5: group_by + quantiles should not fail"
| 128 | + |
| 129 | + |
if __name__ == '__main__':
    # Propagate pytest's exit code so a failing run is visible to the
    # calling shell or CI job instead of always exiting with status 0.
    raise SystemExit(pytest.main([__file__, '-v', '-s']))
0 commit comments