Skip to content

Commit a9798e6

Browse files
committed
feat(computations): add new discrete distribution CDF option
1 parent 96faae2 commit a9798e6

2 files changed

Lines changed: 290 additions & 22 deletions

File tree

src/pysatl_core/distributions/computations/discrete.py

Lines changed: 100 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,18 @@
77
Option taxonomy used here
88
-------------------------
99
``CharacteristicOption``
10-
None of the current discrete fitters have characteristic options because
11-
the discrete support fully determines the characteristic domain.
10+
* ``_fit_pmf_to_cdf_1D``, ``_fit_ppf_to_cdf_1D``: ``right_closed`` —
11+
controls the CDF convention:
12+
13+
* ``True`` (default): right-closed, standard convention
14+
``F(x) = P(ξ ≤ x)``.
15+
* ``False``: right-open convention ``F⁻(x) = P(ξ < x)``, i.e. the left limit of ``F`` at ``x`` (not an inverse).
16+
17+
The right-open form is useful when computing the CDF of ``-ξ``:
18+
``P(-ξ ≤ -x) = P(ξ ≥ x) = 1 - P(ξ < x) = 1 - F⁻(x)``.
19+
20+
Because this option changes the *meaning* of the result, it is a
21+
``CharacteristicOption`` and is encoded into the cache key.
1222
1323
``ComputationOption``
1424
* ``_fit_ppf_to_cdf_1D``: ``n_q_grid`` — grid resolution for probing the
@@ -22,7 +32,7 @@
2232
__copyright__ = "Copyright (c) 2025 PySATL project"
2333
__license__ = "SPDX-License-Identifier: MIT"
2434

25-
from typing import TYPE_CHECKING, Any
35+
from typing import TYPE_CHECKING, Any, Literal
2636

2737
import numpy as np
2838

@@ -34,7 +44,7 @@
3444
)
3545
from pysatl_core.distributions.computations.computation import FittedComputationMethod
3646
from pysatl_core.distributions.computations.descriptors import FitterDescriptor
37-
from pysatl_core.distributions.computations.options import ComputationOption
47+
from pysatl_core.distributions.computations.options import CharacteristicOption, ComputationOption
3848
from pysatl_core.distributions.support import (
3949
DiscreteSupport,
4050
IntegerLatticeDiscreteSupport,
@@ -57,6 +67,7 @@ def _fit_pmf_to_cdf_1D(
5767
distribution: Distribution,
5868
/,
5969
eps: float = 1e-12,
70+
right_closed: bool = True,
6071
) -> FittedComputationMethod[NumericArray, NumericArray]:
6172
"""
6273
Fit a ``pmf -> cdf`` conversion for discrete distributions.
@@ -75,6 +86,15 @@ def _fit_pmf_to_cdf_1D(
7586
* **Left-bounded, right-unbounded**: the upward walk (via mirroring)
7687
continues while ``1 - cumulative_sum >= eps``; once the remaining
7788
right-tail probability falls below *eps* it is considered negligible.
89+
right_closed : bool, default True
90+
*(Characteristic option)* CDF convention:
91+
92+
* ``True``: right-closed ``F(x) = P(ξ ≤ x)`` (standard).
93+
* ``False``: right-open ``F⁻(x) = P(ξ < x)``.
94+
95+
The right-open form satisfies
96+
``1 - F⁻(x) = P(ξ ≥ x)``, which is needed when computing the CDF
97+
of ``-ξ``.
7898
7999
Returns
80100
-------
@@ -86,8 +106,8 @@ def _fit_pmf_to_cdf_1D(
86106
If the support is missing, empty, or a two-sided infinite lattice.
87107
"""
88108
support = _require_discrete_support(distribution, "pmf->cdf")
89-
90109
pmf_func = resolve(distribution, CharacteristicName.PMF)
110+
side: Literal["left", "right"] = "right" if right_closed else "left"
91111

92112
if (
93113
isinstance(support, IntegerLatticeDiscreteSupport)
@@ -99,9 +119,12 @@ def _fit_pmf_to_cdf_1D(
99119

100120
def _cdf_tail(x: NumericArray, **options: Any) -> NumericArray:
101121
x_arr = np.atleast_1d(np.asarray(x, dtype=float))
102-
idx = np.searchsorted(xs, x_arr, side="right")
103-
result = np.clip(1.0 - tail_from[idx], 0.0, 1.0)
104-
result[x_arr >= max_point] = 1.0
122+
idx = np.searchsorted(xs, x_arr, side=side)
123+
result: NumericArray = np.clip(np.asarray(1.0 - tail_from[idx], dtype=float), 0.0, 1.0)
124+
if right_closed:
125+
result[x_arr >= max_point] = 1.0
126+
else:
127+
result[x_arr > max_point] = 1.0
105128
return result
106129

107130
return FittedComputationMethod[NumericArray, NumericArray](
@@ -121,12 +144,12 @@ def _cdf_tail(x: NumericArray, **options: Any) -> NumericArray:
121144
def _cdf_head(x: NumericArray, **options: Any) -> NumericArray:
122145
x_arr = np.atleast_1d(np.asarray(x, dtype=float))
123146
result = np.empty_like(x_arr)
124-
below = x_arr < min_point
147+
below = x_arr < min_point if right_closed else x_arr <= min_point
125148
result[below] = 0.0
126149
if xs.size == 0:
127150
result[~below] = 0.0
128151
return result
129-
idx = np.searchsorted(xs, x_arr[~below], side="right") - 1
152+
idx = np.searchsorted(xs, x_arr[~below], side=side) - 1
130153
idx = np.clip(idx, 0, cdf_at.size - 1)
131154
result[~below] = cdf_at[idx]
132155
return result
@@ -157,7 +180,7 @@ def _cdf_head(x: NumericArray, **options: Any) -> NumericArray:
157180

158181
def _cdf(x: NumericArray, **options: Any) -> NumericArray:
159182
x_arr = np.atleast_1d(np.asarray(x, dtype=float))
160-
idx = np.searchsorted(xs, x_arr, side="right") - 1
183+
idx = np.searchsorted(xs, x_arr, side=side) - 1
161184
result = np.where(idx < 0, 0.0, cdf_vals[np.clip(idx, 0, cdf_vals.size - 1)])
162185
return result
163186

@@ -174,7 +197,19 @@ def _build_pmf_to_cdf_1D() -> FitterDescriptor:
174197
target=CharacteristicName.CDF,
175198
sources=[CharacteristicName.PMF],
176199
fitter=_fit_pmf_to_cdf_1D,
177-
characteristic_options=(),
200+
characteristic_options=(
201+
CharacteristicOption(
202+
name="right_closed",
203+
type=bool,
204+
default=True,
205+
description=(
206+
"CDF convention. True (default): right-closed F(x) = P(ξ ≤ x). "
207+
"False: right-open F⁻(x) = P(ξ < x). "
208+
"The right-open form satisfies 1 - F⁻(x) = P(ξ ≥ x), "
209+
"which is needed when computing the CDF of -ξ."
210+
),
211+
),
212+
),
178213
computation_options=(
179214
ComputationOption(
180215
name="eps",
@@ -195,7 +230,8 @@ def _build_pmf_to_cdf_1D() -> FitterDescriptor:
195230
constraint_tags=frozenset({"discrete", "univariate"}),
196231
description=(
197232
"PMF -> CDF via prefix-sum (finite support) or tail summation "
198-
"(left-unbounded or right-unbounded)."
233+
"(left-unbounded or right-unbounded). Supports right-closed and "
234+
"right-open CDF conventions via the ``right_closed`` characteristic option."
199235
),
200236
)
201237

@@ -350,6 +386,7 @@ def _fit_ppf_to_cdf_1D(
350386
distribution: Distribution,
351387
/,
352388
n_q_grid: int = 4096,
389+
right_closed: bool = True,
353390
) -> FittedComputationMethod[NumericArray, NumericArray]:
354391
"""
355392
Fit a ``ppf -> cdf`` conversion for discrete distributions.
@@ -362,6 +399,15 @@ def _fit_ppf_to_cdf_1D(
362399
*(Computation option)* Grid resolution for probing the PPF at
363400
fit-time. Increase if the distribution has many closely-spaced
364401
support points.
402+
right_closed : bool, default True
403+
*(Characteristic option)* CDF convention:
404+
405+
* ``True``: right-closed ``F(x) = P(ξ ≤ x)`` (standard).
406+
* ``False``: right-open ``F⁻(x) = P(ξ < x)``.
407+
408+
The right-open form satisfies
409+
``1 - F⁻(x) = P(ξ ≥ x)``, which is needed when computing the CDF
410+
of ``-ξ``.
365411
366412
Returns
367413
-------
@@ -384,9 +430,17 @@ def _fit_ppf_to_cdf_1D(
384430
right_idx[:-1] = change_idx[1:] - 1
385431
right_idx[-1] = n_q_grid - 1
386432

387-
cdf_table = q_grid[right_idx]
388-
cdf_table = np.clip(cdf_table, 0.0, 1.0)
389-
np.maximum.accumulate(cdf_table, out=cdf_table)
433+
left_idx = change_idx.copy()
434+
cdf_table_closed = np.clip(q_grid[right_idx], 0.0, 1.0)
435+
cdf_table_open = np.clip(
436+
np.concatenate([[0.0], q_grid[left_idx[1:] - 1]]),
437+
0.0,
438+
1.0,
439+
)
440+
np.maximum.accumulate(cdf_table_closed, out=cdf_table_closed)
441+
np.maximum.accumulate(cdf_table_open, out=cdf_table_open)
442+
443+
cdf_table = cdf_table_closed if right_closed else cdf_table_open
390444

391445
x_min = float(xs_table[0])
392446
x_max = float(xs_table[-1])
@@ -395,16 +449,25 @@ def _cdf(x: NumericArray, **options: Any) -> NumericArray:
395449
x_arr = np.atleast_1d(np.asarray(x, dtype=float))
396450
result = np.empty_like(x_arr)
397451

398-
left_mask = x_arr < x_min
399-
right_mask = x_arr >= x_max
452+
if right_closed:
453+
left_mask = x_arr < x_min
454+
right_mask = x_arr >= x_max
455+
else:
456+
left_mask = x_arr <= x_min
457+
right_mask = x_arr > x_max
458+
400459
interior = ~left_mask & ~right_mask
401460

402461
result[left_mask] = 0.0
403462
result[right_mask] = 1.0
404463

405464
if np.any(interior):
406465
xi = x_arr[interior]
407-
idx = np.searchsorted(xs_table, xi, side="right") - 1
466+
if right_closed:
467+
idx = np.searchsorted(xs_table, xi, side="right") - 1
468+
else:
469+
idx = np.searchsorted(xs_table, xi, side="left") - 1
470+
idx = idx + 1
408471
idx = np.clip(idx, 0, cdf_table.size - 1)
409472
result[interior] = cdf_table[idx]
410473

@@ -423,7 +486,19 @@ def _build_ppf_to_cdf_1D() -> FitterDescriptor:
423486
target=CharacteristicName.CDF,
424487
sources=[CharacteristicName.PPF],
425488
fitter=_fit_ppf_to_cdf_1D,
426-
characteristic_options=(),
489+
characteristic_options=(
490+
CharacteristicOption(
491+
name="right_closed",
492+
type=bool,
493+
default=True,
494+
description=(
495+
"CDF convention. True (default): right-closed F(x) = P(ξ ≤ x). "
496+
"False: right-open F⁻(x) = P(ξ < x). "
497+
"The right-open form satisfies 1 - F⁻(x) = P(ξ ≥ x), "
498+
"which is needed when computing the CDF of -ξ."
499+
),
500+
),
501+
),
427502
computation_options=(
428503
ComputationOption(
429504
name="n_q_grid",
@@ -437,7 +512,11 @@ def _build_ppf_to_cdf_1D() -> FitterDescriptor:
437512
),
438513
),
439514
constraint_tags=frozenset({"discrete", "univariate"}),
440-
description="PPF -> CDF via grid probing and step-function table construction.",
515+
description=(
516+
"PPF -> CDF via grid probing and step-function table construction. "
517+
"Supports right-closed and right-open CDF conventions via the "
518+
"``right_closed`` characteristic option."
519+
),
441520
)
442521

443522

0 commit comments

Comments
 (0)