Skip to content

Commit 2d953bf

Browse files
authored
Merge pull request #3283 from lingium/feature/issue-3238-yule-simon-cdf
add yule_simon_cdf
2 parents 4135fc9 + 13125df commit 2d953bf

2 files changed

Lines changed: 168 additions & 0 deletions

File tree

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
#ifndef STAN_MATH_PRIM_PROB_YULE_SIMON_CDF_HPP
2+
#define STAN_MATH_PRIM_PROB_YULE_SIMON_CDF_HPP
3+
4+
#include <stan/math/prim/meta.hpp>
5+
#include <stan/math/prim/err.hpp>
6+
#include <stan/math/prim/fun/constants.hpp>
7+
#include <stan/math/prim/fun/digamma.hpp>
8+
#include <stan/math/prim/fun/beta.hpp>
9+
#include <stan/math/prim/fun/lgamma.hpp>
10+
#include <stan/math/prim/fun/max_size.hpp>
11+
#include <stan/math/prim/fun/scalar_seq_view.hpp>
12+
#include <stan/math/prim/fun/size.hpp>
13+
#include <stan/math/prim/fun/size_zero.hpp>
14+
#include <stan/math/prim/fun/value_of.hpp>
15+
#include <stan/math/prim/functor/partials_propagator.hpp>
16+
17+
namespace stan {
18+
namespace math {
19+
20+
/** \ingroup prob_dists
21+
* Returns the CDF of the Yule-Simon distribution with shape parameter.
22+
* Given containers of matching sizes, returns the product of probabilities.
23+
*
24+
* @tparam T_n type of outcome parameter
25+
* @tparam T_alpha type of shape parameter
26+
*
27+
* @param n outcome variable
28+
* @param alpha shape parameter
29+
* @return probability or product of probabilities
30+
* @throw std::domain_error if alpha fails to be positive
31+
* @throw std::invalid_argument if container sizes mismatch
32+
*/
33+
template <typename T_n, typename T_alpha>
34+
inline return_type_t<T_alpha> yule_simon_cdf(const T_n& n,
35+
const T_alpha& alpha) {
36+
using T_partials_return = partials_return_t<T_n, T_alpha>;
37+
using T_n_ref = ref_type_t<T_n>;
38+
using T_alpha_ref = ref_type_t<T_alpha>;
39+
static constexpr const char* function = "yule_simon_cdf";
40+
41+
check_consistent_sizes(function, "Outcome variable", n, "Shape parameter",
42+
alpha);
43+
if (size_zero(n, alpha)) {
44+
return 1.0;
45+
}
46+
47+
T_n_ref n_ref = n;
48+
T_alpha_ref alpha_ref = alpha;
49+
check_positive_finite(function, "Shape parameter", alpha_ref);
50+
51+
scalar_seq_view<T_n> n_vec(n);
52+
scalar_seq_view<T_alpha_ref> alpha_vec(alpha_ref);
53+
const size_t max_size_seq_view = max_size(n_ref, alpha_ref);
54+
55+
// Explicit return for invalid or extreme values
56+
// The gradients are technically ill-defined, but treated as zero
57+
for (int i = 0; i < stan::math::size(n); i++) {
58+
if (n_vec.val(i) < 1.0) {
59+
return 0.0;
60+
}
61+
if (n_vec.val(i) == std::numeric_limits<int>::max()) {
62+
return 1.0;
63+
}
64+
}
65+
66+
T_partials_return cdf(1.0);
67+
auto ops_partials = make_partials_propagator(alpha_ref);
68+
for (size_t i = 0; i < max_size_seq_view; i++) {
69+
auto np1 = n_vec.val(i) + 1.0;
70+
auto ap1 = alpha_vec.val(i) + 1.0;
71+
auto nap1 = n_vec.val(i) + ap1;
72+
73+
auto ccdf = stan::math::exp(lgamma(ap1) + lgamma(np1) - lgamma(nap1));
74+
cdf *= 1.0 - ccdf;
75+
76+
if constexpr (is_autodiff_v<T_alpha>) {
77+
auto chain_rule_term = -ccdf / (1.0 - ccdf);
78+
partials<0>(ops_partials)[i]
79+
+= (digamma(ap1) - digamma(nap1)) * chain_rule_term;
80+
}
81+
}
82+
83+
if constexpr (is_autodiff_v<T_alpha>) {
84+
for (size_t i = 0; i < stan::math::size(alpha); ++i) {
85+
partials<0>(ops_partials)[i] *= cdf;
86+
}
87+
}
88+
89+
return ops_partials.build(cdf);
90+
}
91+
92+
} // namespace math
93+
} // namespace stan
94+
#endif
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
// Arguments: Ints, Doubles
2+
#include <stan/math/prim/prob/yule_simon_cdf.hpp>
3+
#include <stan/math/prim/fun/lgamma.hpp>
4+
5+
using std::numeric_limits;
6+
using std::vector;
7+
8+
class AgradCdfYuleSimon : public AgradCdfTest {
9+
public:
10+
void valid_values(vector<vector<double>>& parameters, vector<double>& cdf) {
11+
vector<double> param(2);
12+
13+
param[0] = 5; // n
14+
param[1] = 20.0; // alpha
15+
parameters.push_back(param);
16+
cdf.push_back(0.9999811782420478); // expected cdf
17+
18+
param[0] = 10; // n
19+
param[1] = 5.5; // alpha
20+
parameters.push_back(param);
21+
cdf.push_back(0.9997987132162779); // expected cdf
22+
23+
param[0] = 1; // n
24+
param[1] = 0.1; // alpha
25+
parameters.push_back(param);
26+
cdf.push_back(0.0909090909090918); // expected cdf
27+
28+
param[0] = 1; // n
29+
param[1] = 0.01; // alpha
30+
parameters.push_back(param);
31+
cdf.push_back(0.0099009900990106); // expected cdf
32+
}
33+
34+
void invalid_values(vector<size_t>& index, vector<double>& value) {
35+
// n
36+
37+
// alpha
38+
index.push_back(1U);
39+
value.push_back(0.0);
40+
41+
index.push_back(1U);
42+
value.push_back(-1.0);
43+
44+
index.push_back(1U);
45+
value.push_back(std::numeric_limits<double>::infinity());
46+
}
47+
48+
// BOUND INCLUDED IN ORDER FOR TEST TO PASS WITH CURRENT FRAMEWORK
49+
bool has_lower_bound() { return false; }
50+
51+
bool has_upper_bound() { return false; }
52+
53+
template <class T_n, class T_alpha, typename T2, typename T3, typename T4,
54+
typename T5>
55+
stan::return_type_t<T_n, T_alpha> cdf(const T_n& n, const T_alpha& alpha,
56+
const T2&, const T3&, const T4&,
57+
const T5&) {
58+
return stan::math::yule_simon_cdf(n, alpha);
59+
}
60+
61+
template <class T_n, class T_alpha, typename T2, typename T3, typename T4,
62+
typename T5>
63+
stan::return_type_t<T_n, T_alpha> cdf_function(const T_n& n,
64+
const T_alpha& alpha,
65+
const T2&, const T3&,
66+
const T4&, const T5&) {
67+
using stan::math::lgamma;
68+
69+
auto log_ccdf
70+
= lgamma(alpha + 1.0) + lgamma(n + 1.0) - lgamma(n + alpha + 1.0);
71+
72+
return 1.0 - stan::math::exp(log_ccdf);
73+
}
74+
};

0 commit comments

Comments
 (0)