|
29 | 29 | #' for the generalized Pareto distribution. *Technometrics* **51**, 316-325. |
30 | 30 | #' |
31 | 31 | gpdfit <- function(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) { |
32 | | - posterior::gpdfit( |
33 | | - x = x, |
34 | | - wip = wip, |
35 | | - min_grid_pts = min_grid_pts, |
36 | | - sort_x = sort_x |
37 | | - ) |
| 32 | + # See section 4 of Zhang and Stephens (2009) |
| 33 | + if (sort_x) { |
| 34 | + x <- sort.int(x) |
| 35 | + } |
| 36 | + N <- length(x) |
| 37 | + prior <- 3 |
| 38 | + M <- min_grid_pts + floor(sqrt(N)) |
| 39 | + jj <- seq_len(M) |
| 40 | + xstar <- x[floor(N / 4 + 0.5)] # first quartile of sample |
| 41 | + theta <- 1 / x[N] + (1 - sqrt(M / (jj - 0.5))) / prior / xstar |
| 42 | + l_theta <- N * lx(theta, x) # profile log-lik |
| 43 | + w_theta <- exp(l_theta - matrixStats::logSumExp(l_theta)) # normalize |
| 44 | + theta_hat <- sum(theta * w_theta) |
| 45 | + k <- mean.default(log1p(-theta_hat * x)) |
| 46 | + sigma <- -k / theta_hat |
| 47 | + |
| 48 | + if (wip) { |
| 49 | + k <- adjust_k_wip(k, n = N) |
| 50 | + } |
| 51 | + |
| 52 | + if (is.na(k)) { |
| 53 | + k <- Inf |
| 54 | + } |
| 55 | + |
| 56 | + nlist(k, sigma) |
| 57 | +} |
| 58 | + |
| 59 | + |
| 60 | +# internal ---------------------------------------------------------------- |
| 61 | + |
| 62 | +lx <- function(a,x) { |
| 63 | + a <- -a |
| 64 | + k <- vapply(a, FUN = function(a_i) mean(log1p(a_i * x)), FUN.VALUE = numeric(1)) |
| 65 | + log(a / k) - k - 1 |
| 66 | +} |
| 67 | + |
| 68 | +#' Adjust k based on weakly informative prior, Gaussian centered on 0.5. This |
| 69 | +#' will stabilize estimates for very small Monte Carlo sample sizes and low neff |
| 70 | +#' cases. |
| 71 | +#' |
| 72 | +#' @noRd |
| 73 | +#' @param k Scalar khat estimate. |
| 74 | +#' @param n Integer number of tail samples used to fit GPD. |
| 75 | +#' @return Scalar adjusted khat estimate. |
| 76 | +#' |
| 77 | +adjust_k_wip <- function(k, n) { |
| 78 | + a <- 10 |
| 79 | + n_plus_a <- n + a |
| 80 | + k * n / n_plus_a + a * 0.5 / n_plus_a |
| 81 | +} |
| 82 | + |
| 83 | + |
| 84 | +#' Inverse CDF of generalized Pareto distribution |
| 85 | +#' (assuming location parameter is 0) |
| 86 | +#' |
| 87 | +#' @noRd |
| 88 | +#' @param p Vector of probabilities. |
| 89 | +#' @param k Scalar shape parameter. |
| 90 | +#' @param sigma Scalar scale parameter. |
| 91 | +#' @return Vector of quantiles. |
| 92 | +#' |
| 93 | +qgpd <- function(p, k, sigma) { |
| 94 | + if (is.nan(sigma) || sigma <= 0) { |
| 95 | + return(rep(NaN, length(p))) |
| 96 | + } |
| 97 | + |
| 98 | + sigma * expm1(-k * log1p(-p)) / k |
38 | 99 | } |
0 commit comments