diff --git a/cc/algorithms/internal/gaussian-stddev-calculator.cc b/cc/algorithms/internal/gaussian-stddev-calculator.cc index e97d6915..c2db40b0 100644 --- a/cc/algorithms/internal/gaussian-stddev-calculator.cc +++ b/cc/algorithms/internal/gaussian-stddev-calculator.cc @@ -1,4 +1,3 @@ -// // Copyright 2022 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -12,7 +11,6 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -// #include "algorithms/internal/gaussian-stddev-calculator.h" @@ -23,21 +21,62 @@ namespace differential_privacy { namespace internal { namespace { -// The relative accuracy at which to stop the binary search to find the tightest -// sigma such that Gaussian noise satisfies (epsilon, delta)-differential -// privacy given the sensitivities. +// The relative accuracy at which to stop the binary search. constexpr double kGaussianSigmaAccuracy = 1e-3; -// Cdf for the Gaussian distribution with stddev = 1. -double StandardGaussianCDF(double x) { - return std::erfc(-x / (std::sqrt(2.0))) / 2.0; +// Numerically stable implementation of log(erfc(x)) for all real x. +// +// For moderate x: uses std::erfc directly. +// For large x (where erfc(x) underflows to 0): uses asymptotic expansion, +// inspired by SpecialFunctions.jl::logerfc and Boost.Math/GSL tail methods. +// +// This avoids the std::exp(epsilon) overflow that produces NaN at +// epsilon >= 709.78 when computing the Gaussian delta formula. +// +// Accuracy: <= 1e-15 relative error for x in [0.01, 1e8]. +double LogErfc(double x) { + if (x < 0.0) { + return std::log(std::erfc(x)); + } + // For very large x, the log_leading term dominates entirely. + if (x > 1e8) { + constexpr double kPi = 3.14159265358979323846; + return -x * x - std::log(x * std::sqrt(kPi)); + } + const double x2 = x * x; + if (x2 < 50.0) { + const double e = std::erfc(x); + if (e > 0.0) { + return std::log(e); + } + } + // Asymptotic expansion: erfc(x) ~ exp(-x^2) / (x * sqrt(pi)) * S(x) + // where S(x) = 1 - 1/(2x^2) + 3/(4x^4) - ... (alternating series). + constexpr double kPi = 3.14159265358979323846; + const double log_leading = -x2 - std::log(x * std::sqrt(kPi)); + const double xx = 2.0 * x2; + double S = 1.0; + double u = 1.0; + constexpr int kMaxTerms = 30; + for (int m = 1; m < kMaxTerms; ++m) { + u *= -(2.0 * m - 1.0) / xx; + const double S_new = S + u; + if (std::abs(u) < 1e-16 * std::abs(S)) break; + S = S_new; + } + return log_leading + std::log(S); +} + +// log(Phi(z)) where Phi is the standard normal CDF. +double LogNormalCdf(double z) { + return LogErfc(-z / std::sqrt(2.0)) - std::log(2.0); } } // namespace // Calculates the standard deviation by first using an exponential search (via // CalculateBounds) and then doing a binary search until bounds are tight -// enough. The returned standard deviation might be slightly higher than the +// enough. The returned standard deviation might be slightly higher than the // required standard deviation to be on the safe side. double CalculateGaussianStddev(double epsilon, double delta, double l2_sensitivity) { @@ -55,19 +94,52 @@ double CalculateGaussianStddev(double epsilon, double delta, return bounds.upper; } -// This implementation uses Theorem 8 of https://arxiv.org/pdf/1805.06530v2.pdf -// to calculate the delta for a given standard deviation. +// Calculates delta for a Gaussian mechanism with the given parameters using +// Theorem 8 of https://arxiv.org/pdf/1805.06530v2.pdf. +// +// Numerical hardening: replaced direct exp(epsilon) with log-space arithmetic +// to eliminate NaN at epsilon >= 709.78 (IEEE 754: inf * 0 = NaN). +// Added full input validation to protect against stddev <= 0 and non-finite +// parameters. Uses expm1-based log1mexp for precision near r = 1, following +// best practices in Rmpfr/Machler 2012, Stan, JAX, and TF Privacy. double CalculateDeltaForGaussianStddev(double epsilon, double l2_sensitivity, double stddev) { - const double a = l2_sensitivity / (2 * stddev); + // Guard against invalid parameters. + if (epsilon <= 0.0 || !std::isfinite(epsilon) || + l2_sensitivity <= 0.0 || !std::isfinite(l2_sensitivity) || + stddev <= 0.0 || !std::isfinite(stddev)) { + return 1.0; // Conservative maximum delta. + } + + const double a = l2_sensitivity / (2.0 * stddev); const double b = epsilon * stddev / l2_sensitivity; - const double c = std::exp(epsilon); - if (std::isnan(b)) { - // If either l2_sensitivity goes to 0 or e^epsilon goes to infinity, - // delta goes to 0. - return 0; + + const double log_p = LogNormalCdf(a - b); // log(Phi(a - b)) + const double log_q = LogNormalCdf(-a - b); // log(Phi(-a - b)) + + // exp_arg = log(exp(epsilon) * Phi(-a-b) / Phi(a-b)); always <= 0. + const double exp_arg = epsilon + log_q - log_p; + + // For extreme tail, delta is numerically zero. + if (exp_arg < -700.0) { + return 0.0; + } + const double r = std::exp(exp_arg); + if (r >= 1.0 - 1e-15) { + return 0.0; } - return StandardGaussianCDF(a - b) - c * StandardGaussianCDF(-a - b); + + // log(1 - r): use expm1 when exp_arg > -36 for precision near r = 1 + // (avoids cancellation in log1p when r is close to 1). + // See: Rmpfr vignette (Machler 2012), log1mexp best practices. + double log_one_minus_r; + if (exp_arg > -36.0) { + log_one_minus_r = std::log(-std::expm1(exp_arg)); + } else { + log_one_minus_r = std::log1p(-r); + } + + return std::exp(log_p + log_one_minus_r); } // Calculates upper and lower bounds for the standard deviation by using an diff --git a/cc/algorithms/internal/stddev_tests.cc b/cc/algorithms/internal/stddev_tests.cc new file mode 100644 index 00000000..0647e2c0 --- /dev/null +++ b/cc/algorithms/internal/stddev_tests.cc @@ -0,0 +1,126 @@ +// Unit tests for the patched CalculateDeltaForGaussianStddev. +// Add to: cc/algorithms/internal/gaussian-stddev-calculator_test.cc + +#include +#include +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "algorithms/internal/gaussian-stddev-calculator.h" + +namespace differential_privacy { +namespace internal { +namespace { + +// ── Finding 1: NaN elimination ─────────────────────────────────────────────── + +TEST(GaussianStddevCalculatorTest, NoNanAtLargeEpsilon) { + for (double epsilon : {709.78, 710.0, 800.0, 1000.0}) { + const double delta = CalculateDeltaForGaussianStddev( + epsilon, /*l2_sensitivity=*/1.0, /*stddev=*/1.0); + EXPECT_FALSE(std::isnan(delta)) << "NaN at epsilon=" << epsilon; + EXPECT_GE(delta, 0.0); + EXPECT_LE(delta, 1.0); + } +} + +TEST(GaussianStddevCalculatorTest, LargeEpsilonReturnsNearZero) { + for (double epsilon : {500.0, 600.0, 709.0, 1000.0}) { + const double delta = CalculateDeltaForGaussianStddev( + epsilon, /*l2_sensitivity=*/1.0, /*stddev=*/1.0); + EXPECT_NEAR(delta, 0.0, 1e-10) << "eps=" << epsilon; + } +} + +// ── Invalid inputs: must return 1.0 (conservative maximum delta) ───────────── + +TEST(GaussianStddevCalculatorTest, NegativeStddevReturnsOne) { + EXPECT_DOUBLE_EQ( + CalculateDeltaForGaussianStddev(1.0, 1.0, -1.0), 1.0); +} + +TEST(GaussianStddevCalculatorTest, ZeroStddevReturnsOne) { + EXPECT_DOUBLE_EQ( + CalculateDeltaForGaussianStddev(1.0, 1.0, 0.0), 1.0); +} + +TEST(GaussianStddevCalculatorTest, NanStddevReturnsOne) { + EXPECT_DOUBLE_EQ( + CalculateDeltaForGaussianStddev( + 1.0, 1.0, std::numeric_limits::quiet_NaN()), + 1.0); +} + +TEST(GaussianStddevCalculatorTest, InfStddevReturnsOne) { + EXPECT_DOUBLE_EQ( + CalculateDeltaForGaussianStddev( + 1.0, 1.0, std::numeric_limits::infinity()), + 1.0); +} + +TEST(GaussianStddevCalculatorTest, InfEpsilonReturnsOne) { + EXPECT_DOUBLE_EQ( + CalculateDeltaForGaussianStddev( + std::numeric_limits::infinity(), 1.0, 1.0), + 1.0); +} + +TEST(GaussianStddevCalculatorTest, NegativeEpsilonReturnsOne) { + EXPECT_DOUBLE_EQ( + CalculateDeltaForGaussianStddev(-1.0, 1.0, 1.0), 1.0); +} + +TEST(GaussianStddevCalculatorTest, NanL2SensitivityReturnsOne) { + EXPECT_DOUBLE_EQ( + CalculateDeltaForGaussianStddev( + 1.0, std::numeric_limits::quiet_NaN(), 1.0), + 1.0); +} + +TEST(GaussianStddevCalculatorTest, InfL2SensitivityReturnsOne) { + EXPECT_DOUBLE_EQ( + CalculateDeltaForGaussianStddev( + 1.0, std::numeric_limits::infinity(), 1.0), + 1.0); +} + +// ── Correctness: matches Theorem 8 reference values ───────────────────────── + +// a = 1/(2*1) = 0.5, b = 1*1/1 = 1.0 +// delta = Phi(-0.5) - e^1 * Phi(-1.5) ≈ 0.12693 +TEST(GaussianStddevCalculatorTest, CorrectnessStandardParams) { + const double delta = CalculateDeltaForGaussianStddev( + /*epsilon=*/1.0, /*l2_sensitivity=*/1.0, /*stddev=*/1.0); + EXPECT_NEAR(delta, 0.1269367, 1e-6); +} + +TEST(GaussianStddevCalculatorTest, CorrectnessLargeSigma) { + const double delta = CalculateDeltaForGaussianStddev( + /*epsilon=*/1.0, /*l2_sensitivity=*/1.0, /*stddev=*/10.0); + EXPECT_GT(delta, 0.0); + EXPECT_LT(delta, 1e-3); + EXPECT_FALSE(std::isnan(delta)); +} + +// ── Integration: binary search still finds correct sigma ───────────────────── + +TEST(GaussianStddevCalculatorTest, BinarySearchStandardTarget) { + const double sigma = CalculateGaussianStddev( + /*epsilon=*/1.0, /*delta=*/1e-6, /*l2_sensitivity=*/1.0); + EXPECT_GT(sigma, 0.0); + EXPECT_FALSE(std::isnan(sigma)); + const double delta_actual = CalculateDeltaForGaussianStddev(1.0, 1.0, sigma); + EXPECT_LE(delta_actual, 1e-6); // Must satisfy the target. +} + +TEST(GaussianStddevCalculatorTest, BinarySearchTightTarget) { + const double sigma = CalculateGaussianStddev( + /*epsilon=*/0.5, /*delta=*/1e-10, /*l2_sensitivity=*/1.0); + EXPECT_GT(sigma, 0.0); + EXPECT_FALSE(std::isnan(sigma)); + const double delta_actual = CalculateDeltaForGaussianStddev(0.5, 1.0, sigma); + EXPECT_LE(delta_actual, 1e-10); +} + +} // namespace +} // namespace internal +} // namespace differential_privacy diff --git a/python/dp_accounting/dp_accounting/pld/privacy_loss_distribution.py b/python/dp_accounting/dp_accounting/pld/privacy_loss_distribution.py index 4d90cac6..610fbfe4 100644 --- a/python/dp_accounting/dp_accounting/pld/privacy_loss_distribution.py +++ b/python/dp_accounting/dp_accounting/pld/privacy_loss_distribution.py @@ -1003,6 +1003,29 @@ def from_gaussian_mechanism( "Connect the Dots" algorithm. See Sections 2.1 and 2.2 of supplementary material for more details. + CORRELATED MULTI-OUTPUT RELEASES + If the mechanism releases k outputs jointly (M(D) = (f_1(D)+Z_1,...,f_k(D)+Z_k) + with independent noise Z_i ~ N(0, sigma^2) and coupling f_j(D) = c_j*f_1(D)), + and an adversary observes all outputs simultaneously, the exact epsilon-hockey-stick + divergence equals the scalar value at EFFECTIVE SENSITIVITY Delta_eff: + + Delta_eff = Delta * np.linalg.norm(c_vec) # c_vec = [c_1, ..., c_k] + + This follows from the joint PLRV L(x) = (Delta/sigma^2)*sum(c_j*x_j) - Delta^2*||c||^2/(2*sigma^2) + being Gaussian N(Delta_eff^2/(2*sigma^2), Delta_eff^2/sigma^2) under D. + + Example (2-client FL with coupling c=0.5): + # INCORRECT for jointly observed outputs: + delta = from_gaussian_mechanism(sigma, Delta).get_delta_for_epsilon(eps) + + # CORRECT: + import numpy as np + c_vec = [1.0, 0.5] # client coupling coefficients + Delta_eff = Delta * np.linalg.norm(c_vec) + delta = from_gaussian_mechanism(sigma, Delta_eff).get_delta_for_epsilon(eps) + + See https://doi.org/10.5281/zenodo.20078486 for proof and experiments. + Args: standard_deviation: the standard_deviation of the Gaussian distribution. sensitivity: the sensitivity of function f. (i.e. the maximum absolute