Skip to content

Commit bdc1726

Browse files
authored
Add PLD support for Quantiles (#607)
1 parent 568a623 commit bdc1726

5 files changed

Lines changed: 171 additions & 24 deletions

File tree

pipeline_dp/combiners.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -673,11 +673,13 @@ def compute_metrics(self, accumulator: AccumulatorType) -> AccumulatorType:
673673
tree = self._create_empty_quantile_tree()
674674
tree.merge(pydp._pydp.bytes_to_summary(accumulator))
675675

676-
quantiles = tree.compute_quantiles(
677-
self._params.eps, self._params.delta,
676+
quantiles = dp_computations.compute_dp_quantiles(
677+
tree,
678+
self._params.mechanism_spec,
679+
self._quantiles_to_compute,
678680
self._params.aggregate_params.max_partitions_contributed,
679681
self._params.aggregate_params.max_contributions_per_partition,
680-
self._quantiles_to_compute, self._noise_type())
682+
)
681683

682684
return dict([(name, value)
683685
for name, value in zip(self.metrics_names(), quantiles)])
@@ -707,14 +709,6 @@ def _create_empty_quantile_tree(self):
707709
self._params.aggregate_params.max_value, DEFAULT_TREE_HEIGHT,
708710
DEFAULT_BRANCHING_FACTOR)
709711

710-
def _noise_type(self) -> str:
711-
noise_kind = self._params.aggregate_params.noise_kind
712-
if noise_kind == pipeline_dp.NoiseKind.LAPLACE:
713-
return "laplace"
714-
if noise_kind == pipeline_dp.NoiseKind.GAUSSIAN:
715-
return "gaussian"
716-
assert False, f"{noise_kind} is not support by PyDP quantile tree."
717-
718712
def mechanism_spec(self) -> budget_accounting.MechanismSpec:
719713
return self._params.mechanism_spec
720714

pipeline_dp/dp_computations.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from pipeline_dp import budget_accounting
2626
from pipeline_dp import partition_selection
2727
from pydp.algorithms import numerical_mechanisms as dp_mechanisms
28+
from pydp.algorithms import quantile_tree
2829

2930

3031
@dataclass
@@ -883,3 +884,56 @@ def create_thresholding_mechanism(
883884
strategy=strategy,
884885
l0_sensitivity=sensitivities.l0,
885886
pre_threshold=pre_threshold)
887+
888+
889+
def compute_dp_quantiles(
    quantile_tree: quantile_tree.QuantileTree,
    mechanism_spec: budget_accounting.MechanismSpec,
    quantiles_to_compute: list[float],
    max_partitions_contributed: int,
    max_contributions_per_partition: int,
) -> list[float]:
    """Reads differentially private quantiles out of a quantile tree.

    The tree is always queried with an (epsilon, delta) pair. When the
    mechanism spec carries a noise standard deviation instead, that standard
    deviation is first translated into an (epsilon, delta) pair; otherwise
    the eps and delta stored on the spec are forwarded unchanged.

    Args:
        quantile_tree: The quantile tree to compute quantiles for.
        mechanism_spec: The mechanism spec; supplies the noise kind and
            either a noise standard deviation or an (eps, delta) pair.
        quantiles_to_compute: The quantiles to compute.
        max_partitions_contributed: The maximum number of partitions
            contributed per privacy unit.
        max_contributions_per_partition: The maximum number of contributions
            per partition.

    Returns:
        The computed DP quantiles, which are the same length as
        quantiles_to_compute.
    """
    noise_kind = mechanism_spec.mechanism_type.to_noise_kind()

    if not mechanism_spec.standard_deviation_is_set:
        # The spec already holds an explicit (epsilon, delta) budget.
        budget = (mechanism_spec.eps, mechanism_spec.delta)
    else:
        # QuantileTree accepts DP guarantees only as (epsilon, delta), while
        # PLD accounting yields the standard deviation of the equivalent
        # Laplace or Gaussian mechanism. Pick an (epsilon, delta) pair whose
        # Laplace/Gaussian mechanism (the one QuantileTree uses internally)
        # has that same standard deviation.
        std = mechanism_spec.noise_standard_deviation
        if noise_kind == aggregate_params.NoiseKind.LAPLACE:
            # Laplace noise with scale b has standard deviation sqrt(2) * b.
            # NOTE(review): this presumes b = 1 / epsilon, i.e. std is given
            # per unit of sensitivity - confirm against the accountant.
            budget = (np.sqrt(2) / std, 0.0)
        else:
            # Any non-Laplace kind takes the Gaussian path: epsilon is pinned
            # to 1.0 and delta is derived from the standard deviation.
            pinned_eps = 1.0
            budget = (pinned_eps, gaussian_delta(std, pinned_eps))

    eps, delta = budget
    return quantile_tree.compute_quantiles(
        epsilon=eps,
        delta=delta,
        max_partitions_contributed=max_partitions_contributed,
        max_contributions_per_partition=max_contributions_per_partition,
        quantiles=quantiles_to_compute,
        noise_type=noise_kind.value,
    )

pipeline_dp/dp_engine.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -556,15 +556,12 @@ def _check_budget_accountant_compatibility(self, is_public_partition: bool,
556556
budget_accounting.NaiveBudgetAccountant):
557557
# All aggregations support NaiveBudgetAccountant.
558558
return
559-
supported_metrics = [
560-
aggregate_params.Metrics.COUNT,
561-
aggregate_params.Metrics.PRIVACY_ID_COUNT,
562-
aggregate_params.Metrics.SUM, aggregate_params.Metrics.MEAN
563-
]
564-
non_supported_metrics = set(metrics) - set(supported_metrics)
565-
if non_supported_metrics:
566-
raise NotImplementedError(f"Metrics {non_supported_metrics} do not "
567-
f"support PLD budget accounting")
559+
if aggregate_params.Metrics.VECTOR_SUM in metrics:
560+
raise NotImplementedError(
561+
"Vector Sum is not supported by PLD budget accounting")
562+
if aggregate_params.Metrics.VARIANCE in metrics:
563+
raise NotImplementedError(
564+
"Variance is not supported by PLD budget accounting")
568565
if custom_combiner:
569566
raise ValueError(f"PLD budget accounting does not support custom "
570567
f"combiners")

tests/dp_computations_test.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,5 +1045,110 @@ def is_monotonic(self) -> bool:
10451045
self.assertSetEqual(chosen_values, {0, 1})
10461046

10471047

1048+
class ComputeDpQuantilesTest(parameterized.TestCase):
    """Verifies the arguments compute_dp_quantiles forwards to the tree."""

    def _check_forwarded_arguments(self, spec, expected_eps, expected_delta,
                                   expected_noise_type):
        """Runs compute_dp_quantiles on a mocked tree and checks the call.

        Args:
            spec: The MechanismSpec handed to compute_dp_quantiles.
            expected_eps: Epsilon the mocked tree must be called with.
            expected_delta: Delta the mocked tree must be called with.
            expected_noise_type: Noise type string the mocked tree must be
                called with.
        """
        # Arrange
        tree = MagicMock()
        tree.compute_quantiles.return_value = [10.0, 20.0]
        requested_quantiles = [0.5, 0.9]
        partitions_bound = 5
        contributions_bound = 2

        # Act
        output = dp_computations.compute_dp_quantiles(
            quantile_tree=tree,
            mechanism_spec=spec,
            quantiles_to_compute=requested_quantiles,
            max_partitions_contributed=partitions_bound,
            max_contributions_per_partition=contributions_bound,
        )

        # Assert
        tree.compute_quantiles.assert_called_once_with(
            epsilon=expected_eps,
            delta=expected_delta,
            max_partitions_contributed=partitions_bound,
            max_contributions_per_partition=contributions_bound,
            quantiles=requested_quantiles,
            noise_type=expected_noise_type,
        )
        # The tree's output must be passed through untouched.
        self.assertEqual(output, [10.0, 20.0])

    def test_compute_dp_quantiles_std_dev_set_laplace(self):
        laplace_spec = budget_accounting.MechanismSpec(
            aggregate_params.MechanismType.LAPLACE)
        laplace_spec.set_noise_standard_deviation(2.0)

        self._check_forwarded_arguments(
            laplace_spec,
            expected_eps=np.sqrt(2) / 2.0,
            expected_delta=0.0,
            expected_noise_type="laplace",
        )

    def test_compute_dp_quantiles_std_dev_set_gaussian(self):
        gaussian_spec = budget_accounting.MechanismSpec(
            aggregate_params.MechanismType.GAUSSIAN)
        gaussian_spec.set_noise_standard_deviation(2.0)

        self._check_forwarded_arguments(
            gaussian_spec,
            expected_eps=1.0,
            expected_delta=dp_computations.gaussian_delta(2.0, 1.0),
            expected_noise_type="gaussian",
        )

    def test_compute_dp_quantiles_std_dev_not_set_gaussian(self):
        gaussian_spec = budget_accounting.MechanismSpec(
            aggregate_params.MechanismType.GAUSSIAN)
        gaussian_spec.set_eps_delta(1.0, 1e-5)

        # With no standard deviation set, eps and delta pass through as-is.
        self._check_forwarded_arguments(
            gaussian_spec,
            expected_eps=1.0,
            expected_delta=1e-5,
            expected_noise_type="gaussian",
        )
1151+
1152+
10481153
if __name__ == '__main__':
10491154
absltest.main()

tests/dp_engine_test.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1307,13 +1307,10 @@ def test_min_max_sum_per_partition(self):
13071307
self.assertLen(output, 1)
13081308
self.assertAlmostEqual(output[0][1].sum, -3, delta=0.1)
13091309

1310-
@unittest.skipIf(
1311-
sys.version_info.major == 3 and sys.version_info.minor <= 8,
1312-
"dp_accounting library only support python >=3.9")
13131310
def test_pld_not_supported_metrics(self):
13141311
with self.assertRaisesRegex(
13151312
NotImplementedError,
1316-
"Metrics {VARIANCE} do not support PLD budget accounting"):
1313+
"Variance is not supported by PLD budget accounting"):
13171314
budget_accountant = pipeline_dp.PLDBudgetAccountant(
13181315
total_epsilon=1, total_delta=1e-10)
13191316
engine = pipeline_dp.DPEngine(

0 commit comments

Comments
 (0)