Skip to content

Commit 78d08ac

Browse files
miracvbasarancopybara-github
authored andcommitted
n/a
PiperOrigin-RevId: 897660323
1 parent 9ebc395 commit 78d08ac

10 files changed

Lines changed: 429 additions & 32 deletions

File tree

go/checks/checks.go

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,56 @@ func CheckMaxContributionsPerPartition(maxContributionsPerPartition int64) error
254254
return nil
255255
}
256256

257+
// CheckContributionBoundingOptions returns an error unless exactly one of
// MaxContributions and MaxPartitionsContributed is set.
//
// An option counts as set when it is strictly positive and as unset when it is 0.
// Negative values are rejected explicitly instead of being treated as unset, so that
// a misconfigured option is reported rather than silently ignored.
func CheckContributionBoundingOptions(maxContributions, maxPartitionsContributed int64) error {
	if maxContributions < 0 {
		return fmt.Errorf("MaxContributions must be non-negative, was %d instead", maxContributions)
	}
	if maxPartitionsContributed < 0 {
		return fmt.Errorf("MaxPartitionsContributed must be non-negative, was %d instead", maxPartitionsContributed)
	}
	switch {
	case maxContributions > 0 && maxPartitionsContributed > 0:
		return fmt.Errorf("MaxContributions and MaxPartitionsContributed are both set, exactly one should be set")
	case maxContributions == 0 && maxPartitionsContributed == 0:
		return fmt.Errorf("MaxContributions and MaxPartitionsContributed are both unset, exactly one should be set")
	}
	return nil
}
267+
268+
// CheckContributionBoundingOptionsWithMaxValue checks that either MaxContributions or
// the pair {MaxValue, MaxPartitionsContributed} is set, but not both.
//
// An option counts as set when it is strictly positive; negative values are always rejected.
// If MaxContributions is set, MaxValue and MaxPartitionsContributed must both be 0.
// If MaxContributions is not set, MaxValue and MaxPartitionsContributed must both be positive.
func CheckContributionBoundingOptionsWithMaxValue(maxContributions, maxPartitionsContributed, maxValue int64) error {
	if maxContributions < 0 {
		return fmt.Errorf("MaxContributions must be non-negative, was %d instead", maxContributions)
	}
	if maxValue < 0 {
		return fmt.Errorf("MaxValue must be non-negative, was %d instead", maxValue)
	}
	if maxPartitionsContributed < 0 {
		return fmt.Errorf("MaxPartitionsContributed must be non-negative, was %d instead", maxPartitionsContributed)
	}

	maxContributionsSet := maxContributions > 0
	maxValueSet := maxValue > 0
	maxPartitionsContributedSet := maxPartitionsContributed > 0

	switch {
	case maxContributionsSet && (maxValueSet || maxPartitionsContributedSet):
		// The MaxContributions configuration excludes the other two options.
		return fmt.Errorf("when MaxContributions is set, MaxValue and MaxPartitionsContributed must be 0")
	case maxContributionsSet:
		return nil
	// From here on the MaxValue/MaxPartitionsContributed configuration must be used.
	case !maxValueSet && !maxPartitionsContributedSet:
		return fmt.Errorf("when MaxContributions is not set, both MaxValue and MaxPartitionsContributed must be set to a positive value")
	case !maxValueSet:
		return fmt.Errorf("MaxValue must be set to a positive value, was %d instead", maxValue)
	case !maxPartitionsContributedSet:
		return fmt.Errorf("MaxPartitionsContributed must be set to a positive value, was %d instead", maxPartitionsContributed)
	}
	return nil
}
305+
306+
257307
// CheckAlpha returns an error if the supplied alpha is not between 0 and 1.
258308
func CheckAlpha(alpha float64) error {
259309
if alpha <= 0 || alpha >= 1 || math.IsNaN(alpha) || math.IsInf(alpha, 0) {

go/dpagg/select_partition.go

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -129,9 +129,18 @@ type PreAggSelectPartitionOptions struct {
129129
// for more information.
130130
// Optional.
131131
PreThreshold int64
132-
// MaxPartitionsContributed is the number of distinct partitions a single
133-
// privacy unit can contribute to. Required.
132+
// MaxPartitionsContributed is the number of distinct partitions a single privacy unit can contribute to.
133+
// For PreAggSelectPartition, setting MaxPartitionsContributed is functionally the same as
134+
// setting MaxContributions, but the two options are kept separate for consistency with other aggregations.
135+
//
136+
// Mutually exclusive with MaxContributions. One of the two options is required.
134137
MaxPartitionsContributed int64
138+
// MaxContributions is the number of distinct contributions a single
139+
// privacy unit can make. For PreAggSelectPartition, setting MaxContributions is functionally
140+
// the same as setting MaxPartitionsContributed, but the two options are kept separate for consistency with other aggregations.
141+
//
142+
// Mutually exclusive with MaxPartitionsContributed. One of the two options is required.
143+
MaxContributions int64
135144
}
136145

137146
// NewPreAggSelectPartition constructs a new PreAggSelectPartition from opt.
@@ -148,11 +157,18 @@ func NewPreAggSelectPartition(opt *PreAggSelectPartitionOptions) (*PreAggSelectP
148157
opt.PreThreshold = 1
149158
}
150159

160+
if err := checks.CheckContributionBoundingOptions(opt.MaxContributions, opt.MaxPartitionsContributed); err != nil {
161+
return nil, fmt.Errorf("NewPreAggSelectPartition: %v", err)
162+
}
163+
l0Sensitivity := opt.MaxPartitionsContributed
164+
if opt.MaxContributions > 0 {
165+
l0Sensitivity = opt.MaxContributions
166+
}
151167
s := PreAggSelectPartition{
152168
epsilon: opt.Epsilon,
153169
delta: opt.Delta,
154170
preThreshold: opt.PreThreshold,
155-
l0Sensitivity: opt.MaxPartitionsContributed,
171+
l0Sensitivity: l0Sensitivity,
156172
}
157173

158174
if err := checks.CheckDeltaStrict(s.delta); err != nil {

go/dpagg/select_partition_test.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,22 @@ func TestNewPreAggSelectPartition(t *testing.T) {
3131
want *PreAggSelectPartition
3232
wantErr bool
3333
}{
34-
{"MaxPartitionsContributed is not set",
34+
{"MaxPartitionsContributed and MaxContributions are not set",
3535
&PreAggSelectPartitionOptions{
3636
Epsilon: ln3,
3737
Delta: tenten,
3838
},
3939
nil,
4040
true},
41+
{"MaxPartitionsContributed and MaxContributions are set at same time",
42+
&PreAggSelectPartitionOptions{
43+
Epsilon: ln3,
44+
Delta: tenten,
45+
MaxPartitionsContributed: 1,
46+
MaxContributions: 1,
47+
},
48+
nil,
49+
true},
4150
{"Epsilon is not set",
4251
&PreAggSelectPartitionOptions{
4352
Delta: tenten,

go/dpagg/sum.go

Lines changed: 47 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -74,27 +74,41 @@ func bsEquallyInitializedint64(s1, s2 *BoundedSumInt64) bool {
7474

7575
// BoundedSumInt64Options contains the options necessary to initialize a BoundedSumInt64.
7676
type BoundedSumInt64Options struct {
77-
Epsilon float64 // Privacy parameter ε. Required.
78-
Delta float64 // Privacy parameter δ. Required with Gaussian noise, must be 0 with Laplace noise.
79-
MaxPartitionsContributed int64 // How many distinct partitions may a single privacy unit contribute to? Required.
80-
// Lower and Upper bounds for clamping. Required; must be such that Lower <= Upper.
77+
Epsilon float64 // Privacy parameter ε. Required.
78+
Delta float64 // Privacy parameter δ. Required with Gaussian noise, must be 0 with Laplace noise.
79+
// How many distinct partitions may a single privacy unit contribute to?
80+
// Mutually exclusive with MaxContributions. Required to be specified along with Lower and Upper when MaxContributions is not set.
81+
MaxPartitionsContributed int64
82+
// Lower and Upper bounds for clamping. Must be such that Lower <= Upper.
83+
// Mutually exclusive with MaxContributions. Required to be specified along with MaxPartitionsContributed when MaxContributions is not set.
8184
Lower, Upper int64
8285
Noise noise.Noise // Type of noise used in BoundedSum. Defaults to Laplace noise.
8386
// How many times may a single privacy unit contribute to a single partition?
8487
// Defaults to 1. This is only needed for other aggregation functions using BoundedSum;
8588
// which is why the option is not exported.
89+
//
90+
// maxContributionsPerPartition is mutually exclusive with MaxContributions. This option has no effect if MaxContributions is set.
8691
maxContributionsPerPartition int64
92+
// How many times may a single privacy unit contribute in total to all partitions?
93+
// Currently only used for Count aggregation function.
94+
//
95+
// Mutually exclusive with the set of {MaxPartitionsContributed, Lower, Upper}. Required when {MaxPartitionsContributed, Lower, Upper} are not set.
96+
MaxContributions int64
8797
}
8898

8999
// NewBoundedSumInt64 returns a new BoundedSumInt64, whose sum is initialized at 0.
90100
func NewBoundedSumInt64(opt *BoundedSumInt64Options) (*BoundedSumInt64, error) {
91101
if opt == nil {
92102
opt = &BoundedSumInt64Options{} // Prevents panicking due to a nil pointer dereference.
93103
}
104+
err := checks.CheckContributionBoundingOptions(opt.MaxContributions, opt.MaxPartitionsContributed)
105+
if err != nil {
106+
return nil, fmt.Errorf("NewBoundedSumInt64: %w", err)
107+
}
94108

95-
l0 := opt.MaxPartitionsContributed
96-
if l0 == 0 {
97-
return nil, fmt.Errorf("NewBoundedSumInt64: MaxPartitionsContributed must be set")
109+
l0, err := getL0Int(opt.MaxContributions, opt.MaxPartitionsContributed)
110+
if err != nil {
111+
return nil, fmt.Errorf("NewBoundedSumInt64: %w", err)
98112
}
99113

100114
maxContributionsPerPartition := opt.maxContributionsPerPartition
@@ -108,10 +122,9 @@ func NewBoundedSumInt64(opt *BoundedSumInt64Options) (*BoundedSumInt64, error) {
108122
}
109123
// Check bounds & use them to compute L_∞ sensitivity
110124
lower, upper := opt.Lower, opt.Upper
111-
if lower == 0 && upper == 0 {
112-
return nil, fmt.Errorf("NewBoundedSumInt64: Lower and Upper must be set (automatic bounds determination is not implemented yet). Lower and Upper cannot be both 0")
125+
if opt.MaxPartitionsContributed > 0 && lower == 0 && upper == 0 {
126+
return nil, fmt.Errorf("NewBoundedSumInt64: When using MaxPartitionsContributed, Lower and Upper must be set (automatic bounds determination is not implemented yet). Lower and Upper cannot be both 0")
113127
}
114-
var err error
115128
switch noise.ToKind(opt.Noise) {
116129
case noise.Unrecognised:
117130
err = checks.CheckBoundsInt64IgnoreOverflows(lower, upper)
@@ -121,7 +134,20 @@ func NewBoundedSumInt64(opt *BoundedSumInt64Options) (*BoundedSumInt64, error) {
121134
if err != nil {
122135
return nil, fmt.Errorf("NewBoundedSumInt64: %w", err)
123136
}
124-
lInf, err := getLInfInt(lower, upper, maxContributionsPerPartition)
137+
138+
var lInf int64
139+
if opt.MaxContributions > 0 {
140+
// When using MaxContributions, we set l0=1 and lInf=MaxContributions to represent
141+
// the L1 sensitivity (MaxContributions) in the form expected by the noise layer (l0 * lInf).
142+
// This does not hold for privacy-ID count aggregations, where l0=MaxContributions and lInf=1.
143+
lInf = opt.MaxContributions
144+
// When using MaxContributions, no per-partition contribution bounding is performed.
145+
// upper is set to a default of math.MaxInt64 because it is being used in the Add function below,
146+
// making it a no-op.
147+
upper = math.MaxInt64
148+
} else {
149+
lInf, err = getLInfInt(lower, upper, maxContributionsPerPartition)
150+
}
125151
if err != nil {
126152
if noise.ToKind(opt.Noise) == noise.Unrecognised {
127153
// Ignore sensitivity overflows if noise is not recognised.
@@ -160,6 +186,16 @@ func lInfIntOverflows(bound, maxContributionsPerPartition int64) bool {
160186
return mult/maxContributionsPerPartition != bound
161187
}
162188

189+
func getL0Int(maxContributions, maxPartitionsContributed int64) (int64, error) {
190+
if err := checks.CheckContributionBoundingOptions(maxContributions, maxPartitionsContributed); err != nil {
191+
return 0, err
192+
}
193+
if maxContributions > 0 {
194+
return 1, nil
195+
}
196+
return maxPartitionsContributed, nil
197+
}
198+
163199
// getLInfInt checks that the sensitivity parameters will not create overflow errors,
164200
// and returns the L_inf sensitivity of the BoundedSum object, which is calculated by the
165201
// formula = max(|lower|, |upper|) * maxContributionsPerPartition.

go/dpagg/sum_test.go

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ func TestNewBoundedSumInt64(t *testing.T) {
287287
want *BoundedSumInt64
288288
wantErr bool
289289
}{
290-
{"MaxPartitionsContributed is not set",
290+
{"MaxPartitionsContributed is not set when using maxContributionsPerPartition",
291291
&BoundedSumInt64Options{
292292
Epsilon: ln3,
293293
Delta: tenten,
@@ -298,7 +298,7 @@ func TestNewBoundedSumInt64(t *testing.T) {
298298
},
299299
nil,
300300
true},
301-
{"maxContributionsPerPartition is not set",
301+
{"maxContributionsPerPartition is not set when using MaxPartitionsContributed",
302302
&BoundedSumInt64Options{
303303
Epsilon: ln3,
304304
Delta: 0,
@@ -320,6 +320,16 @@ func TestNewBoundedSumInt64(t *testing.T) {
320320
state: defaultState,
321321
},
322322
false},
323+
{"MaxContributions is not set when not using maxContributionsPerPartition and MaxPartitionsContributed",
324+
&BoundedSumInt64Options{
325+
Epsilon: ln3,
326+
Delta: 0,
327+
Lower: -1,
328+
Upper: 5,
329+
Noise: noNoise{},
330+
},
331+
nil,
332+
true},
323333
{"Noise is not set",
324334
&BoundedSumInt64Options{
325335
Epsilon: ln3,
@@ -707,6 +717,24 @@ func TestCheckMergeBoundedSumInt64Compatibility(t *testing.T) {
707717
maxContributionsPerPartition: 2,
708718
},
709719
false},
720+
{"same options, all fields filled while using MaxContributions",
721+
&BoundedSumInt64Options{
722+
Epsilon: ln3,
723+
Delta: tenten,
724+
Lower: -1,
725+
Upper: 5,
726+
Noise: noise.Gaussian(),
727+
MaxContributions: 2,
728+
},
729+
&BoundedSumInt64Options{
730+
Epsilon: ln3,
731+
Delta: tenten,
732+
Lower: -1,
733+
Upper: 5,
734+
Noise: noise.Gaussian(),
735+
MaxContributions: 2,
736+
},
737+
false},
710738
{"same options, only required fields filled",
711739
&BoundedSumInt64Options{
712740
Epsilon: ln3,
@@ -721,6 +749,20 @@ func TestCheckMergeBoundedSumInt64Compatibility(t *testing.T) {
721749
MaxPartitionsContributed: 1,
722750
},
723751
false},
752+
{"same options, only required fields filled while using MaxContributions",
753+
&BoundedSumInt64Options{
754+
Epsilon: ln3,
755+
Lower: -1,
756+
Upper: 5,
757+
MaxContributions: 2,
758+
},
759+
&BoundedSumInt64Options{
760+
Epsilon: ln3,
761+
Lower: -1,
762+
Upper: 5,
763+
MaxContributions: 2,
764+
},
765+
false},
724766
{"different epsilon",
725767
&BoundedSumInt64Options{
726768
Epsilon: ln3,
@@ -783,6 +825,20 @@ func TestCheckMergeBoundedSumInt64Compatibility(t *testing.T) {
783825
MaxPartitionsContributed: 1,
784826
},
785827
true},
828+
{"different MaxContributions",
829+
&BoundedSumInt64Options{
830+
Epsilon: ln3,
831+
Lower: -1,
832+
Upper: 5,
833+
MaxContributions: 2,
834+
},
835+
&BoundedSumInt64Options{
836+
Epsilon: ln3,
837+
Lower: -1,
838+
Upper: 5,
839+
MaxContributions: 5,
840+
},
841+
true},
786842
{"different lower bound",
787843
&BoundedSumInt64Options{
788844
Epsilon: ln3,

privacy-on-beam/pbeam/aggregations.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,8 @@ func randBool(_, _ beam.V) bool {
9898
// boundContributions takes a PCollection<K,V> as input, and for each key, selects and returns
9999
// at most contributionLimit records with this key. The selection is "mostly random":
100100
// the records returned are selected randomly, but the randomness isn't secure.
101-
// This is fine to use in the cross-partition bounding stage or in the per-partition bounding stage,
101+
// This is fine to use in the cross-partition bounding stage, the per-partition bounding stage,
102+
// or the per-privacy-ID contribution bounding stage,
102103
// since the privacy guarantee doesn't depend on the privacy unit contributions being selected randomly.
103104
//
104105
// In order to do the cross-partition contribution bounding we need:
@@ -111,6 +112,13 @@ func randBool(_, _ beam.V) bool {
111112
// 1. the key to be the pair = {privacy ID, partition ID}.
112113
// 2. the value to be just the value which is associated with that {privacy ID, partition ID} pair
113114
// (there could be multiple entries with the same key).
115+
//
116+
// In order to do per-privacy-ID contribution bounding (L1 norm) we need:
117+
// 1. each record to represent a contribution of 1, such as Count. It cannot be used for aggregations
118+
// such as Sum since the function can only bound the number of contributions, not the value of the
119+
// contributions.
120+
// 2. the key should be the privacy ID.
121+
// 3. the value should be the partition ID.
114122
func boundContributions(s beam.Scope, kvCol beam.PCollection, contributionLimit int64) beam.PCollection {
115123
s = s.Scope("boundContributions")
116124
// Transform the PCollection<K,V> into a PCollection<K,[]V>, where
@@ -299,6 +307,7 @@ type boundedSumInt64Fn struct {
299307
MaxPartitionsContributed int64
300308
Lower int64
301309
Upper int64
310+
MaxContributions int64
302311
NoiseKind noise.Kind
303312
noise noise.Noise // Set during Setup phase according to NoiseKind.
304313
PublicPartitions bool
@@ -319,6 +328,7 @@ func newBoundedSumInt64Fn(spec PrivacySpec, params SumParams, noiseKind noise.Ki
319328
MaxPartitionsContributed: params.MaxPartitionsContributed,
320329
Lower: int64(params.MinValue),
321330
Upper: int64(params.MaxValue),
331+
MaxContributions: params.maxContributions,
322332
NoiseKind: noiseKind,
323333
PublicPartitions: publicPartitions,
324334
TestMode: spec.testMode,
@@ -345,6 +355,7 @@ func (fn *boundedSumInt64Fn) CreateAccumulator() (boundedSumAccumInt64, error) {
345355
MaxPartitionsContributed: fn.MaxPartitionsContributed,
346356
Lower: fn.Lower,
347357
Upper: fn.Upper,
358+
MaxContributions: fn.MaxContributions,
348359
Noise: fn.noise,
349360
})
350361
if err != nil {
@@ -357,6 +368,7 @@ func (fn *boundedSumInt64Fn) CreateAccumulator() (boundedSumAccumInt64, error) {
357368
Delta: fn.PartitionSelectionDelta,
358369
PreThreshold: fn.PreThreshold,
359370
MaxPartitionsContributed: fn.MaxPartitionsContributed,
371+
MaxContributions: fn.MaxContributions,
360372
})
361373
}
362374
return accum, err

0 commit comments

Comments
 (0)