Skip to content

Commit e8f233d

Browse files
authored
delta=0 if groups (#897)
* use pure DP where we can; slightly better generated notebooks
* update docs
* revert strip
* update index.html
1 parent 84df4c6 commit e8f233d

4 files changed

Lines changed: 36 additions & 30 deletions

File tree

docs/index.html

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -797,7 +797,7 @@ <h1>Context</h1>
797797
&gt;&gt;&gt;
798798
&gt;&gt;&gt; privacy_loss = dp.loss_of(
799799
... epsilon=1.0,
800-
... delta=1 / max(1e7, 100000),
800+
... delta=0, # or 1 / max(1e7, 100000),
801801
... )
802802
&gt;&gt;&gt;
803803
&gt;&gt;&gt; # See the OpenDP Library docs for more on Context:
@@ -813,7 +813,15 @@ <h1>Context</h1>
813813
... split_by_weights=[ # With only one query, the entire budget is allocated to that query:
814814
... 1, # grade
815815
... ],
816-
... margins=[],
816+
... margins=[
817+
... dp.polars.Margin(
818+
... by=list({}.keys()),
819+
... invariant=&quot;keys&quot;,
820+
... max_length=100000,
821+
... max_groups=100,
822+
... ),
823+
... dp.polars.Margin(by=([&quot;grade_bin&quot;] + list({}.keys())), invariant=&quot;keys&quot;),
824+
... ],
817825
... )
818826
</code></pre>
819827
</div>

docs/index.md

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -545,7 +545,7 @@ Next, we'll define our Context. This is where we set the privacy budget, and set
545545
>>>
546546
>>> privacy_loss = dp.loss_of(
547547
... epsilon=1.0,
548-
... delta=1 / max(1e7, 100000),
548+
... delta=0, # or 1 / max(1e7, 100000),
549549
... )
550550
>>>
551551
>>> # See the OpenDP Library docs for more on Context:
@@ -561,7 +561,15 @@ Next, we'll define our Context. This is where we set the privacy budget, and set
561561
... split_by_weights=[ # With only one query, the entire budget is allocated to that query:
562562
... 1, # grade
563563
... ],
564-
... margins=[],
564+
... margins=[
565+
... dp.polars.Margin(
566+
... by=list({}.keys()),
567+
... invariant="keys",
568+
... max_length=100000,
569+
... max_groups=100,
570+
... ),
571+
... dp.polars.Margin(by=(["grade_bin"] + list({}.keys())), invariant="keys"),
572+
... ],
565573
... )
566574
567575
```

dp_wizard/utils/code_generators/__init__.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -226,9 +226,6 @@ def template(CONTRIBUTIONS, CONTRIBUTIONS_ENTITY):
226226

227227
def make_privacy_loss_block(pure: bool, epsilon: float, max_rows: int):
228228
"""
229-
Comments in the *pure* privacy loss block reference synthetic data generation
230-
("cuts dict"), so don't use "pure=True" for stats code!
231-
232229
>>> print(
233230
... 'pure DP: ',
234231
... make_privacy_loss_block(pure=True, epsilon=1, max_rows=1000)
@@ -249,8 +246,8 @@ def template(EPSILON, MAX_ROWS):
249246
privacy_loss = dp.loss_of( # noqa: F841
250247
# EPSILON_COMMENT_BLOCK
251248
epsilon=EPSILON,
252-
# If your columns don't match your cuts dict,
253-
# you will also need to provide a very small "delta" value.
249+
# Not necessary in this case,
250+
# but other analyses require a very small "delta" value.
254251
# https://docs.opendp.org/en/OPENDP_V_VERSION/getting-started/tabular-data/grouping.html#Stable-Keys
255252
delta=0, # or 1 / max(1e7, MAX_ROWS),
256253
)

dp_wizard/utils/code_generators/abstract_generator.py

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
make_privacy_loss_block,
1212
make_privacy_unit_block,
1313
)
14-
from dp_wizard.utils.code_generators.analyses import histogram
1514
from dp_wizard.utils.dp_helper import confidence
1615
from dp_wizard.utils.shared.bins import make_cut_points
1716

@@ -147,12 +146,13 @@ def bin_template(GROUPS, BIN_NAME):
147146
DefaultsTemplate(basic_template)
148147
.fill_values(GROUPS=groups, MAX_ROWS=max_rows)
149148
.finish()
150-
] + [
151-
DefaultsTemplate(bin_template)
152-
.fill_values(GROUPS=groups, BIN_NAME=bin_name)
153-
.finish()
154-
for bin_name in bin_names
155149
]
150+
for bin_name in bin_names:
151+
margins.append(
152+
DefaultsTemplate(bin_template)
153+
.fill_values(GROUPS=groups, BIN_NAME=bin_name)
154+
.finish()
155+
)
156156

157157
margins_list = "[" + ", ".join(margins) + "\n ]"
158158
return margins_list
@@ -248,30 +248,23 @@ def _make_partial_stats_context(self):
248248
for name, plan in self.analysis_plan.analysis_columns.items()
249249
if has_bins(get_statistic_by_name(plan[0].statistic_name))
250250
]
251+
all_groups_have_keys = all(
252+
len(group_keys) > 0 for group_keys in self.analysis_plan.groups.values()
253+
)
251254

252255
privacy_unit_block = make_privacy_unit_block(
253256
contributions=self.analysis_plan.contributions,
254257
contributions_entity=self.analysis_plan.contributions_entity,
255258
)
256259
privacy_loss_block = make_privacy_loss_block(
257-
pure=False,
260+
pure=all_groups_have_keys,
258261
epsilon=self.analysis_plan.epsilon,
259262
max_rows=self.analysis_plan.max_rows,
260263
)
261-
262-
is_just_histograms = all(
263-
plan_column[0].statistic_name == histogram.name
264-
for plan_column in self.analysis_plan.analysis_columns.values()
265-
)
266-
margins_list = (
267-
# Histograms don't need margins.
268-
"[]"
269-
if is_just_histograms
270-
else self._make_margins_list(
271-
bin_names=[f"{name}_bin" for name in bin_column_names],
272-
groups=self.analysis_plan.groups,
273-
max_rows=self.analysis_plan.max_rows,
274-
)
264+
margins_list = self._make_margins_list(
265+
bin_names=[f"{name}_bin" for name in bin_column_names],
266+
groups=self.analysis_plan.groups,
267+
max_rows=self.analysis_plan.max_rows,
275268
)
276269
extra_columns = ", ".join(
277270
[

0 commit comments

Comments
 (0)