Use delta=0 (pure DP) when every group has keys (#897)

mccalluc · web-flow · commit e8f233d572d9 · 2026-04-07T10:29:53.000-04:00
* use pure DP where we can; slightly better generated notebooks

* update docs

* revert strip

* update index.html
diff --git a/docs/index.html b/docs/index.html
@@ -797,7 +797,7 @@ <h1>Context</h1>
 &gt;&gt;&gt;
 &gt;&gt;&gt; privacy_loss = dp.loss_of(
 ...     epsilon=1.0,
-...     delta=1 / max(1e7, 100000),
+...     delta=0,  # or 1 / max(1e7, 100000),
 ... )
 &gt;&gt;&gt;
 &gt;&gt;&gt; # See the OpenDP Library docs for more on Context:
@@ -813,7 +813,15 @@ <h1>Context</h1>
 ...     split_by_weights=[  # With only one query, the entire budget is allocated to that query:
 ...         1,  # grade
 ...     ],
-...     margins=[],
+...     margins=[
+...         dp.polars.Margin(
+...             by=list({}.keys()),
+...             invariant=&quot;keys&quot;,
+...             max_length=100000,
+...             max_groups=100,
+...         ),
+...         dp.polars.Margin(by=([&quot;grade_bin&quot;] + list({}.keys())), invariant=&quot;keys&quot;),
+...     ],
 ... )
 </code></pre>
 </div>
diff --git a/docs/index.md b/docs/index.md
@@ -545,7 +545,7 @@ Next, we'll define our Context. This is where we set the privacy budget, and set
 >>>
 >>> privacy_loss = dp.loss_of(
 ...     epsilon=1.0,
-...     delta=1 / max(1e7, 100000),
+...     delta=0,  # or 1 / max(1e7, 100000),
 ... )
 >>>
 >>> # See the OpenDP Library docs for more on Context:
@@ -561,7 +561,15 @@ Next, we'll define our Context. This is where we set the privacy budget, and set
 ...     split_by_weights=[  # With only one query, the entire budget is allocated to that query:
 ...         1,  # grade
 ...     ],
-...     margins=[],
+...     margins=[
+...         dp.polars.Margin(
+...             by=list({}.keys()),
+...             invariant="keys",
+...             max_length=100000,
+...             max_groups=100,
+...         ),
+...         dp.polars.Margin(by=(["grade_bin"] + list({}.keys())), invariant="keys"),
+...     ],
 ... )
 
 ```
diff --git a/dp_wizard/utils/code_generators/__init__.py b/dp_wizard/utils/code_generators/__init__.py
@@ -226,9 +226,6 @@ def template(CONTRIBUTIONS, CONTRIBUTIONS_ENTITY):
 
 def make_privacy_loss_block(pure: bool, epsilon: float, max_rows: int):
     """
-    Comments in the *pure* privacy loss block reference synthetic data generation
-    ("cuts dict"), so don't use "pure=True" for stats code!
-
     >>> print(
     ...     'pure DP: ',
     ...     make_privacy_loss_block(pure=True, epsilon=1, max_rows=1000)
@@ -249,8 +246,8 @@ def template(EPSILON, MAX_ROWS):
             privacy_loss = dp.loss_of(  # noqa: F841
                 # EPSILON_COMMENT_BLOCK
                 epsilon=EPSILON,
-                # If your columns don't match your cuts dict,
-                # you will also need to provide a very small "delta" value.
+                # Not necessary in this case,
+                # but other analyses require a very small "delta" value.
                 # https://docs.opendp.org/en/OPENDP_V_VERSION/getting-started/tabular-data/grouping.html#Stable-Keys
                 delta=0,  # or 1 / max(1e7, MAX_ROWS),
             )
diff --git a/dp_wizard/utils/code_generators/abstract_generator.py b/dp_wizard/utils/code_generators/abstract_generator.py
@@ -11,7 +11,6 @@
     make_privacy_loss_block,
     make_privacy_unit_block,
 )
-from dp_wizard.utils.code_generators.analyses import histogram
 from dp_wizard.utils.dp_helper import confidence
 from dp_wizard.utils.shared.bins import make_cut_points
 
@@ -147,12 +146,13 @@ def bin_template(GROUPS, BIN_NAME):
             DefaultsTemplate(basic_template)
             .fill_values(GROUPS=groups, MAX_ROWS=max_rows)
             .finish()
-        ] + [
-            DefaultsTemplate(bin_template)
-            .fill_values(GROUPS=groups, BIN_NAME=bin_name)
-            .finish()
-            for bin_name in bin_names
         ]
+        for bin_name in bin_names:
+            margins.append(
+                DefaultsTemplate(bin_template)
+                .fill_values(GROUPS=groups, BIN_NAME=bin_name)
+                .finish()
+            )
 
         margins_list = "[" + ", ".join(margins) + "\n    ]"
         return margins_list
@@ -248,30 +248,23 @@ def _make_partial_stats_context(self):
             for name, plan in self.analysis_plan.analysis_columns.items()
             if has_bins(get_statistic_by_name(plan[0].statistic_name))
         ]
+        all_groups_have_keys = all(
+            len(group_keys) > 0 for group_keys in self.analysis_plan.groups.values()
+        )
 
         privacy_unit_block = make_privacy_unit_block(
             contributions=self.analysis_plan.contributions,
             contributions_entity=self.analysis_plan.contributions_entity,
         )
         privacy_loss_block = make_privacy_loss_block(
-            pure=False,
+            pure=all_groups_have_keys,
             epsilon=self.analysis_plan.epsilon,
             max_rows=self.analysis_plan.max_rows,
         )
-
-        is_just_histograms = all(
-            plan_column[0].statistic_name == histogram.name
-            for plan_column in self.analysis_plan.analysis_columns.values()
-        )
-        margins_list = (
-            # Histograms don't need margins.
-            "[]"
-            if is_just_histograms
-            else self._make_margins_list(
-                bin_names=[f"{name}_bin" for name in bin_column_names],
-                groups=self.analysis_plan.groups,
-                max_rows=self.analysis_plan.max_rows,
-            )
+        margins_list = self._make_margins_list(
+            bin_names=[f"{name}_bin" for name in bin_column_names],
+            groups=self.analysis_plan.groups,
+            max_rows=self.analysis_plan.max_rows,
         )
         extra_columns = ", ".join(
             [