Skip to content

Commit 4232bde

Browse files
committed
Remove bucket seed from LCF condition in count_distinct.c
1 parent a53619d commit 4232bde

File tree

4 files changed

+12
-16
lines changed

4 files changed

+12
-16
lines changed

pg_diffix/aggregation/noise.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ extern double generate_layered_noise(const seed_t *seeds, int seeds_count,
1515
const char *step_name, double layer_sd);
1616

1717
/*
18-
* Returns the noisy LCF threshold for the given noise layers.
18+
* Returns the noisy LCF threshold for the given noise layer.
1919
*/
20-
extern double generate_lcf_threshold(const seed_t *seeds, int seeds_count);
20+
extern double generate_lcf_threshold(seed_t seed);
2121

2222
#endif /* PG_DIFFIX_NOISE_H */

src/aggregation/count_distinct.c

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -80,33 +80,31 @@ static void set_value_sorting_globals(Oid element_type)
8080
g_compare_values_func = &g_compare_values_typentry->cmp_proc_finfo;
8181
}
8282

83-
static bool aid_set_is_high_count(seed_t bucket_seed, const List *aid_values_set)
83+
static bool aid_set_is_high_count(const List *aid_values_set)
8484
{
8585
if (list_length(aid_values_set) < g_config.low_count_min_threshold)
8686
return false; /* Fewer AID values than minimum threshold, value is low-count. */
8787

8888
seed_t aid_seed = hash_set_to_seed(aid_values_set);
89-
90-
seed_t seeds[] = {bucket_seed, aid_seed};
91-
double threshold = generate_lcf_threshold(seeds, ARRAY_LENGTH(seeds));
89+
double threshold = generate_lcf_threshold(aid_seed);
9290

9391
return list_length(aid_values_set) >= threshold;
9492
}
9593

96-
static bool aid_sets_are_high_count(seed_t bucket_seed, const List *aid_values_sets)
94+
static bool aid_sets_are_high_count(const List *aid_values_sets)
9795
{
9896
ListCell *cell;
9997
foreach (cell, aid_values_sets)
10098
{
10199
const List *aid_values_set = (const List *)lfirst(cell);
102-
if (!aid_set_is_high_count(bucket_seed, aid_values_set))
100+
if (!aid_set_is_high_count(aid_values_set))
103101
return false;
104102
}
105103
return true;
106104
}
107105

108106
/* Returns a list with the tracker entries that are low count. */
109-
static List *filter_lc_entries(seed_t bucket_seed, DistinctTracker_hash *tracker)
107+
static List *filter_lc_entries(DistinctTracker_hash *tracker)
110108
{
111109
List *lc_entries = NIL;
112110

@@ -115,7 +113,7 @@ static List *filter_lc_entries(seed_t bucket_seed, DistinctTracker_hash *tracker
115113
DistinctTrackerHashEntry *entry = NULL;
116114
while ((entry = DistinctTracker_iterate(tracker, &it)) != NULL)
117115
{
118-
if (!aid_sets_are_high_count(bucket_seed, entry->aid_values_sets))
116+
if (!aid_sets_are_high_count(entry->aid_values_sets))
119117
lc_entries = lappend(lc_entries, entry);
120118
}
121119

@@ -352,7 +350,7 @@ static CountDistinctResult count_distinct_calculate_final(CountDistinctState *st
352350

353351
DistinctTracker_hash *tracker = state->tracker;
354352

355-
List *lc_entries = filter_lc_entries(bucket_seed, tracker);
353+
List *lc_entries = filter_lc_entries(tracker);
356354
list_sort(lc_entries, &compare_tracker_entries_by_value); /* Needed to ensure determinism. */
357355

358356
CountDistinctResult result = {0};

src/aggregation/low_count.c

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,7 @@ typedef struct AidResult
1717
static AidResult calculate_aid_result(const AidTrackerState *tracker)
1818
{
1919
AidResult result = {.aid_seed = tracker->aid_seed};
20-
21-
seed_t seeds[] = {tracker->aid_seed};
22-
result.threshold = generate_lcf_threshold(seeds, ARRAY_LENGTH(seeds));
20+
result.threshold = generate_lcf_threshold(tracker->aid_seed);
2321
result.low_count = tracker->aid_set->members < result.threshold;
2422

2523
return result;

src/aggregation/noise.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,15 +112,15 @@ double generate_layered_noise(const seed_t *seeds, int seeds_count,
112112
return noise;
113113
}
114114

115-
double generate_lcf_threshold(const seed_t *seeds, int seeds_count)
115+
double generate_lcf_threshold(seed_t seed)
116116
{
117117
/*
118118
* `low_count_mean_gap` is the number of (total!) standard deviations between
119119
* `low_count_min_threshold` and desired mean.
120120
*/
121121
double threshold_mean = (double)g_config.low_count_min_threshold +
122122
g_config.low_count_mean_gap * g_config.low_count_layer_sd * sqrt(2.0);
123-
double noise = generate_layered_noise(seeds, seeds_count, "suppress", g_config.low_count_layer_sd);
123+
double noise = generate_layered_noise(&seed, 1, "suppress", g_config.low_count_layer_sd);
124124
double noisy_threshold = threshold_mean + noise;
125125
return Max(noisy_threshold, g_config.low_count_min_threshold);
126126
}

0 commit comments

Comments
 (0)