Skip to content

Commit e125951

Browse files
shs037 authored and
tensorflower-gardener committed
Sets training set as positive class for sklearn.metrics.roc_curve.
sklearn.metrics.roc_curve uses classification rules of the form "score >= threshold ==> predict positive". When calling roc_curve, we used to label test data as the positive class. That way, TPR = % of test examples classified as test, and FPR = % of training examples classified as test; the classification rule is "loss >= threshold ==> predict test". For membership inference, TPR is usually defined as the % of training examples classified as training, and FPR as the % of test examples classified as training. As training samples usually have lower loss, we usually use rules of the form "loss <= threshold ==> predict training". Therefore, TPR in the 2nd case is actually (1 - FPR) in the 1st case, and FPR in the 2nd case is (1 - TPR) in the 1st case. This mismatch does not affect attacker advantage or AUC, but it can cause problems for PPV. Now, we:
- set the training set as the positive class.
- for threshold and entropy attacks, set the score to be -loss, so that a higher score corresponds to training data.
- negate the thresholds (computed based on -loss) so that they correspond to loss.
PiperOrigin-RevId: 519880043
1 parent 7796369 commit e125951

3 files changed

Lines changed: 70 additions & 21 deletions

File tree

tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/membership_inference_attack.py

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,9 @@ def _run_trained_attack(
120120
assert not np.any(np.isnan(scores))
121121

122122
# Generate ROC curves with scores.
123+
# Different from the threshold attacker which uses the loss, we do not negate
124+
# the scores here, because the attacker returns the probability of the
125+
# positive class.
123126
fpr, tpr, thresholds = metrics.roc_curve(labels, scores)
124127
# 'test_train_ratio' is the ratio of test data size to train data size. It is
125128
# used to compute the Positive Predictive Value.
@@ -131,7 +134,7 @@ def _run_trained_attack(
131134
thresholds=thresholds,
132135
test_train_ratio=test_train_ratio)
133136

134-
in_train_indices = (labels == 0)
137+
in_train_indices = labels == 1
135138
return SingleAttackResult(
136139
slice_spec=_get_slice_spec(attack_input),
137140
data_size=prepared_attacker_data.data_size,
@@ -154,17 +157,22 @@ def _run_threshold_attack(attack_input: AttackInputData):
154157
loss_train = np.sum(loss_train, axis=1)
155158
loss_test = np.sum(loss_test, axis=1)
156159
fpr, tpr, thresholds = metrics.roc_curve(
157-
np.concatenate((np.zeros(ntrain), np.ones(ntest))),
158-
np.concatenate((loss_train, loss_test)))
160+
np.concatenate((np.ones(ntrain), np.zeros(ntest))),
161+
# roc_curve uses classifier in the form of
162+
# "score >= threshold ==> predict positive", while training data has lower
163+
# loss, so we negate the loss.
164+
-np.concatenate((loss_train, loss_test)),
165+
)
159166
# 'test_train_ratio' is the ratio of test data size to train data size. It is
160167
# used to compute the Positive Predictive Value.
161168
test_train_ratio = ntest / ntrain
162169

163170
roc_curve = RocCurve(
164171
tpr=tpr,
165172
fpr=fpr,
166-
thresholds=thresholds,
167-
test_train_ratio=test_train_ratio)
173+
thresholds=-thresholds, # negate because we negated the loss
174+
test_train_ratio=test_train_ratio,
175+
)
168176

169177
return SingleAttackResult(
170178
slice_spec=_get_slice_spec(attack_input),
@@ -182,18 +190,23 @@ def _run_threshold_entropy_attack(attack_input: AttackInputData):
182190
'multilabel data.'))
183191
ntrain, ntest = attack_input.get_train_size(), attack_input.get_test_size()
184192
fpr, tpr, thresholds = metrics.roc_curve(
185-
np.concatenate((np.zeros(ntrain), np.ones(ntest))),
186-
np.concatenate(
187-
(attack_input.get_entropy_train(), attack_input.get_entropy_test())))
193+
np.concatenate((np.ones(ntrain), np.zeros(ntest))),
194+
# Similar to the loss case, we negate the entropy because training examples are
195+
# expected to have lower entropy.
196+
-np.concatenate(
197+
(attack_input.get_entropy_train(), attack_input.get_entropy_test())
198+
),
199+
)
188200
# 'test_train_ratio' is the ratio of test data size to train data size. It is
189201
# used to compute the Positive Predictive Value.
190202
test_train_ratio = ntest / ntrain
191203

192204
roc_curve = RocCurve(
193205
tpr=tpr,
194206
fpr=fpr,
195-
thresholds=thresholds,
196-
test_train_ratio=test_train_ratio)
207+
thresholds=-thresholds, # negate because we negated the entropy
208+
test_train_ratio=test_train_ratio,
209+
)
197210

198211
return SingleAttackResult(
199212
slice_spec=_get_slice_spec(attack_input),

tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/membership_inference_attack_test.py

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -214,9 +214,11 @@ def test_run_attack_trained_sets_membership_scores(self):
214214
result.membership_scores_test,
215215
result.membership_scores_test[0],
216216
rtol=1e-3)
217-
# Training score should be smaller than test score
218-
self.assertLess(result.membership_scores_train[0],
219-
result.membership_scores_test[0])
217+
# Training score should be larger than test score, as training set is set
218+
# to be positive.
219+
self.assertGreater(
220+
result.membership_scores_train[0], result.membership_scores_test[0]
221+
)
220222

221223
def test_run_attack_threshold_calculates_correct_auc(self):
222224
result = mia._run_attack(
@@ -236,12 +238,39 @@ def test_run_attack_threshold_entropy_calculates_correct_auc(self):
236238

237239
np.testing.assert_almost_equal(result.roc_curve.get_auc(), 0.83, decimal=2)
238240

241+
@parameterized.parameters(
242+
[AttackType.THRESHOLD_ATTACK],
243+
[AttackType.THRESHOLD_ENTROPY_ATTACK],
244+
)
245+
def test_calculates_correct_tpr_fpr(self, attack_type):
246+
rng = np.random.RandomState(27)
247+
loss_train = rng.rand(100)
248+
loss_test = rng.rand(50) + 0.1
249+
result = mia._run_attack(
250+
AttackInputData(
251+
loss_train=loss_train,
252+
loss_test=loss_test,
253+
entropy_train=loss_train,
254+
entropy_test=loss_test,
255+
),
256+
attack_type,
257+
)
258+
self.assertEqual(attack_type, result.attack_type)
259+
for tpr, fpr, threshold in zip(
260+
result.roc_curve.tpr, result.roc_curve.fpr, result.roc_curve.thresholds
261+
):
262+
self.assertAlmostEqual(tpr, np.mean(loss_train <= threshold))
263+
self.assertAlmostEqual(fpr, np.mean(loss_test <= threshold))
264+
239265
@mock.patch('sklearn.metrics.roc_curve')
240266
def test_run_attack_threshold_entropy_small_tpr_fpr_correct_ppv(
241267
self, patched_fn):
242268
# sklearn.metrics.roc_curve returns (fpr, tpr, thresholds).
243-
patched_fn.return_value = ([0.2, 0.04, 0.0003], [0.1, 0.0001,
244-
0.0002], [0.2, 0.4, 0.6])
269+
patched_fn.return_value = (
270+
np.array([0.2, 0.04, 0.0003]),
271+
np.array([0.1, 0.0001, 0.0002]),
272+
np.array([0.2, 0.4, 0.6]),
273+
)
245274
result = mia._run_attack(
246275
AttackInputData(
247276
entropy_train=np.array([0.1, 0.2, 1.3, 0.4, 0.5, 0.6]),
@@ -380,8 +409,11 @@ def test_run_multilabel_attack_threshold_calculates_correct_ppv(self):
380409
def test_run_multilabel_attack_threshold_small_tpr_fpr_correct_ppv(
381410
self, patched_fn):
382411
# sklearn.metrics.roc_curve returns (fpr, tpr, thresholds).
383-
patched_fn.return_value = ([0.2, 0.04, 0.0003], [0.1, 0.0001,
384-
0.0002], [0.2, 0.4, 0.6])
412+
patched_fn.return_value = (
413+
np.array([0.2, 0.04, 0.0003]),
414+
np.array([0.1, 0.0001, 0.0002]),
415+
np.array([0.2, 0.4, 0.6]),
416+
)
385417
result = mia._run_attack(
386418
AttackInputData(
387419
loss_train=np.array([[0.1, 0.2], [1.3, 0.4], [0.5, 0.6], [0.9,

tensorflow_privacy/privacy/privacy_tests/membership_inference_attack/models.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def create_attacker_data(attack_input_data: data_structures.AttackInputData,
7676

7777
ntrain, ntest = attack_input_train.shape[0], attack_input_test.shape[0]
7878
features_all = np.concatenate((attack_input_train, attack_input_test))
79-
labels_all = np.concatenate((np.zeros(ntrain), np.ones(ntest)))
79+
labels_all = np.concatenate((np.ones(ntrain), np.zeros(ntest)))
8080
if attack_input_data.has_nonnull_sample_weights():
8181
sample_weights_all = np.concatenate((attack_input_data.sample_weight_train,
8282
attack_input_data.sample_weight_test),
@@ -282,13 +282,17 @@ def train_model(self, input_features, is_training_labels, sample_weight=None):
282282
self.model = model
283283

284284

285-
def create_attacker(attack_type,
286-
backend: Optional[str] = None) -> TrainedAttacker:
285+
def create_attacker(
286+
attack_type: data_structures.AttackType, backend: Optional[str] = None
287+
) -> TrainedAttacker:
287288
"""Returns the corresponding attacker for the provided attack_type."""
288289
# Compare by name instead of the variable itself to support module reload.
289290
if attack_type.name == data_structures.AttackType.LOGISTIC_REGRESSION.name:
290291
return LogisticRegressionAttacker(backend=backend)
291-
if attack_type.name == data_structures.AttackType.MULTI_LAYERED_PERCEPTRON.name:
292+
if (
293+
attack_type.name
294+
== data_structures.AttackType.MULTI_LAYERED_PERCEPTRON.name
295+
):
292296
return MultilayerPerceptronAttacker(backend=backend)
293297
if attack_type.name == data_structures.AttackType.RANDOM_FOREST.name:
294298
return RandomForestAttacker(backend=backend)

0 commit comments

Comments
 (0)