Skip to content

Commit 79cbbfd

Browse files
committed
Fixed two bugs:
1. n_jobs is set to None by default to avoid an error. 2. Fixed the unexpected-parameter error for 'allow_empty_party'. Added a README and examples.
1 parent b2a55af commit 79cbbfd

7 files changed

Lines changed: 90 additions & 13 deletions

File tree

README.md

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,57 @@ pip install vertibench
1717

1818
## Getting Started
1919

20+
This example includes the pipeline of split and evaluate. First,
21+
load your datasets or generate synthetic datasets.
22+
23+
```python
24+
from sklearn.datasets import make_classification
25+
26+
# Generate a large dataset
27+
X, y = make_classification(n_samples=10000, n_features=10)
28+
```
29+
30+
To split the dataset by importance,
31+
32+
```python
33+
from vertibench.Splitter import ImportanceSplitter
34+
35+
imp_splitter = ImportanceSplitter(num_parties=4, weights=[1, 1, 1, 3])
36+
Xs = imp_splitter.split(X)
37+
```
38+
39+
To split the dataset by correlation,
40+
41+
```python
42+
from vertibench.Splitter import CorrelationSplitter
43+
44+
corr_splitter = CorrelationSplitter(num_parties=4)
45+
Xs = corr_splitter.fit_split(X)
46+
```
47+
48+
To evaluate a feature split `Xs` in terms of party importance,
49+
50+
```python
51+
from vertibench.Evaluator import ImportanceEvaluator
52+
from sklearn.linear_model import LogisticRegression
53+
import numpy as np
54+
55+
model = LogisticRegression()
56+
X = np.concatenate(Xs, axis=1)
57+
model.fit(X, y)
58+
imp_evaluator = ImportanceEvaluator()
59+
imp_scores = imp_evaluator.evaluate(Xs, model.predict)
60+
alpha = imp_evaluator.evaluate_alpha(scores=imp_scores)
61+
print(f"Importance scores: {imp_scores}, alpha: {alpha}")
62+
```
63+
64+
To evaluate a feature split in terms of correlation,
65+
66+
```python
67+
from vertibench.Evaluator import CorrelationEvaluator
68+
69+
corr_evaluator = CorrelationEvaluator()
70+
corr_scores = corr_evaluator.fit_evaluate(Xs)
71+
beta = corr_evaluator.evaluate_beta()
72+
print(f"Correlation scores: {corr_scores}, beta: {beta}")
73+
```

example/EvaluatorExample.py

Lines changed: 0 additions & 9 deletions
This file was deleted.

example/SplitEvaluateExample.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from sklearn.datasets import make_classification
2+
3+
# Generate a large dataset
4+
X, y = make_classification(n_samples=10000, n_features=10)
5+
6+
7+
from vertibench.Evaluator import ImportanceEvaluator, CorrelationEvaluator
8+
from vertibench.Splitter import ImportanceSplitter, CorrelationSplitter
9+
from sklearn.linear_model import LogisticRegression
10+
11+
# Split by importance
12+
imp_splitter = ImportanceSplitter(num_parties=4, weights=[1, 1, 1, 3])
13+
Xs = imp_splitter.split(X)
14+
15+
# Evaluate split by importance
16+
model = LogisticRegression()
17+
model.fit(X, y)
18+
imp_evaluator = ImportanceEvaluator()
19+
imp_scores = imp_evaluator.evaluate(Xs, model.predict)
20+
alpha = imp_evaluator.evaluate_alpha(scores=imp_scores)
21+
print(f"Importance scores: {imp_scores}, alpha: {alpha}")
22+
23+
# Split by correlation
24+
corr_splitter = CorrelationSplitter(num_parties=4)
25+
Xs = corr_splitter.fit_split(X)
26+
27+
# Evaluate split by correlation
28+
corr_evaluator = CorrelationEvaluator()
29+
corr_scores = corr_evaluator.fit_evaluate(Xs)
30+
beta = corr_evaluator.evaluate_beta()
31+
print(f"Correlation scores: {corr_scores}, beta: {beta}")
32+

example/SplitExample.py

Whitespace-only changes.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ authors = [
99
{ name = "Junyi Hou" },
1010
{ name = "Bingsheng He" }
1111
]
12-
version = "0.1.1a1"
12+
version = "0.1.1"
1313
description = "A tool for benchmarking vertical federated learning algorithms, containing synthetic data split and data evaluation."
1414
readme = "README.md"
1515
license = { file = "LICENSE" } # If you have a LICENSE file

src/vertibench/Evaluator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ class CorrelationEvaluator:
129129
Correlation evaluator for VFL datasets
130130
"""
131131

132-
def __init__(self, corr_func='spearmanr', gpu_id=None, svd_algo='auto', n_jobs=1, **kwargs):
132+
def __init__(self, corr_func='spearmanr', gpu_id=None, svd_algo='auto', n_jobs=None, **kwargs):
133133
"""
134134
:param corr_func: [str] function to calculate the correlation between two features
135135
:param gamma: [float] weight of the inner-party correlation score

src/vertibench/Splitter.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ def split_indices(self, X, allow_empty_party=False):
160160

161161
class CorrelationSplitter(Splitter):
162162

163-
def __init__(self, num_parties: int, evaluator: CorrelationEvaluator = None, seed=None, gpu_id=None, n_jobs=1):
163+
def __init__(self, num_parties: int, evaluator: CorrelationEvaluator = None, seed=None, gpu_id=None, n_jobs=None):
164164
"""
165165
Split a 2D dataset by feature correlation (assuming the features are equally important).
166166
:param num_parties: [int] number of parties
@@ -273,7 +273,7 @@ def fit(self, X, **kwargs):
273273
self.max_icor = self.evaluator.max_icor
274274

275275
def split_indices(self, X, n_elites=20, n_offsprings=70, n_mutants=10, n_gen=100, bias=0.7, verbose=False,
276-
beta=0.5, term_tol=1e-4, term_period=10):
276+
beta=0.5, term_tol=1e-4, term_period=10, **kwargs):
277277
"""
278278
Use BRKGA to find the best order of features that minimizes the difference between the mean of icor and the
279279
target. split() assumes that the min and max icor have been calculated by fit().

0 commit comments

Comments
 (0)