Skip to content

Commit 22ec277

Browse files
authored
Merge pull request #128 from bmacedo-lgtm/metrics
Add `auc_u_test` to `performance_metrics.py`
2 parents 300dd18 + 70313a4 commit 22ec277

File tree

2 files changed

+30
-0
lines changed

2 files changed

+30
-0
lines changed

selene_sdk/utils/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from .performance_metrics import PerformanceMetrics
1414
from .performance_metrics import visualize_roc_curves
1515
from .performance_metrics import visualize_precision_recall_curves
16+
from .performance_metrics import auc_u_test
1617
from .config import load
1718
from .config import load_path
1819
from .config import instantiate

selene_sdk/utils/performance_metrics.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from sklearn.metrics import precision_recall_curve
1212
from sklearn.metrics import roc_auc_score
1313
from sklearn.metrics import roc_curve
14+
from scipy.stats import rankdata
1415

1516

1617
logger = logging.getLogger("selene")
@@ -247,6 +248,34 @@ def get_feature_specific_scores(data, get_feature_from_index_fn):
247248
return feature_score_dict
248249

249250

251+
def auc_u_test(labels, predictions):
    """
    Outputs the area under the ROC curve associated with a certain
    set of labels and the predictions given by the training model.
    Computed from the Mann-Whitney U statistic:
    AUC = U / (n_pos * n_neg), where U is derived from the rank sum
    of the positive examples' prediction scores.

    Parameters
    ----------
    labels : numpy.ndarray
        Known binary labels (0/1) of values predicted by the model.
        Must be one dimensional.
    predictions : numpy.ndarray
        Values predicted by the user model. Must be one dimensional,
        with matching dimension to `labels`.

    Returns
    -------
    float
        AUC value of given label, prediction pairs.

    Raises
    ------
    ValueError
        If `labels` and `predictions` differ in length, or if `labels`
        contains only one class (AUC is undefined in that case; the
        original formula would silently divide by zero and yield
        nan/inf).

    """
    labels = np.asarray(labels)
    predictions = np.asarray(predictions)
    if len(labels) != len(predictions):
        raise ValueError(
            "`labels` and `predictions` must have the same length "
            "({0} != {1})".format(len(labels), len(predictions)))

    len_pos = int(np.sum(labels))
    len_neg = len(labels) - len_pos
    # AUC is undefined unless both classes are present; guard against the
    # silent nan/inf that float division by zero would otherwise produce.
    if len_pos == 0 or len_neg == 0:
        raise ValueError(
            "Only one class present in `labels`; AUC is undefined.")

    # Rank all predictions together; the sum of the positives' ranks
    # determines the Mann-Whitney U statistic.
    rank_sum = np.sum(rankdata(predictions)[labels == 1])
    u_value = rank_sum - (len_pos * (len_pos + 1)) / 2
    auc = u_value / (len_pos * len_neg)
    return auc
277+
278+
250279
class PerformanceMetrics(object):
251280
"""
252281
Tracks and calculates metrics to evaluate how closely a model's

0 commit comments

Comments
 (0)