Skip to content

Commit ae7ba86

Browse files
authored
Fix .rank() method for multiple models (#615)
The new Recommender.rank() function adds k as a required value, which breaks some models that do not use k in ranking evaluation (e.g., ComparER, EFM, LRPPM). This commit updates .rank() for the mentioned models with a top-K option.
1 parent cbdc8f2 commit ae7ba86

4 files changed

Lines changed: 94 additions & 50 deletions

File tree

cornac/models/comparer/recom_comparer_obj.pyx

Lines changed: 31 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -663,39 +663,51 @@ class ComparERObj(Recommender):
663663
item_score = self.U2[item_id, :].dot(self.U1[user_id, :]) + self.H2[item_id, :].dot(self.H1[user_id, :])
664664
return item_score
665665

666-
def rank(self, user_id, item_ids=None):
666+
def rank(self, user_idx, item_indices=None, k=-1):
667667
"""Rank all test items for a given user.
668668
669669
Parameters
670670
----------
671-
user_id: int, required
671+
user_idx: int, required
672672
The index of the user for whom to perform item ranking.
673673
674-
item_ids: 1d array, optional, default: None
674+
item_indices: 1d array, optional, default: None
675675
A list of candidate item indices to be ranked by the user.
676676
If `None`, list of ranked known item indices and their scores will be returned
677677
678+
k: int, optional, default: -1
679+
Cut-off length for recommendations, k=-1 will return ranked list of all items.
680+
This is more important for ANN to know the limit to avoid exhaustive ranking.
681+
678682
Returns
679683
-------
680-
Tuple of `item_rank`, and `item_scores`. The order of values
681-
in item_scores are corresponding to the order of their ids in item_ids
684+
(ranked_items, item_scores): tuple
685+
`ranked_items` contains item indices being ranked by their scores.
686+
`item_scores` contains scores of items corresponding to index in `item_indices` input.
682687
683688
"""
684-
X_ = self.U1[user_id, :].dot(self.V.T)
689+
X_ = self.U1[user_idx, :].dot(self.V.T)
685690
most_cared_aspects_indices = (-X_).argsort()[:self.num_most_cared_aspects]
686691
most_cared_X_ = X_[most_cared_aspects_indices]
687692
most_cared_Y_ = self.U2.dot(self.V[most_cared_aspects_indices, :].T)
688693
explicit_scores = most_cared_X_.dot(most_cared_Y_.T) / (self.num_most_cared_aspects * self.rating_scale)
689-
item_scores = self.alpha * explicit_scores + (1 - self.alpha) * self.score(user_id)
690-
691-
if item_ids is None:
692-
item_scores = item_scores
693-
item_rank = item_scores.argsort()[::-1]
694-
else:
695-
num_items = max(self.num_items, max(item_ids) + 1)
696-
item_scores = np.ones(num_items) * np.min(item_scores)
697-
item_scores[:self.num_items] = item_scores
698-
item_rank = item_scores.argsort()[::-1]
699-
item_rank = intersects(item_rank, item_ids, assume_unique=True)
700-
item_scores = item_scores[item_ids]
701-
return item_rank, item_scores
694+
all_item_scores = self.alpha * explicit_scores + (1 - self.alpha) * self.score(user_idx)
695+
696+
# rank items based on their scores
697+
item_indices = (
698+
np.arange(self.num_items)
699+
if item_indices is None
700+
else np.asarray(item_indices)
701+
)
702+
item_scores = all_item_scores[item_indices]
703+
704+
if k != -1: # O(n + k log k), faster for small k which is usually the case
705+
partitioned_idx = np.argpartition(item_scores, -k)
706+
top_k_idx = partitioned_idx[-k:]
707+
sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
708+
partitioned_idx[-k:] = sorted_top_k_idx
709+
ranked_items = item_indices[partitioned_idx[::-1]]
710+
else: # O(n log n)
711+
ranked_items = item_indices[item_scores.argsort()[::-1]]
712+
713+
return ranked_items, item_scores

cornac/models/comparer/recom_comparer_sub.pyx

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -759,7 +759,7 @@ class ComparERSub(MTER):
759759

760760
return correct, skipped, loss, bpr_loss
761761

762-
def rank(self, user_idx, item_indices=None):
762+
def rank(self, user_idx, item_indices=None, k=-1):
763763
if self.alpha > 0 and self.n_top_aspects > 0:
764764
n_top_aspects = min(self.n_top_aspects, self.num_aspects)
765765
ts1 = np.einsum("abc,a->bc", self.G1, self.U[user_idx])
@@ -786,12 +786,21 @@ class ComparERSub(MTER):
786786
all_item_scores[: self.num_items] = known_item_scores
787787

788788
# rank items based on their scores
789-
if item_indices is None:
790-
item_scores = all_item_scores[: self.num_items]
791-
item_rank = item_scores.argsort()[::-1]
792-
else:
793-
item_scores = all_item_scores[item_indices]
794-
item_rank = np.array(item_indices)[item_scores.argsort()[::-1]]
795-
796-
return item_rank, item_scores
797-
return super().rank(user_idx, item_indices)
789+
item_indices = (
790+
np.arange(self.num_items)
791+
if item_indices is None
792+
else np.asarray(item_indices)
793+
)
794+
item_scores = all_item_scores[item_indices]
795+
796+
if k != -1: # O(n + k log k), faster for small k which is usually the case
797+
partitioned_idx = np.argpartition(item_scores, -k)
798+
top_k_idx = partitioned_idx[-k:]
799+
sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
800+
partitioned_idx[-k:] = sorted_top_k_idx
801+
ranked_items = item_indices[partitioned_idx[::-1]]
802+
else: # O(n log n)
803+
ranked_items = item_indices[item_scores.argsort()[::-1]]
804+
805+
return ranked_items, item_scores
806+
return super().rank(user_idx, item_indices, k)

cornac/models/efm/recom_efm.pyx

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,7 @@ class EFM(Recommender):
468468
item_score = self.U2[item_idx, :].dot(self.U1[user_idx, :]) + self.H2[item_idx, :].dot(self.H1[user_idx, :])
469469
return item_score
470470

471-
def rank(self, user_idx, item_indices=None):
471+
def rank(self, user_idx, item_indices=None, k=-1):
472472
"""Rank all test items for a given user.
473473
474474
Parameters
@@ -480,10 +480,15 @@ class EFM(Recommender):
480480
A list of candidate item indices to be ranked by the user.
481481
If `None`, list of ranked known item indices and their scores will be returned
482482
483+
k: int, optional, default: -1
484+
Cut-off length for recommendations, k=-1 will return ranked list of all items.
485+
This is more important for ANN to know the limit to avoid exhaustive ranking.
486+
483487
Returns
484488
-------
485-
Tuple of `item_rank`, and `item_scores`. The order of values
486-
in item_scores are corresponding to the order of their ids in item_ids
489+
(ranked_items, item_scores): tuple
490+
`ranked_items` contains item indices being ranked by their scores.
491+
`item_scores` contains scores of items corresponding to index in `item_indices` input.
487492
488493
"""
489494
X_ = self.U1[user_idx, :].dot(self.V.T)
@@ -504,11 +509,20 @@ class EFM(Recommender):
504509
all_item_scores[: self.num_items] = known_item_scores
505510

506511
# rank items based on their scores
507-
if item_indices is None:
508-
item_scores = all_item_scores[: self.num_items]
509-
item_rank = item_scores.argsort()[::-1]
510-
else:
511-
item_scores = all_item_scores[item_indices]
512-
item_rank = np.array(item_indices)[item_scores.argsort()[::-1]]
513-
514-
return item_rank, item_scores
512+
item_indices = (
513+
np.arange(self.num_items)
514+
if item_indices is None
515+
else np.asarray(item_indices)
516+
)
517+
item_scores = all_item_scores[item_indices]
518+
519+
if k != -1: # O(n + k log k), faster for small k which is usually the case
520+
partitioned_idx = np.argpartition(item_scores, -k)
521+
top_k_idx = partitioned_idx[-k:]
522+
sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
523+
partitioned_idx[-k:] = sorted_top_k_idx
524+
ranked_items = item_indices[partitioned_idx[::-1]]
525+
else: # O(n log n)
526+
ranked_items = item_indices[item_scores.argsort()[::-1]]
527+
528+
return ranked_items, item_scores

cornac/models/lrppm/recom_lrppm.pyx

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -516,7 +516,7 @@ class LRPPM(Recommender):
516516
item_score = self.I[i_idx].dot(self.U[u_idx])
517517
return item_score
518518

519-
def rank(self, user_idx, item_indices=None):
519+
def rank(self, user_idx, item_indices=None, k=-1):
520520
if self.alpha > 0 and self.num_top_aspects > 0:
521521
n_items = self.num_items
522522
num_top_aspects = min(self.num_top_aspects, self.num_aspects)
@@ -540,12 +540,21 @@ class LRPPM(Recommender):
540540
all_item_scores[: self.num_items] = known_item_scores
541541

542542
# rank items based on their scores
543-
if item_indices is None:
544-
item_scores = all_item_scores[: self.num_items]
545-
item_rank = item_scores.argsort()[::-1]
546-
else:
547-
item_scores = all_item_scores[item_indices]
548-
item_rank = np.array(item_indices)[item_scores.argsort()[::-1]]
549-
550-
return item_rank, item_scores
551-
return super().rank(user_idx, item_indices)
543+
item_indices = (
544+
np.arange(self.num_items)
545+
if item_indices is None
546+
else np.asarray(item_indices)
547+
)
548+
item_scores = all_item_scores[item_indices]
549+
550+
if k != -1: # O(n + k log k), faster for small k which is usually the case
551+
partitioned_idx = np.argpartition(item_scores, -k)
552+
top_k_idx = partitioned_idx[-k:]
553+
sorted_top_k_idx = top_k_idx[np.argsort(item_scores[top_k_idx])]
554+
partitioned_idx[-k:] = sorted_top_k_idx
555+
ranked_items = item_indices[partitioned_idx[::-1]]
556+
else: # O(n log n)
557+
ranked_items = item_indices[item_scores.argsort()[::-1]]
558+
559+
return ranked_items, item_scores
560+
return super().rank(user_idx, item_indices, k)

0 commit comments

Comments
 (0)