This repository was archived by the owner on Feb 16, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 115
Expand file tree
/
Copy pathsse_evaluator.py
More file actions
114 lines (96 loc) · 3.81 KB
/
sse_evaluator.py
File metadata and controls
114 lines (96 loc) · 3.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# coding=utf-8
################################################################################
#
# Copyright (c) 2016 eBay Software Foundation.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#################################################################################
#
# @Author: Mingkuan Liu
# @Email: mingkliu@ebay.com
# @Date: 2016-07-24
#
##################################################################################
"""
Accuracy evaluator for Sequence Semantic Embedding model.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from builtins import map
from builtins import range
from builtins import object
import codecs
import numpy as np
from six.moves import xrange
import tensorflow as tf
import data_utils
import sse_model
import text_encoder
import math
class Evaluator(object):
"""
Helper class to evaluate models using an evaluation set.
Calculate top-n accuracy on the evaluation set.
evaluator = model.Evaluator(model, eval_src, srcLens, tgtIDs, tgtInputs, tgtLens, tgtID_FullLabelMap , sess, batch_size=2048)
"""
def __init__(self, model, eval_corpus, tgtIndexFile, session ):
"""
Initializes an Evaluator.
:param model: SSE model
:param eval_corpus: eval corpus contains source_tokens, source_seq_length, correct_targetIds
:param tgtIndexFile: encoded full targetSpace Index file. Format: targetID, targetSequence, targetEncodings
:param session:
:param batch_size:
"""
self.model = model
self.srcSeq_batch = [ entry[0] for entry in eval_corpus ]
self.session = session
self.targetEncodings = []
self.targetIDs = []
self.idLabelMap = {}
idx=0
for line in codecs.open(tgtIndexFile, 'r', 'utf-8').readlines():
info = line.strip().split('\t')
if len(info) != 3:
print('Error in targetIndexFile! %s' % line)
continue
tgtid, tgtseq, tgtEncoding = info[0], info[1], info[2]
self.targetIDs.append(tgtid)
self.targetEncodings.append( [ float(f) for f in tgtEncoding.strip().split(',') ] )
self.idLabelMap[tgtid] = idx
idx += 1
self.eval_Labels = [ [ self.idLabelMap[tgtid] for tgtid in entry[1] ] for entry in eval_corpus ]
self.targetEncodings = np.array( self.targetEncodings )
def eval(self, top_n=(1, 3, 10)):
"""
Obtains predictions for eval set target sequences and compares them to the
respective previous labels.
Returns an array of top-n accuracies.
"""
acc = []
self.model.set_forward_only(True)
for n in top_n:
batchSize = 600
batchacc = []
for batchId in range(math.ceil( len(self.srcSeq_batch) / batchSize )):
feed_dict = self.model.get_source_encoding_feed_dict(self.srcSeq_batch[batchId * batchSize: (batchId +1) * batchSize])
sourceEncodings = self.session.run([self.model.norm_src_seq_embedding], feed_dict=feed_dict)
sourceEncodings = np.vstack(sourceEncodings)
distances = np.dot( sourceEncodings, self.targetEncodings.T)
rankedScore, rankedIdx = data_utils.getSortedResults(distances)
batchacc.append( data_utils.computeTopK_TightVersion_accuracy(n, self.eval_Labels[batchId * batchSize: (batchId +1) * batchSize], rankedIdx))
acc.append(np.mean(batchacc))
return acc