-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathbenchmarks.py
More file actions
114 lines (94 loc) · 3.32 KB
/
benchmarks.py
File metadata and controls
114 lines (94 loc) · 3.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import argparse
import json
import logging
import os
import pdb
import sys
import time
import requests
from flask import Flask, abort, jsonify, make_response, request
from flask_cors import CORS
from memory_profiler import memory_usage, profile
from config import SetupParameters
from transner import Transner
app = Flask(__name__)
cors = CORS(app)
model_dict = {'NERmodel': None}
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
_USE_CUDA=True
_N_GPUS = 3
os.environ["CUDA_VISIBLE_DEVICES"]=','.join([str(i) for i in range(_N_GPUS)])
_USE_QUANTIZE=False
_BATCH_SIZE=8
@app.route('/transner/v0.7/benchmarks', methods=['POST'])
def benchmarks():
"""This API returns the extracted entities for each sentence.
Returns:
[list] -- list of dictionaries containing, for each input string, the list of entities with type, value and offset.
e.g.
{'sentence': 'Mr. Robinson lives in Reeds.'
'entities':[
{
'offset': 0,
'type': PER,
'value': Mr. Robinson
},
{
'offset': 22,
'type': LOC,
'value': Reeds
}
]}
"""
device = 'GPU' if _USE_CUDA else 'CPU'
test_string = request.get_json()['strings'][0]
sentence_len = len(test_string.split())
test_array = [test_string for _ in range(_BATCH_SIZE)]
print('device={}; batch={}; sentence_len={}'.format(device, _BATCH_SIZE, sentence_len))
exec_times = []
mem_usages = []
for iteration in range(10):
print('------- RUN #{} -------'.format(iteration+1))
start = time.time()
curr_mem_usage = memory_usage((do_ner, (test_array,)))
end = time.time()
mem_usages.append(max(curr_mem_usage))
exec_times.append(end-start)
max_mem = max(mem_usages)
min_mem = min(mem_usages)
avg_mem = sum(mem_usages)/len(mem_usages)
avg_ms = sum(exec_times) / len(exec_times)
m_count = (avg_ms/60) % 60
s_count = avg_ms % 60
ms_count = avg_ms - int(avg_ms)
mem_str='max_mem={} MiB; min_mem={} MiB; avg_mem={} MiB'.format(round(max_mem), round(min_mem), round(avg_mem))
time_str='execution time: {}m:{}s:{}ms'.format(round(m_count), round(s_count), int(ms_count*1000))
print(mem_str)
print(time_str)
print('---------------------')
return jsonify('{} {}'.format(mem_str, time_str)), 200
@profile
def do_ner(raw_input_strings):
# select the model and run NER
model = model_dict['NERmodel']
# reset preprocesser internal structures
model.reset_preprocesser()
ner_dict = model.ner(raw_input_strings, apply_regex=True, apply_gazetteers=True)
return ner_dict
@app.errorhandler(404)
def not_found(error):
return make_response(jsonify({'error': 'Not found'}), 404)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument(
'--port',
type=int,
required=True,
help='Port to allocate for the rest api'
)
args = parser.parse_args()
model_dict['NERmodel'] = Transner(pretrained_model=SetupParameters['PRETRAINED_MODEL'],
quantization=_USE_QUANTIZE,
use_cuda=_USE_CUDA,
n_gpus=_N_GPUS)
app.run(host='0.0.0.0', debug=True, port=args.port)