Skip to content

Commit 88a8fdf

Browse files
author
Ram Idavalapati
authored
Merge pull request #26 from RamanjaneyuluIdavalapati/master
wordvecspace Service
2 parents 5ac290c + e68648b commit 88a8fdf

8 files changed

Lines changed: 317 additions & 87 deletions

File tree

.travis.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,15 @@
11
language: python
22
python:
3-
- '2.7'
3+
- '3.5'
44
before_install:
55
- wget 'https://s3.amazonaws.com/deepcompute-public/data/wordvecspace/small_test_data.tgz'
66
&& tar xvzf small_test_data.tgz
77
- export WORDVECSPACE_DATADIR='./small_test_data'
88
install:
9-
- pip install .
9+
- pip install .[service]
1010
- sudo apt install libopenblas-base
1111
script:
12-
- python setup.py test
12+
- python3 setup.py test
1313
deploy:
1414
- provider: releases
1515
skip_cleanup: true
@@ -24,8 +24,8 @@ deploy:
2424
- wordvecspace/command.py
2525
- wordvecspace/convert.py
2626
- wordvecspace/cuda.py
27-
name: wordvecspace-0.4.2
28-
tag_name: 0.4.2
27+
name: wordvecspace-0.5
28+
tag_name: 0.5
2929
on:
3030
repo: deep-compute/wordvecspace
3131
# pypitest

README.md

Lines changed: 69 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@
33
A high-performance, pure-Python module for loading and performing operations on word vector spaces created using Google's Word2vec tool.
44

55
## Installation
6-
> Prerequisites: Python2.7
6+
> Prerequisites: Python3.5
77
88
```bash
99
$ sudo apt install libopenblas-base
10-
$ sudo pip install wordvecspace
10+
$ sudo pip3 install wordvecspace
1111
```
1212

1313
## Usage
@@ -62,25 +62,6 @@ $ wordvecspace convert <input_dir> <output_dir>
6262

6363
# You can also generate shards by specifying the number of vectors per shard
6464
$ wordvecspace convert <input_dir> <output_dir> -n 5000
65-
```
66-
### Interactive console
67-
```bash
68-
$ wordvecspace interact <input_dir>
69-
70-
# <input_dir> is the directory which has vocab.txt and vectors.npy
71-
```
72-
Example:
73-
```bash
74-
$ wordvecspace interact /home/user/data
75-
76-
Total number of vectors and dimensions in .npy file (71291, 5)
77-
78-
>>> help
79-
['DEFAULT_K', 'VECTOR_FNAME', 'VOCAB_FNAME', '__class__', '__delattr__', '__dict__', '__doc__', '__format__', '__getattribute__', '__hash__', '__init__', '__module__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_load_vocab', '_make_array', '_perform_dot', '_perform_sgemm', '_perform_sgemv', 'data_dir', 'does_word_exist', 'get_distance', 'get_distances', 'get_nearest_neighbors', 'get_vector_magnitudes', 'get_word_at_index', 'get_word_index', 'get_word_occurrences', 'get_word_vector', 'get_word_vectors', 'load', 'magnitudes', 'num_dimensions', 'num_vectors', 'vectors', 'word_indices', 'word_occurrences', 'words']
80-
81-
WordVecSpace console
82-
>>> wv = WordVecSpace
83-
8465
```
8566
### Importing
8667
```python
@@ -256,6 +237,73 @@ Int64Index([ 509, 486, 14208, 20639, 8573, 3389, 5226, 20919, 10172,
256237
dtype='int64')
257238
```
258239

240+
### Service
241+
242+
```bash
243+
# Run wordvecspace as a service (it listens continuously on a port for API requests)
244+
$ wordvecspace runserver <input_dir> -p <port_no>
245+
246+
# -p sets the port. If it is omitted, wordvecspace runs on port 8900 by default.
247+
# <port_no> is the port number on which the wordvecspace service listens
248+
# <input_dir> is the directory which has vocab.txt and vectors.npy.
249+
```
250+
251+
Example:
252+
253+
```bash
254+
$ wordvecspace runserver /home/user/data -p 8000
255+
256+
# Make API request
257+
$ curl "http://localhost:8000/api/v1/does_word_exist?word=india"
258+
{"result": true, "success": true}
259+
```
260+
261+
#### Making calls to all API methods
262+
263+
```bash
264+
$ http://localhost:8000/api/v1/does_word_exist?word=india
265+
266+
$ http://localhost:8000/api/v1/get_word_index?word=india
267+
268+
$ http://localhost:8000/api/v1/get_word_at_index?index=509
269+
270+
$ http://localhost:8000/api/v1/get_word_vector?word_or_index=509
271+
272+
$ http://localhost:8000/api/v1/get_vector_magnitudes?words_or_indices=[88, "india"]
273+
274+
$ http://localhost:8000/api/v1/get_word_occurrences?word_or_index=india
275+
276+
$ http://localhost:8000/api/v1/get_word_vectors?words_or_indices=[1, 'india']
277+
278+
$ http://localhost:8000/api/v1/get_distance?word1=ap&word2=india
279+
280+
$ http://localhost:8000/api/v1/get_distances?row_words=india
281+
282+
$ http://localhost:8000/api/v1/get_nearest_neighbors?word=india&k=100
283+
```
284+
285+
> To see all API methods of wordvecspace, please visit http://localhost:8000/api/v1/apidoc
286+
287+
### Interactive console
288+
```bash
289+
$ wordvecspace interact <input_dir>
290+
291+
# <input_dir> is the directory which has vocab.txt and vectors.npy
292+
```
293+
294+
Example:
295+
```bash
296+
$ wordvecspace interact /home/user/data
297+
298+
Total number of vectors and dimensions in .npy file (71291, 5)
299+
300+
>>> help
301+
['DEFAULT_K', 'VECTOR_FNAME', 'VOCAB_FNAME', '__class__', '__delattr__', '__dict__', '__doc__', '__format__', '__getattribute__', '__hash__', '__init__', '__module__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_load_vocab', '_make_array', '_perform_dot', '_perform_sgemm', '_perform_sgemv', 'data_dir', 'does_word_exist', 'get_distance', 'get_distances', 'get_nearest_neighbors', 'get_vector_magnitudes', 'get_word_at_index', 'get_word_index', 'get_word_occurrences', 'get_word_vector', 'get_word_vectors', 'load', 'magnitudes', 'num_dimensions', 'num_vectors', 'vectors', 'word_indices', 'word_occurrences', 'words']
302+
303+
WordVecSpace console
304+
>>> wv = WordVecSpace
305+
306+
```
259307
## Running tests
260308

261309
```bash

setup.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from setuptools import setup, find_packages
22

3-
version = '0.4.2'
3+
version = '0.5'
44
setup(
55
name="wordvecspace",
66
version=version,
@@ -19,21 +19,22 @@
1919
'numpy==1.13.1',
2020
'pandas==0.20.3',
2121
'numba==0.36.2',
22-
'basescript'
22+
'basescript==0.2.0',
2323
],
2424
extras_require={
2525
'cuda': ['pycuda==2017.1.1', 'scikit-cuda==0.5.1'],
26+
'service': ['kwikapi[tornado]==0.2']
2627
},
2728
package_dir={'wordvecspace': 'wordvecspace'},
2829
packages=find_packages('.'),
2930
include_package_data=True,
3031
classifiers=[
31-
"Programming Language :: Python :: 2.7",
32+
"Programming Language :: Python :: 3.5",
3233
"Environment :: Console",
3334
"Intended Audience :: Developers",
3435
"License :: OSI Approved :: MIT License",
3536
],
36-
test_suite='test.suite',
37+
test_suite='test.suitefn',
3738
entry_points={
3839
"console_scripts": [
3940
"wordvecspace = wordvecspace:main",

test.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,18 @@
11
#!/usr/bin/env python
22

33
import doctest
4+
import unittest
45

5-
from wordvecspace import wordvecspace
6+
from wordvecspace import wordvecspace, server
67

7-
suite = doctest.DocTestSuite(wordvecspace)
8+
def suitefn():
9+
suite = unittest.TestSuite()
10+
11+
suite.addTests(doctest.DocTestSuite(wordvecspace))
12+
suite.addTests(doctest.DocTestSuite(server))
13+
14+
return suite
815

916
if __name__ == "__main__":
1017
doctest.testmod(wordvecspace)
18+
doctest.testmod(server)

wordvecspace/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
from wordvecspace import WordVecSpace
2-
from command import main
1+
from .wordvecspace import WordVecSpace
2+
from .command import main

wordvecspace/command.py

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,45 @@
11
import code
22

33
from basescript import BaseScript
4-
from convert import GWVec2WordVecSpace
5-
from wordvecspace import WordVecSpace
4+
5+
from .convert import GWVec2WordVecSpace
6+
from .wordvecspace import WordVecSpace
7+
from .server import WordVecSpaceServer
68

79
class WordVecSpaceCommand(BaseScript):
8-
DESC = 'Word Vector Space command-line tool'
10+
DESC = 'Word Vector Space command-line tool and Service'
11+
12+
DEFAULT_PORT = 8900
913

1014
def convert(self):
15+
# FIXME: track the issue about passing the logger to the convertor
1116
convertor = GWVec2WordVecSpace(
1217
self.args.input_dir,
1318
self.args.output_dir,
1419
self.args.num_vecs_per_shard)
1520
convertor.start()
1621

22+
# FIXME: track the issue on whether or not to remove sharding
1723
DEFAULT_NUM_VECS_PER_SHARD = 0
1824

1925
def interact(self):
20-
interact = WordVecSpace(self.args.input_dir)
26+
interact = WordVecSpace(self.args.input_dir)
2127
interact.load()
2228

23-
vectors = interact.num_vectors
24-
dimensions = interact.num_dimensions
29+
vectors = interact.num_vectors
30+
dimensions = interact.num_dimensions
31+
32+
print("Total number of vectors and dimensions in .npy file (%s, %s)" %(vectors, dimensions))
33+
print("")
34+
print("help")
35+
print("%s" %(dir(interact)))
2536

26-
print "Total number of vectors and dimensions in .npy file (%s, %s)" %(vectors, dimensions)
27-
print ""
28-
print "help"
29-
print "%s" %(dir(interact))
37+
namespace=dict(WordVecSpace=interact)
38+
code.interact("WordVecSpace Console", local=namespace)
3039

31-
namespace=dict(WordVecSpace=interact)
32-
code.interact("WordVecSpace Console", local=namespace)
40+
def runserver(self):
41+
server = WordVecSpaceServer(self.args.input_dir, self.args.port, log=self.log)
42+
server.start()
3343

3444
def define_subcommands(self, subcommands):
3545
super(WordVecSpaceCommand, self).define_subcommands(subcommands)
@@ -53,6 +63,16 @@ def define_subcommands(self, subcommands):
5363
help='Input directory containing WordVecSpace format files'
5464
' (vocab.txt, vectors.npy)')
5565

66+
runserver_cmd = subcommands.add_parser('runserver',
67+
help='WordVecSpace Service')
68+
runserver_cmd.set_defaults(func=self.runserver)
69+
runserver_cmd.add_argument('input_dir',
70+
help='Input directory containing WordVecSpace format files'
71+
' (vocab.txt, vectors.npy)')
72+
runserver_cmd.add_argument('-p', '--port',
73+
default=self.DEFAULT_PORT, type=int,
74+
help='Port for wordvecspace service to run.')
75+
5676
def main():
5777
WordVecSpaceCommand().start()
5878

0 commit comments

Comments
 (0)