Skip to content

Commit 53aab4e

Browse files
after changing fileformat.py
1 parent d2d0908 commit 53aab4e

9 files changed

Lines changed: 17 additions & 29 deletions

File tree

test.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88
from wordvecspace import annoy
99
from wordvecspace import disk
1010

11-
1211
def suite_test():
1312
suite = unittest.TestSuite()
1413

@@ -19,7 +18,6 @@ def suite_test():
1918

2019
return suite
2120

22-
2321
if __name__ == "__main__":
2422
doctest.testmod(fileformat)
2523
doctest.testmod(mem)

wordvecspace/annoy.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88

99
# export data directory path for test cases
1010
# export WORDVECSPACE_DATADIR=/path/to/data
11-
DATAFILE_ENV_VAR = os.environ.get('WORDVECSPACE_DATADIR', ' ')
11+
DATAFILE_ENV_VAR = os.environ.get('WORDVECSPACE_DATADIR', '')
1212

1313
check_equal = np.testing.assert_array_almost_equal
1414

@@ -97,7 +97,6 @@ def get_distances(self, row_words_or_indices, col_words_or_indices=None, raise_e
9797
return mat
9898

9999
DEFAULT_K = 512
100-
101100
def get_nearest(self, words_or_indices, k=DEFAULT_K, combination=False, raise_exc=False):
102101
'''
103102
>>> wv = WordVecSpaceAnnoy(DATAFILE_ENV_VAR)

wordvecspace/command.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -103,13 +103,13 @@ def define_subcommands(self, subcommands):
103103
super(WordVecSpaceCommand, self).define_subcommands(subcommands)
104104

105105
convert_cmd = subcommands.add_parser('convert',
106-
help='Convert data in Google\'s Word2Vec format to WordVecSpace format')
106+
help='Convert data in Google\'s Word2Vec format to WordVecSpace format')
107107
convert_cmd.set_defaults(func=self.convert)
108108
convert_cmd.add_argument('input_dir',
109-
help='Input directory containing Google Word2Vec format files'
110-
' (vocab.txt, vectors.bin)')
109+
help='Input directory containing Google Word2Vec format files'
110+
'(vocab.txt, vectors.bin)')
111111
convert_cmd.add_argument('output_dir',
112-
help='Output directory where WordVecSpace format files are produced')
112+
help='Output directory where WordVecSpace format files are produced')
113113

114114
interact_cmd = subcommands.add_parser('interact',
115115
help='WordVecSpace Console')
@@ -146,10 +146,8 @@ def define_subcommands(self, subcommands):
146146
Eg: --eargs n_trees=1:index_fpath=/tmp\
147147
(This is considered only when the type is annoy)')
148148

149-
150149
def main():
    '''
    Command-line entry point: build a WordVecSpaceCommand and
    hand control to its start() method.
    '''
    command = WordVecSpaceCommand()
    command.start()
152151

153-
154152
if __name__ == '__main__':
155153
main()

wordvecspace/convert.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,6 @@ def write(self, token, occur, vec):
5656
def close(self):
5757
self.out.close()
5858

59-
6059
class GW2VectoWordVecSpaceFile(object):
6160
'''
6261
Abstraction that helps in converting word vector space data

wordvecspace/disk.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313

1414
# export data directory path for test cases
1515
# export WORDVECSPACE_DATADIR=/path/to/data
16-
DATAFILE_ENV_VAR = os.environ.get('WORDVECSPACE_DATADIR', ' ')
16+
DATAFILE_ENV_VAR = os.environ.get('WORDVECSPACE_DATADIR', '')
1717

1818
class WordVecSpaceDisk(WordVecSpace):
1919
METRIC = 'angular'
@@ -277,7 +277,6 @@ def get_distances(self, row_words_or_indices, col_words_or_indices=None, metric=
277277
return distance.cdist(row_vectors, col_vectors, 'euclidean')
278278

279279
DEFAULT_K = 512
280-
281280
def get_nearest(self, words_or_indices, k=DEFAULT_K, combination=False, metric=None, raise_exc=False):
282281
'''
283282
>>> wv = WordVecSpaceDisk(DATAFILE_ENV_VAR)
@@ -301,5 +300,4 @@ def get_nearest(self, words_or_indices, k=DEFAULT_K, combination=False, metric=N
301300
if combination:
302301
return list(set(ner[0]).intersection(*ner))
303302

304-
else:
305-
return ner if isinstance(words_or_indices, (list, tuple)) else ner[0]
303+
return ner if isinstance(words_or_indices, (list, tuple)) else ner[0]

wordvecspace/exception.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,20 @@
11
class BaseException(Exception):
    '''
    Common base class for the exceptions defined in this module.

    NOTE: the name shadows Python's built-in BaseException inside
    this module; it derives from Exception, not the other way round.
    '''
33

4-
54
class UnknownWord(BaseException):
    '''
    Error that records the word it was raised for.

    The word is kept on ``self.word`` and the error renders as that
    word wrapped in double quotes.
    '''

    def __init__(self, word):
        self.word = word

    def __str__(self):
        quoted = '"%s"' % self.word
        return quoted
1110

12-
1311
class UnknownIndex(BaseException):
    '''
    Error that records the index it was raised for.

    The index is kept on ``self.index`` and the error renders as that
    index wrapped in double quotes, mirroring UnknownWord.
    '''

    def __init__(self, index):
        self.index = index

    def __str__(self):
        # This formatter used to be named `__int__`. It returns a string,
        # so str(exc) never used it and int(exc) raised TypeError.
        # `__str__` is the hook that was intended (see UnknownWord).
        return '"%s"' % self.index
1917

20-
2118
class UnknownType(BaseException):
2219
def __init__(self, _type):
2320
self._type = _type

wordvecspace/fileformat.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,9 +14,8 @@
1414
# $export WORDVECSPACE_DATADIR=/path/to/data/
1515
DATAFILE_ENV_VAR = os.environ.get('WORDVECSPACE_DATADIR', '')
1616

17-
1817
class WordVecSpaceFile(object):
19-
DEFAULT_MODE = 'r+'
18+
DEFAULT_MODE = 'w'
2019
VECTOR = 1 << 0
2120
WORD = 1 << 1
2221
OCCURRENCE = 1 << 2

wordvecspace/mem.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,7 @@
1515

1616
# export data directory path for test cases
1717
# $export WORDVECSPACE_DATADIR=/path/to/data/
18-
DATAFILE_ENV_VAR = os.environ.get('WORDVECSPACE_DATADIR', ' ')
19-
18+
DATAFILE_ENV_VAR = os.environ.get('WORDVECSPACE_DATADIR', '')
2019

2120
@guvectorize(['void(float32[:], float32[:])'], '(n) -> ()', nopython=True, target='parallel')
2221
def normalize_vectors(vec, m):
@@ -381,5 +380,4 @@ def get_nearest(self, words_or_indices, k=DEFAULT_K, combination=False, metric=N
381380
if combination:
382381
return list(set(ner[0]).intersection(*ner))
383382

384-
else:
385-
return ner if isinstance(words_or_indices, (list, tuple)) else ner[0]
383+
return ner if isinstance(words_or_indices, (list, tuple)) else ner[0]

wordvecspace/server.py

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -143,7 +143,8 @@ def get_word_vectors(self, words_or_indices: Union[list, tuple], normalized: boo
143143

144144
return self.wv.get_word_vectors(words_or_indices, normalized=normalized, raise_exc=raise_exc).tolist()
145145

146-
def get_distance(self, word_or_index1: Union[str, int], word_or_index2: Union[str, int], metric: Union[str, None]=None, raise_exc: bool=False) -> float:
146+
def get_distance(self, word_or_index1: Union[str, int], word_or_index2: Union[str, int], metric: Union[str, None]=None, \
147+
raise_exc: bool=False) -> float:
147148
'''
148149
Get cosine distance between two words
149150
@@ -156,7 +157,8 @@ def get_distance(self, word_or_index1: Union[str, int], word_or_index2: Union[st
156157

157158
return self.wv.get_distance(word_or_index1, word_or_index2, raise_exc=raise_exc)
158159

159-
def get_distances(self, row_words_or_indices: Union[str, int, tuple, list], col_words_or_indices: Union[list, None]=None, metric: Union[str, None]=None, raise_exc: bool=False) -> list:
160+
def get_distances(self, row_words_or_indices: Union[str, int, tuple, list], col_words_or_indices: Union[list, None]=None, \
161+
metric: Union[str, None]=None, raise_exc: bool=False) -> list:
160162
'''
161163
Get distances between given words and all words in the vector space
162164
@@ -177,16 +179,16 @@ def get_distances(self, row_words_or_indices: Union[str, int, tuple, list], col_
177179

178180
return self.wv.get_distances(row_words_or_indices, col_words_or_indices=col_words_or_indices, raise_exc=raise_exc).tolist()
179181

180-
def get_nearest(self, words_or_indices: Union[str, int, list, tuple], k: int=512, metric: Union[str, None]=None, combination: bool=False, raise_exc: bool=False) -> list:
182+
def get_nearest(self, words_or_indices: Union[str, int, list, tuple], k: int=512, metric: Union[str, None]=None, \
183+
combination: bool=False, raise_exc: bool=False) -> list:
181184
'''
182185
get_nearest_neighbors("india", 20) => [509, 3389, 486, 523, 7125, 16619, 4491, 12191, 6866, 8776, 15232, 14208, 5998, 21916, 5226, 6322, 4343, 6212, 10172, 6186]
183186
get_nearest(["ram", "india"], 5, metric='euclidean') => [[3844, 16727, 15811, 42731, 41516], [509, 3389, 486, 523, 7125]]
184187
get_nearest(['india', 'bosnia'], 10, combination=True) => [523, 509, 486]
185188
'''
186189

187190
if self._type == 'mem' or 'disk':
188-
neg = self.wv.get_nearest(
189-
words_or_indices, k, raise_exc=raise_exc, metric=metric)
191+
neg = self.wv.get_nearest(words_or_indices, k, raise_exc=raise_exc, metric=metric)
190192

191193
if isinstance(words_or_indices, (tuple, list)) and len(words_or_indices) > 1:
192194
for neg_key, item in enumerate(neg):

0 commit comments

Comments
 (0)