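Clean up after changing fileformat.py: set WordVecSpaceFile.DEFAULT_MODE to 'w', default WORDVECSPACE_DATADIR to an empty string, drop redundant else branches, and tidy blank lines and line wrapping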

prasannababuAddagiri · prasannababuAddagiri · commit 53aab4e0ddef · 2018-04-13T09:17:10.000+02:00
diff --git a/test.py b/test.py
@@ -8,7 +8,6 @@
 from wordvecspace import annoy
 from wordvecspace import disk
 
-
 def suite_test():
     suite = unittest.TestSuite()
 
@@ -19,7 +18,6 @@ def suite_test():
 
     return suite
 
-
 if __name__ == "__main__":
     doctest.testmod(fileformat)
     doctest.testmod(mem)
diff --git a/wordvecspace/annoy.py b/wordvecspace/annoy.py
@@ -8,7 +8,7 @@
 
 # export data directory path for test cases
 # export WORDVECSPACE_DATADIR=/path/to/data
-DATAFILE_ENV_VAR = os.environ.get('WORDVECSPACE_DATADIR', ' ')
+DATAFILE_ENV_VAR = os.environ.get('WORDVECSPACE_DATADIR', '')
 
 check_equal = np.testing.assert_array_almost_equal
 
@@ -97,7 +97,6 @@ def get_distances(self, row_words_or_indices, col_words_or_indices=None, raise_e
         return mat
 
     DEFAULT_K = 512
-
     def get_nearest(self, words_or_indices, k=DEFAULT_K, combination=False, raise_exc=False):
         '''
         >>> wv = WordVecSpaceAnnoy(DATAFILE_ENV_VAR)
diff --git a/wordvecspace/command.py b/wordvecspace/command.py
@@ -103,13 +103,13 @@ def define_subcommands(self, subcommands):
         super(WordVecSpaceCommand, self).define_subcommands(subcommands)
 
         convert_cmd = subcommands.add_parser('convert',
-            help='Convert data in Google\'s Word2Vec format to WordVecSpace format')
+                help='Convert data in Google\'s Word2Vec format to WordVecSpace format')
         convert_cmd.set_defaults(func=self.convert)
         convert_cmd.add_argument('input_dir',
-            help='Input directory containing Google Word2Vec format files'
-                                 ' (vocab.txt, vectors.bin)')
+                help='Input directory containing Google Word2Vec format files'
+                    '(vocab.txt, vectors.bin)')
         convert_cmd.add_argument('output_dir',
-            help='Output directory where WordVecSpace format files are produced')
+                help='Output directory where WordVecSpace format files are produced')
 
         interact_cmd = subcommands.add_parser('interact',
                 help='WordVecSpace Console')
@@ -146,10 +146,8 @@ def define_subcommands(self, subcommands):
                         Eg: --eargs n_trees=1:index_fpath=/tmp\
                         (This is considered only when the type is annoy)')
 
-
 def main():
     WordVecSpaceCommand().start()
 
-
 if __name__ == '__main__':
     main()
diff --git a/wordvecspace/convert.py b/wordvecspace/convert.py
@@ -56,7 +56,6 @@ def write(self, token, occur, vec):
     def close(self):
         self.out.close()
 
-
 class GW2VectoWordVecSpaceFile(object):
     '''
     Abstraction that helps in converting word vector space data
diff --git a/wordvecspace/disk.py b/wordvecspace/disk.py
@@ -13,7 +13,7 @@
 
 # export data directory path for test cases
 # export WORDVECSPACE_DATADIR=/path/to/data
-DATAFILE_ENV_VAR = os.environ.get('WORDVECSPACE_DATADIR', ' ')
+DATAFILE_ENV_VAR = os.environ.get('WORDVECSPACE_DATADIR', '')
 
 class WordVecSpaceDisk(WordVecSpace):
     METRIC = 'angular'
@@ -277,7 +277,6 @@ def get_distances(self, row_words_or_indices, col_words_or_indices=None, metric=
             return distance.cdist(row_vectors, col_vectors, 'euclidean')
 
     DEFAULT_K = 512
-
     def get_nearest(self, words_or_indices, k=DEFAULT_K, combination=False, metric=None, raise_exc=False):
         '''
         >>> wv = WordVecSpaceDisk(DATAFILE_ENV_VAR)
@@ -301,5 +300,4 @@ def get_nearest(self, words_or_indices, k=DEFAULT_K, combination=False, metric=N
         if combination:
             return list(set(ner[0]).intersection(*ner))
 
-        else:
-            return ner if isinstance(words_or_indices, (list, tuple)) else ner[0]
+        return ner if isinstance(words_or_indices, (list, tuple)) else ner[0]
diff --git a/wordvecspace/exception.py b/wordvecspace/exception.py
@@ -1,23 +1,20 @@
 class BaseException(Exception):
     pass
 
-
 class UnknownWord(BaseException):
     def __init__(self, word):
         self.word = word
 
     def __str__(self):
         return '"%s"' % self.word
 
-
 class UnknownIndex(BaseException):
     def __init__(self, index):
         self.index = index
 
     def __int__(self):
         return '"%s"' % self.index
 
-
 class UnknownType(BaseException):
     def __init__(self, _type):
         self._type = _type
diff --git a/wordvecspace/fileformat.py b/wordvecspace/fileformat.py
@@ -14,9 +14,8 @@
 # $export WORDVECSPACE_DATADIR=/path/to/data/
 DATAFILE_ENV_VAR = os.environ.get('WORDVECSPACE_DATADIR', '')
 
-
 class WordVecSpaceFile(object):
-    DEFAULT_MODE = 'r+'
+    DEFAULT_MODE = 'w'
     VECTOR = 1 << 0
     WORD = 1 << 1
     OCCURRENCE = 1 << 2
diff --git a/wordvecspace/mem.py b/wordvecspace/mem.py
@@ -15,8 +15,7 @@
 
 # export data directory path for test cases
 # $export WORDVECSPACE_DATADIR=/path/to/data/
-DATAFILE_ENV_VAR = os.environ.get('WORDVECSPACE_DATADIR', ' ')
-
+DATAFILE_ENV_VAR = os.environ.get('WORDVECSPACE_DATADIR', '')
 
 @guvectorize(['void(float32[:], float32[:])'], '(n) -> ()', nopython=True, target='parallel')
 def normalize_vectors(vec, m):
@@ -381,5 +380,4 @@ def get_nearest(self, words_or_indices, k=DEFAULT_K, combination=False, metric=N
         if combination:
             return list(set(ner[0]).intersection(*ner))
 
-        else:
-            return ner if isinstance(words_or_indices, (list, tuple)) else ner[0]
+        return ner if isinstance(words_or_indices, (list, tuple)) else ner[0]
diff --git a/wordvecspace/server.py b/wordvecspace/server.py
@@ -143,7 +143,8 @@ def get_word_vectors(self, words_or_indices: Union[list, tuple], normalized: boo
 
         return self.wv.get_word_vectors(words_or_indices, normalized=normalized, raise_exc=raise_exc).tolist()
 
-    def get_distance(self, word_or_index1: Union[str, int], word_or_index2: Union[str, int], metric: Union[str, None]=None, raise_exc: bool=False) -> float:
+    def get_distance(self, word_or_index1: Union[str, int], word_or_index2: Union[str, int], metric: Union[str, None]=None, \
+                    raise_exc: bool=False) -> float:
         '''
         Get cosine distance between two words
 
@@ -156,7 +157,8 @@ def get_distance(self, word_or_index1: Union[str, int], word_or_index2: Union[st
 
         return self.wv.get_distance(word_or_index1, word_or_index2, raise_exc=raise_exc)
 
-    def get_distances(self, row_words_or_indices: Union[str, int, tuple, list], col_words_or_indices: Union[list, None]=None, metric: Union[str, None]=None, raise_exc: bool=False) -> list:
+    def get_distances(self, row_words_or_indices: Union[str, int, tuple, list], col_words_or_indices: Union[list, None]=None, \
+                    metric: Union[str, None]=None, raise_exc: bool=False) -> list:
         '''
         Get distances between given words and all words in the vector space
 
@@ -177,16 +179,16 @@ def get_distances(self, row_words_or_indices: Union[str, int, tuple, list], col_
 
         return self.wv.get_distances(row_words_or_indices, col_words_or_indices=col_words_or_indices, raise_exc=raise_exc).tolist()
 
-    def get_nearest(self, words_or_indices: Union[str, int, list, tuple], k: int=512, metric: Union[str, None]=None, combination: bool=False, raise_exc: bool=False) -> list:
+    def get_nearest(self, words_or_indices: Union[str, int, list, tuple], k: int=512, metric: Union[str, None]=None, \
+                    combination: bool=False, raise_exc: bool=False) -> list:
         '''
         get_nearest_neighbors("india", 20) => [509, 3389, 486, 523, 7125, 16619, 4491, 12191, 6866, 8776, 15232, 14208, 5998, 21916, 5226, 6322, 4343, 6212, 10172, 6186]
         get_nearest(["ram", "india"], 5, metric='euclidean') => [[3844, 16727, 15811, 42731, 41516], [509, 3389, 486, 523, 7125]]
         get_nearest(['india', 'bosnia'], 10, combination=True) => [523, 509, 486]
         '''
 
         if self._type == 'mem' or 'disk':
-            neg = self.wv.get_nearest(
-                words_or_indices, k, raise_exc=raise_exc, metric=metric)
+            neg = self.wv.get_nearest(words_or_indices, k, raise_exc=raise_exc, metric=metric)
 
             if isinstance(words_or_indices, (tuple, list)) and len(words_or_indices) > 1:
                 for neg_key, item in enumerate(neg):