@@ -117,7 +117,7 @@ def insert_single_record(self, rec):
117117 raise NotImplementedError
118118
119119 def __init__ (self , k = 16 , N = 63 , n_grid = 9 ,
120- crop_percentile = (5 , 95 ), distance_cutoff = 0.45 ,
120+ crop_percentile = (5 , 95 ), distance_cutoff = 0.45 , score_cutoff = 9.0 ,
121121 * signature_args , ** signature_kwargs ):
122122 """Set up storage scheme for images
123123
@@ -159,6 +159,8 @@ def __init__(self, k=16, N=63, n_grid=9,
159159 considering how much variance to keep in the image (default (5, 95))
160160 distance_cutoff (Optional [float]): maximum image signature distance to
161161 be considered a match (default 0.45)
162+ score_cutoff (Optional [float]): minimum ElasticSearch relevance score to
163+ be considered a match (default 9.0)
162164 *signature_args: Variable length argument list to pass to ImageSignature
163165 **signature_kwargs: Arbitrary keyword arguments to pass to ImageSignature
164166
@@ -175,14 +177,22 @@ def __init__(self, k=16, N=63, n_grid=9,
175177 self .N = N
176178 self .n_grid = n_grid
177179
178- # Check float input
180+ # Check float input for distance cutoff
179181 if type (distance_cutoff ) is not float :
180182 raise TypeError ('distance_cutoff should be a float' )
181183 if distance_cutoff < 0. :
182184 raise ValueError ('distance_cutoff should be > 0 (got %r)' % distance_cutoff )
183185
184186 self .distance_cutoff = distance_cutoff
185187
188+ # Check float input for ElasticSearch score cutoff
189+ if type (score_cutoff ) is not float :
190+ raise TypeError ('score_cutoff should be a float' )
191+ if score_cutoff < 0. :
192+ raise ValueError ('score_cutoff should be > 0 (got %r)' % score_cutoff )
193+
194+ self .score_cutoff = score_cutoff
195+
186196 self .crop_percentile = crop_percentile
187197
188198 self .gis = ImageSignature (n = n_grid , crop_percentiles = crop_percentile , * signature_args , ** signature_kwargs )
@@ -222,7 +232,7 @@ def search_image(self, path, all_orientations=False, bytestream=False, pre_filte
222232 pre_filter (Optional[dict]): filters list before applying the matching algorithm
223233 (default None)
224234 Returns:
225- a formatted list of dicts representing unique matches, sorted by dist
235+ a formatted list of dicts representing unique matches, sorted by ascending dist, or by descending score when the results carry an ElasticSearch score
226236
227237 For example, if three matches are found:
228238
@@ -238,6 +248,19 @@ def search_image(self, path, all_orientations=False, bytestream=False, pre_filte
238248 'path': u'https://c2.staticflickr.com/8/7158/6814444991_08d82de57e_z.jpg'}
239249 ]
240250
251+ Here is an ElasticSearch example:
252+
253+ [
254+ {'score': 35.0,
255+ 'id': u'AVM37nMg0osmmAxpPvx6',
256+ 'path': u'https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa,_by_Leonardo_da_Vinci,_from_C2RMF_retouched.jpg/687px-Mona_Lisa,_by_Leonardo_da_Vinci,_from_C2RMF_retouched.jpg'},
257+ {'score': 10.0,
258+ 'id': u'AVM37p530osmmAxpPvx9',
259+ 'path': u'https://c2.staticflickr.com/8/7158/6814444991_08d82de57e_z.jpg'},
260+ {'score': 4.0,
261+ 'id': u'AVM37oZq0osmmAxpPvx7',
262+ 'path': u'https://pixabay.com/static/uploads/photo/2012/11/28/08/56/mona-lisa-67506_960_720.jpg'}
263+ ]
241264 """
242265 img = self .gis .preprocess_image (path , bytestream )
243266
@@ -277,12 +300,21 @@ def search_image(self, path, all_orientations=False, bytestream=False, pre_filte
277300
278301 ids = set ()
279302 unique = []
303+ hasScore = False
280304 for item in result :
305+ if 'score' in item :
306+ hasScore = True
307+
281308 if item ['id' ] not in ids :
282309 unique .append (item )
283310 ids .add (item ['id' ])
284311
285- r = sorted (unique , key = itemgetter ('dist' ))
312+ # If any result carries a 'score' key (data from ElasticSearch), sort by descending score; otherwise default to sorting by ascending dist
313+ if hasScore :
314+ r = sorted (unique , key = itemgetter ('score' ), reverse = True )
315+ else :
316+ r = sorted (unique , key = itemgetter ('dist' ))
317+
286318 return r
287319
288320
0 commit comments