Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ services:
language: python

python:
- 3.4
- 3.5

cache: pip

Expand All @@ -16,11 +16,12 @@ before_install:
-e transport.host=127.0.0.1
-e xpack.security.enabled=false
-e xpack.monitoring.enabled=false
docker.elastic.co/elasticsearch/elasticsearch:5.4.2
docker.elastic.co/elasticsearch/elasticsearch:7.8.0

install:
- pip install --upgrade pip codecov
- pip install codecov
- pip install --upgrade pytest
- pip install -e .[test]

script: pytest --cov=image_match && codecov
Expand Down
19 changes: 9 additions & 10 deletions image_match/elasticsearch_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,24 +53,23 @@ def search_single_record(self, rec, pre_filter=None):
rec.pop('metadata')

# build the 'should' list
should = [{'term': {word: rec[word]}} for word in rec]
should = [{'term': {'{}.{}'.format(self.doc_type, word): rec[word]}} for word in rec]
body = {
'query': {
'bool': {'should': should}
},
'_source': {'excludes': ['simple_word_*']}
'_source': {'excludes': ['{}.simple_word_*'.format(self.doc_type)]}
}

if pre_filter is not None:
body['query']['bool']['filter'] = pre_filter

res = self.es.search(index=self.index,
doc_type=self.doc_type,
body=body,
size=self.size,
timeout=self.timeout)['hits']['hits']

sigs = np.array([x['_source']['signature'] for x in res])
sigs = np.array([x['_source'][self.doc_type]['signature'] for x in res])

if sigs.size == 0:
return []
Expand All @@ -79,8 +78,8 @@ def search_single_record(self, rec, pre_filter=None):

formatted_res = [{'id': x['_id'],
'score': x['_score'],
'metadata': x['_source'].get('metadata'),
'path': x['_source'].get('url', x['_source'].get('path'))}
'metadata': x['_source'][self.doc_type].get('metadata'),
'path': x['_source'][self.doc_type].get('url', x['_source'][self.doc_type].get('path'))}
for x in res]

for i, row in enumerate(formatted_res):
Expand All @@ -91,7 +90,7 @@ def search_single_record(self, rec, pre_filter=None):

def insert_single_record(self, rec, refresh_after=False):
rec['timestamp'] = datetime.now()
self.es.index(index=self.index, doc_type=self.doc_type, body=rec, refresh=refresh_after)
self.es.index(index=self.index, body={ self.doc_type: rec }, refresh=refresh_after)

def delete_duplicates(self, path):
"""Delete all but one entries in elasticsearch whose `path` value is equivalent to that of path.
Expand All @@ -101,11 +100,11 @@ def delete_duplicates(self, path):
matching_paths = [item['_id'] for item in
self.es.search(body={'query':
{'match':
{'path': path}
{'{}.path'.format(self.doc_type): path}
}
},
index=self.index)['hits']['hits']
if item['_source']['path'] == path]
if item['_source'][self.doc_type]['path'] == path]
if len(matching_paths) > 0:
for id_tag in matching_paths[1:]:
self.es.delete(index=self.index, doc_type=self.doc_type, id=id_tag)
self.es.delete(index=self.index, id=id_tag)
5 changes: 3 additions & 2 deletions image_match/goldberg.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from io import BytesIO
import numpy as np
import xml.etree
import xml.etree.ElementTree


class CorruptImageError(RuntimeError):
Expand Down Expand Up @@ -236,14 +237,14 @@ def preprocess_image(image_or_path, bytestream=False, handle_mpo=False):
return rgb2gray(np.asarray(img, dtype=np.uint8))
elif type(image_or_path) in string_types or \
type(image_or_path) is text_type:
return imread(image_or_path, as_grey=True)
return imread(image_or_path, as_gray=True)
elif type(image_or_path) is bytes:
try:
img = Image.open(image_or_path)
arr = np.array(img.convert('RGB'))
except IOError:
# try again due to PIL weirdness
return imread(image_or_path, as_grey=True)
return imread(image_or_path, as_gray=True)
if handle_mpo:
# take the first images from the MPO
if arr.shape == (2,) and isinstance(arr[1].tolist(), MpoImageFile):
Expand Down
2 changes: 1 addition & 1 deletion image_match/signature_database_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def search_single_record(self, rec, pre_filter=None):
before applying the matching strategy

For example:
{ "term": { "metadata.category": "art" } }
{ "term": { "image.metadata.category": "art" } }

Returns:
a formatted list of dicts representing matches.
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def find_version(*file_paths):
],
install_requires=[
'scikit-image>=0.14',
'elasticsearch>=5.0.0,<6.0.0',
'elasticsearch>=7.0.0,<8.0.0',
'six>=1.11.0',
],
tests_require=tests_require,
Expand Down
34 changes: 19 additions & 15 deletions tests/test_elasticsearch_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,24 @@
INDEX_NAME = 'test_environment_{}'.format(hashlib.md5(os.urandom(128)).hexdigest()[:12])
DOC_TYPE = 'image'
MAPPINGS = {
"mappings": {
DOC_TYPE: {
"dynamic": True,
"properties": {
"metadata": {
"type": "object",
"dynamic": True,
"properties": {
"tenant_id": { "type": "keyword" }
"mappings": {
"properties": {
DOC_TYPE: {
"properties": {
"path": {
"type": "keyword"
},
"metadata": {
"properties": {
"tenant_id": {
"type": "keyword",
}
}
}
}
}
}
}
}
}
}


Expand All @@ -46,7 +50,7 @@ def setup_index(request, index_name):
try:
es.indices.create(index=index_name, body=MAPPINGS)
except RequestError as e:
if e.error == u'index_already_exists_exception':
if e.error == u'resource_already_exists_exception':
es.indices.delete(index_name)
else:
raise
Expand Down Expand Up @@ -189,15 +193,15 @@ def test_lookup_with_filter_by_metadata(ses):
)
ses.add_image('test2.jpg', metadata=metadata2, refresh_after=True)

r = ses.search_image('test1.jpg', pre_filter={"term": {"metadata.tenant_id": "foo"}})
r = ses.search_image('test1.jpg', pre_filter={"term": {'{}.metadata.tenant_id'.format(DOC_TYPE): "foo"}})
assert len(r) == 1
assert r[0]['metadata'] == metadata

r = ses.search_image('test1.jpg', pre_filter={"term": {"metadata.tenant_id": "bar-2"}})
r = ses.search_image('test1.jpg', pre_filter={"term": {'{}.metadata.tenant_id'.format(DOC_TYPE): "bar-2"}})
assert len(r) == 1
assert r[0]['metadata'] == metadata2

r = ses.search_image('test1.jpg', pre_filter={"term": {"metadata.tenant_id": "bar-3"}})
r = ses.search_image('test1.jpg', pre_filter={"term": {'{}.metadata.tenant_id'.format(DOC_TYPE): "bar-3"}})
assert len(r) == 0


Expand Down
43 changes: 18 additions & 25 deletions tests/test_elasticsearch_driver_metadata_as_nested.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,23 @@
INDEX_NAME = 'test_environment_{}'.format(hashlib.md5(os.urandom(128)).hexdigest()[:12])
DOC_TYPE = 'image'
MAPPINGS = {
"mappings": {
DOC_TYPE: {
"dynamic": True,
"properties": {
"metadata": {
"type": "nested",
"dynamic": True,
"properties": {
"tenant_id": { "type": "keyword" },
"project_id": { "type": "keyword" }
"mappings": {
"properties": {
DOC_TYPE: {
"properties": {
"path": {
"type": "keyword"
},
"metadata": {
"properties": {
"tenant_id": { "type": "keyword" },
"project_id": { "type": "keyword" }
}
}
}
}
}
}
}
}
}


Expand Down Expand Up @@ -122,16 +124,7 @@ def _metadata(tenant_id, project_id):
)

def _nested_filter(tenant_id, project_id):
return {
"nested" : {
"path" : "metadata",
"query" : {
"bool" : {
"must" : [
{"term": {"metadata.tenant_id": tenant_id}},
{"term": {"metadata.project_id": project_id}}
]
}
}
}
}
return [
{"term": {"image.metadata.tenant_id": tenant_id}},
{"term": {"image.metadata.project_id": project_id}}
]
2 changes: 1 addition & 1 deletion tests/test_goldberg.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,4 @@ def test_difference():
sig1 = gis.generate_signature('test.jpg')
sig2 = gis.generate_signature(test_diff_img_url)
dist = gis.normalized_distance(sig1, sig2)
assert dist == 0.42263283502672722
assert dist == 0.424549547059671