Skip to content

Commit 47170bc

Browse files
Merge branch 'dev' of https://github.com/datajoint/datajoint-python into refactor-errors
2 parents e307c7e + 74dfe41 commit 47170bc

22 files changed

+539
-34
lines changed

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,8 @@ build/
1818
./tests/.coverage
1919
./tests/dj-store/*
2020
*.log
21+
*.env
22+
LNX-docker-compose.yml
23+
notebooks/*
24+
test.dockerfile
25+
__main__.py

.travis.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ sudo: required
22
dist: xenial
33
language: python
44
env:
5-
- DJ_TEST_HOST="127.0.0.1" DJ_TEST_USER="datajoint" DJ_TEST_PASSWORD="datajoint" DJ_HOST="127.0.0.1" DJ_USER="root" DJ_PASS="" S3_ENDPOINT="127.0.0.1:9000" S3_ACCESS_KEY="datajoint" S3_SECRET_KEY="datajoint" S3_BUCKET="datajoint-test"
5+
- DJ_TEST_HOST="127.0.0.1" DJ_TEST_USER="datajoint" DJ_TEST_PASSWORD="datajoint" DJ_HOST="127.0.0.1" DJ_USER="root" DJ_PASS="" S3_ACCESS_KEY="datajoint" S3_SECRET_KEY="datajoint"
66
python:
77
- "3.4"
88
- "3.5"
@@ -16,8 +16,6 @@ before_install:
1616
- docker pull minio/minio
1717
- docker run -d -p 9000:9000 -e "MINIO_ACCESS_KEY=$S3_ACCESS_KEY" -e "MINIO_SECRET_KEY=$S3_SECRET_KEY" minio/minio server /data
1818
- sleep 120
19-
- docker pull minio/mc
20-
- docker run --network="host" --entrypoint=/bin/sh minio/mc -c "mc config host add dj-s3 http://$S3_ENDPOINT $S3_ACCESS_KEY $S3_SECRET_KEY;mc mb $S3_BUCKET;mc policy download $S3_BUCKET;exit 0;"
2119
- sudo apt-get install -y libblas-dev liblapack-dev libatlas-dev gfortran
2220
- sudo apt-get install -y graphviz graphviz-dev pkg-config
2321
- mysql --version

CHANGELOG.md

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,28 @@
11
## Release notes
22

3-
### 0.12.0 -- August 15, 2019 (planned)
4-
5-
* Configurable blob storage (#497, #532, #475)
6-
* Support file attachments: (#480, #532, #475)
7-
* Support for filepath datatype: (#481, #603)
8-
* Expand support of blob serialization (fixed #572, #520, #427, #392, #244)
9-
* Add support for UUID attributes (#562, #567)
10-
* Support of ellipsis in `proj`: `query_expression.proj(.., '-movie')` (#499)
11-
* `dj.conn()` accepts a `port` keyword argument (#563, #571)
12-
* `query_expr.fetch("KEY", as_dict=False)` returns results as `np.recarray`(#414, #574)
13-
* `dj.ERD` is now called `dj.Diagram` (#255, #565)
14-
* `dj.Diagram` underlines "distinguished" classes (#378, #557)
15-
* Bugfixes: #629, #633
3+
### 0.12.0 -- Aug 23, 2019
4+
* Support TLS/SSL connections PR620
5+
* Convert numpy array from python object to appropriate data type if all elements are of the same type (#587) PR #608
6+
* Remove expression requirement to have additional attributes (#604) PR #604
7+
* Support for filepath datatype (#481) PR #603
8+
* Avoid creating multiple copies of attachments and return a dictionary array when specifying `as_dict=True` (#592, #595) PR #593
9+
* Support of ellipsis in `proj`: `query_expression.proj(.., '-movie')` (#499) PR #578
10+
* Expand support of blob serialization (#572, #520, #427, #392, #244, #594) PR #577
11+
* Support for alter (#110) PR #573
12+
* Support for `conda install datajoint` via `conda-forge` channel (#293)
13+
* `dj.conn()` accepts a `port` keyword argument (#563) PR #571
14+
* Support for UUID datatype (#562) PR #567
15+
* `query_expr.fetch("KEY", as_dict=False)` returns results as `np.recarray`(#414) PR #574
16+
* `dj.ERD` is now called `dj.Diagram` (#255, #546) PR #565
17+
* `dj.Diagram` underlines "distinguished" classes (#378) PR #557
18+
* Accept alias for supported MySQL datatypes (#544) PR #545
19+
* Support for pandas and order by "KEY" (#459, #537, #538, #541) PR #534
20+
* Support file attachment datatype and configurable blob storage (#467, #475, #480, #497) PR #532
21+
* Increase default display rows (#523) PR #526
22+
* Bugfixes (#521, #205, #279, #570, #581, #597, #596, #618, #633)
23+
24+
### 0.11.3 -- Jul 26, 2019
25+
* Fix incompatibility with pyparsing 2.4.1 (#629) PR #631
1626

1727
### 0.11.2 -- July 25, 2019
1828
* Fix #628 - incompatibility with pyparsing 2.4.1

datajoint/errors.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,14 @@ class IntegrityError(QueryError):
6666
An integrity error triggered by foreign key constraints
6767
"""
6868

69+
6970
class MissingAttributeError(QueryError):
7071
"""
7172
An error arising when a required attribute value is not provided in INSERT
7273
"""
74+
75+
76+
class MissingExternalFile(DataJointError):
    """
    Raised when a file kept in external storage under DataJoint's management
    can no longer be found or read.
    """

datajoint/external.py

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,15 @@
22
import itertools
33
from collections import Mapping
44
from .settings import config
5-
from .errors import DataJointError
5+
from .errors import DataJointError, MissingExternalFile
66
from .hash import uuid_from_buffer, uuid_from_file
77
from .table import Table
88
from .declare import EXTERNAL_TABLE_ROOT
99
from . import s3
1010
from .utils import safe_write, safe_copy
1111

1212
CACHE_SUBFOLDING = (2, 2) # (2, 2) means "0123456789abcd" will be saved as "01/23/0123456789abcd"
13+
SUPPORT_MIGRATED_BLOBS = True # support blobs migrated from datajoint 0.11.*
1314

1415

1516
def subfold(name, folds):
@@ -124,11 +125,21 @@ def fput(self, local_filepath):
124125
def peek(self, blob_hash, bytes_to_peek=120):
125126
return self.get(blob_hash, size=bytes_to_peek)
126127

127-
def get(self, blob_hash, size=-1):
128+
def get(self, blob_hash, *, size=-1):
128129
"""
129130
get an object from external store.
130131
:param size: max number of bytes to retrieve. If size<0, retrieve entire blob
132+
:param explicit_path: if given, then use it as relative path rather than the path derived from
131133
"""
134+
135+
def read_file(filepath, size):
136+
try:
137+
with open(filepath, 'rb') as f:
138+
blob = f.read(size)
139+
except FileNotFoundError:
140+
raise MissingExternalFile('Lost access to external blob %s.' % full_path) from None
141+
return blob
142+
132143
if blob_hash is None:
133144
return None
134145

@@ -154,18 +165,33 @@ def get(self, blob_hash, size=-1):
154165
subfolders = os.path.join(*subfold(blob_hash.hex, self.spec['subfolding']))
155166
full_path = os.path.join(self.spec['location'], self.database, subfolders, blob_hash.hex)
156167
try:
157-
with open(full_path, 'rb') as f:
158-
blob = f.read(size)
159-
except FileNotFoundError:
160-
raise DataJointError('Lost access to external blob %s.' % full_path) from None
168+
blob = read_file(full_path, size)
169+
except MissingExternalFile:
170+
if not SUPPORT_MIGRATED_BLOBS:
171+
raise
172+
# migrated blobs from 0.11
173+
relative_filepath, contents_hash = (self & {'hash': blob_hash}).fetch1(
174+
'filepath', 'contents_hash')
175+
if relative_filepath is None:
176+
raise
177+
blob = read_file(os.path.join(self.spec['location'], relative_filepath))
161178
else:
162179
if size > 0:
163180
blob_size = os.path.getsize(full_path)
164181
elif self.spec['protocol'] == 's3':
165182
full_path = '/'.join(
166183
(self.database,) + subfold(blob_hash.hex, self.spec['subfolding']) + (blob_hash.hex,))
167184
if size < 0:
168-
blob = self.s3.get(full_path)
185+
try:
186+
blob = self.s3.get(full_path)
187+
except MissingExternalFile:
188+
if not SUPPORT_MIGRATED_BLOBS:
189+
raise
190+
relative_filepath, contents_hash = (self & {'hash': blob_hash}).fetch1(
191+
'filepath', 'contents_hash')
192+
if relative_filepath is None:
193+
raise
194+
blob = self.s3.get(relative_filepath)
169195
else:
170196
blob = self.s3.partial_get(full_path, 0, size)
171197
blob_size = self.s3.get_size(full_path)

datajoint/heading.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import re
55
import logging
66
from .errors import DataJointError
7-
from .declare import UUID_DATA_TYPE, CUSTOM_TYPES, TYPE_PATTERN, EXTERNAL_TYPES, SERIALIZED_TYPES
7+
from .declare import UUID_DATA_TYPE, CUSTOM_TYPES, TYPE_PATTERN, EXTERNAL_TYPES
88
from .utils import OrderedDict
99

1010

@@ -225,11 +225,13 @@ def init_from_database(self, conn, database, table_name):
225225
try:
226226
category = next(c for c in CUSTOM_TYPES if TYPE_PATTERN[c].match(attr['type']))
227227
except StopIteration:
228+
if attr['type'].startswith('external'):
229+
raise DataJointError('Legacy datatype `{type}`.'.format(**attr)) from None
228230
raise DataJointError('Unknown attribute type `{type}`'.format(**attr)) from None
229231
attr.update(
230232
is_attachment=category in ('INTERNAL_ATTACH', 'EXTERNAL_ATTACH'),
231233
is_filepath=category == 'FILEPATH',
232-
is_blob=category in ('INTERNAL_BLOB', 'EXTERNAL_BLOB'), # INTERNAL_BLOB is not a custom type but is included for completeness
234+
is_blob=category in ('INTERNAL_BLOB', 'EXTERNAL_BLOB'), # INTERNAL_BLOB is not a custom type but is included for completeness
233235
uuid=category == 'UUID',
234236
is_external=category in EXTERNAL_TYPES,
235237
store=attr['type'].split('@')[1] if category in EXTERNAL_TYPES else None)

datajoint/s3.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import warnings
77
import uuid
88
import os
9-
9+
from . import errors
1010

1111
class Folder:
1212
"""
@@ -29,7 +29,10 @@ def fput(self, relative_name, local_file, **meta):
2929
self.bucket, '/'.join((self.remote_path, relative_name)), local_file, metadata=meta or None)
3030

3131
def get(self, relative_name):
    """
    Retrieve the complete contents of an object from the bucket.

    :param relative_name: object name relative to this folder's remote path
    :return: object contents as bytes
    :raises errors.MissingExternalFile: if no such object exists in the bucket
    """
    remote_name = '/'.join((self.remote_path, relative_name))
    try:
        obj = self.client.get_object(self.bucket, remote_name)
    except minio.error.NoSuchKey:
        # translate the storage-layer error into DataJoint's own exception
        raise errors.MissingExternalFile from None
    return obj.data
3336

3437
def fget(self, relative_name, local_filepath):
3538
"""get file from object name to local filepath"""
@@ -48,13 +51,13 @@ def partial_get(self, relative_name, offset, size):
4851
return self.client.get_partial_object(
4952
self.bucket, '/'.join((self.remote_path, relative_name)), offset, size).data
5053
except minio.error.NoSuchKey:
51-
return None
54+
raise errors.MissingExternalFile from None
5255

5356
def get_size(self, relative_name):
    """
    Report the size of a stored object in bytes.

    :param relative_name: object name relative to this folder's remote path
    :return: object size in bytes
    :raises errors.MissingExternalFile: if no such object exists in the bucket
    """
    remote_name = '/'.join((self.remote_path, relative_name))
    try:
        stat = self.client.stat_object(self.bucket, remote_name)
    except minio.error.NoSuchKey:
        # translate the storage-layer error into DataJoint's own exception
        raise errors.MissingExternalFile from None
    return stat.size
5861

5962
def list_objects(self, folder=''):
6063
return self.client.list_objects(self.bucket, '/'.join((self.remote_path, folder, '')), recursive=True)

datajoint/utils.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,3 +95,19 @@ def safe_copy(src, dest, overwrite=False):
9595
temp_file = dest + '.copying'
9696
shutil.copyfile(src, temp_file)
9797
os.rename(temp_file, dest)
98+
99+
100+
def parse_sql(filepath):
    """
    Yield individual SQL statements read from a SQL script file.

    Blank lines and comment lines (starting with ``--``) are skipped.
    A ``DELIMITER`` directive switches the statement terminator, mimicking
    the MySQL command-line client (e.g. ``DELIMITER ;;`` around routines).
    Lines of one statement are joined with single spaces; the terminator is
    kept at the end of each yielded statement.

    :param filepath: path to the ``.sql`` file to parse
    :return: generator yielding one SQL statement string at a time
    """
    delimiter = ';'
    statement = []
    with open(filepath, 'rt') as f:
        for line in f:
            line = line.strip()
            # Fix: the previous `len(line) > 1` test silently dropped any
            # single-character line, including a bare terminator `;`, which
            # lost statements whose delimiter sat alone on its own line.
            if not line or line.startswith('--'):
                continue
            if line.startswith('DELIMITER'):
                # switch the active terminator for subsequent statements
                delimiter = line.split()[1]
            else:
                statement.append(line)
                if line.endswith(delimiter):
                    yield ' '.join(statement)
                    statement = []

tests/__init__.py

Lines changed: 84 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,15 @@
99
from os import environ, remove
1010
import datajoint as dj
1111
from distutils.version import LooseVersion
12+
import os
13+
from pathlib import Path
14+
from minio import Minio
15+
import urllib3
16+
import certifi
17+
import shutil
18+
from datajoint.utils import parse_sql
1219

13-
__author__ = 'Edgar Walker, Fabian Sinz, Dimitri Yatsenko'
20+
__author__ = 'Edgar Walker, Fabian Sinz, Dimitri Yatsenko, Raphael Guzman'
1421

1522
# turn on verbose logging
1623
logging.basicConfig(level=logging.DEBUG)
@@ -61,7 +68,7 @@
6168
conn_root.query(
6269
"GRANT SELECT ON `djtest%%`.* TO 'djssl'@'%%';")
6370
else:
64-
# grant permissions. For mysql5.6/5.7 this also automatically creates user
71+
# grant permissions. For mysql5.6/5.7 this also automatically creates user
6572
# if not exists
6673
conn_root.query("""
6774
GRANT ALL PRIVILEGES ON `djtest%%`.* TO 'datajoint'@'%%'
@@ -76,6 +83,25 @@
7683
REQUIRE SSL;
7784
""")
7885

86+
# Initialize httpClient with relevant timeout.
87+
httpClient = urllib3.PoolManager(
88+
timeout=30,
89+
cert_reqs='CERT_REQUIRED',
90+
ca_certs=certifi.where(),
91+
retries=urllib3.Retry(
92+
total=3,
93+
backoff_factor=0.2,
94+
status_forcelist=[500, 502, 503, 504]
95+
)
96+
)
97+
98+
# Initialize minioClient with an endpoint and access/secret keys.
99+
minioClient = Minio(S3_CONN_INFO['endpoint'],
100+
access_key=S3_CONN_INFO['access_key'],
101+
secret_key=S3_CONN_INFO['secret_key'],
102+
secure=False,
103+
http_client=httpClient)
104+
79105

80106
def setup_package():
81107
"""
@@ -84,6 +110,44 @@ def setup_package():
84110
"""
85111
dj.config['safemode'] = False
86112

113+
# Add old MySQL
114+
source = os.path.dirname(os.path.realpath(__file__)) + \
115+
"/external-legacy-data"
116+
db_name = "djtest_blob_migrate"
117+
db_file = "v0_11.sql"
118+
conn_root.query("""
119+
CREATE DATABASE {};
120+
""".format(db_name))
121+
122+
statements = parse_sql('{}/{}'.format(source, db_file))
123+
for s in statements:
124+
conn_root.query(s)
125+
126+
# Add old S3
127+
source = os.path.dirname(os.path.realpath(__file__)) + \
128+
"/external-legacy-data/s3"
129+
bucket = "migrate-test"
130+
region = "us-east-1"
131+
minioClient.make_bucket(bucket, location=region)
132+
133+
pathlist = Path(source).glob('**/*')
134+
for path in pathlist:
135+
if os.path.isfile(str(path)) and ".sql" not in str(path):
136+
minioClient.fput_object(
137+
bucket, os.path.relpath(
138+
str(path),
139+
'{}/{}'.format(source, bucket)
140+
), str(path))
141+
142+
# Add S3
143+
minioClient.make_bucket("datajoint-test", location=region)
144+
145+
# Add old File Content
146+
shutil.copytree(
147+
os.path.dirname(os.path.realpath(__file__)) +
148+
"/external-legacy-data/file/temp",
149+
os.path.expanduser('~/temp'))
150+
87151

88152
def teardown_package():
89153
"""
@@ -99,3 +163,21 @@ def teardown_package():
99163
conn.query('DROP DATABASE `{}`'.format(db[0]))
100164
conn.query('SET FOREIGN_KEY_CHECKS=1')
101165
remove("dj_local_conf.json")
166+
167+
# Remove old S3
168+
bucket = "migrate-test"
169+
objs = list(minioClient.list_objects_v2(
170+
bucket, recursive=True))
171+
objs = [minioClient.remove_object(bucket,
172+
o.object_name.encode('utf-8')) for o in objs]
173+
minioClient.remove_bucket(bucket)
174+
175+
# Remove S3
176+
bucket = "datajoint-test"
177+
objs = list(minioClient.list_objects_v2(bucket, recursive=True))
178+
objs = [minioClient.remove_object(bucket,
179+
o.object_name.encode('utf-8')) for o in objs]
180+
minioClient.remove_bucket(bucket)
181+
182+
# Remove old File Content
183+
shutil.rmtree(os.path.expanduser('~/temp'))

0 commit comments

Comments
 (0)