Skip to content

Commit 5eedf3c

Browse files
authored
609 OBS upload fails when using a large file-like Python object (#618)
609 OBS upload fails when using a large file-like Python object. Reviewed-by: Anton Sidelnikov
1 parent 427058e commit 5eedf3c

3 files changed

Lines changed: 142 additions & 12 deletions

File tree

otcextensions/sdk/obs/v1/_proxy.py

Lines changed: 94 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
1010
# License for the specific language governing permissions and limitations
1111
# under the License.
12+
import io
1213
import os
1314
from urllib import parse
1415
from urllib.parse import urlsplit
@@ -383,21 +384,28 @@ def create_object(self, container, name, filename=None, data=None,
383384
endpoint_override=endpoint,
384385
requests_auth=self._get_req_auth(endpoint),
385386
**headers)
386-
387-
if data is not None:
388-
self.log.debug(
389-
"uploading data to %(endpoint)s",
390-
{'endpoint': endpoint})
391-
return self._create(
392-
_obj.Object, container=container,
393-
name=name, data=data,
394-
endpoint_override=endpoint,
395-
requests_auth=self._get_req_auth(endpoint),
396-
**headers)
397-
398387
if segment_size:
399388
segment_size = int(segment_size)
400389
segment_size = self.get_object_segment_size(segment_size)
390+
391+
if data is not None:
392+
data_size = self._try_get_size(data)
393+
394+
if data_size is not None and data_size > segment_size:
395+
return self._upload_large_data(
396+
endpoint, data, name, headers, segment_size
397+
)
398+
else:
399+
self.log.debug(
400+
"uploading data to %(endpoint)s",
401+
{'endpoint': endpoint})
402+
return self._create(
403+
_obj.Object, container=container,
404+
name=name, data=data,
405+
endpoint_override=endpoint,
406+
requests_auth=self._get_req_auth(endpoint),
407+
**headers)
408+
401409
file_size = os.path.getsize(filename)
402410

403411
if generate_checksums and md5 is None:
@@ -531,6 +539,80 @@ def _finish_large_object_upload(self, endpoint, headers, upload_id):
531539
if retries == 0:
532540
raise
533541

542+
def _try_get_size(self, data):
    """Try to get the size of a data object if possible.

    The following strategies are tried in order; the first one that
    succeeds provides the result, and a strategy that fails falls through
    to the next one instead of giving up:

    1. ``os.fstat`` on ``data.fileno()``,
    2. a ``len`` attribute on the object,
    3. ``len(data)``,
    4. seeking to the end of the stream and reading the position (the
       original position is restored afterwards).

    :param data: The data object passed to create_object.
    :returns: The size of the data if it can be determined, else None.
    """
    if hasattr(data, 'fileno'):
        fileno = None
        try:
            fileno = data.fileno()
        except (OSError, ValueError):
            # In-memory streams such as io.BytesIO expose fileno() but
            # raise io.UnsupportedOperation (a subclass of both OSError
            # and ValueError). That is not a reason to give up: the
            # seek/tell fallback below still works for them.
            pass
        if fileno is not None:
            try:
                return os.fstat(fileno).st_size
            except Exception:
                self.log.debug(
                    "Cannot determine size of data with fileno %s",
                    fileno, exc_info=True)

    if hasattr(data, 'len'):
        return data.len

    if hasattr(data, '__len__'):
        return len(data)

    try:
        pos = data.tell()
        data.seek(0, os.SEEK_END)
        size = data.tell()
        # Put the stream back where the caller left it.
        data.seek(pos, os.SEEK_SET)
        return size
    except Exception:
        # Deliberate best effort: objects without tell()/seek() or
        # non-seekable streams simply have an unknown size.
        return None
574+
575+
def _upload_large_data(self, endpoint, data, name, headers, segment_size):
    """Upload ``data`` as a multipart upload in ``segment_size`` pieces.

    A multipart upload is initiated for ``name``, then ``data`` is read
    sequentially in chunks of at most ``segment_size`` and every chunk is
    sent with its own PUT request carrying an increasing ``partNumber``.
    Once the data is exhausted the upload is completed through
    ``_finish_large_object_upload``.

    :param endpoint: Endpoint URL the object is uploaded to.
    :param data: File-like object providing ``read(size)``, or a
        bytes-like object (``bytes``, ``bytearray``, ``memoryview``).
    :param name: Name of the object to create.
    :param headers: Headers sent with every part and with the final
        completion request.
    :param segment_size: Maximum number of bytes (or characters, for a
        text stream) read per part.
    :returns: The result of ``_finish_large_object_upload``.
    :raises: Any exception raised while initiating, uploading or
        finishing. If the failure happens after initiation, an abort of
        the multipart upload is attempted before re-raising.
    """
    # The size probe used by the caller accepts bytes-like payloads (via
    # len()), but they have no read(); wrap them so they can be chunked.
    if isinstance(data, (bytes, bytearray, memoryview)):
        data = io.BytesIO(data)

    upload_id = _obj.Object.initiate_multipart_upload(
        self, endpoint, name,
        requests_auth=self._get_req_auth(endpoint)
    )
    url = f'{endpoint}/{name}'
    part_number = 1

    try:
        while True:
            segment = data.read(segment_size)
            if not segment:
                # Empty read: the data source is exhausted.
                break
            result = self.put(
                f'{url}?partNumber={part_number}&uploadId={upload_id}',
                headers=headers, data=segment,
                requests_auth=self._get_req_auth(endpoint)
            )
            result.raise_for_status()
            part_number += 1

        return self._finish_large_object_upload(
            url, headers, upload_id)
    except Exception:
        # Best-effort cleanup so a failed upload does not leave an open
        # multipart upload behind; the original error is always re-raised.
        try:
            self.log.debug(
                "Failed to upload large data. Aborting %s", upload_id
            )
            self._abort_multipart_upload(endpoint=url, upload_id=upload_id)
        except Exception:
            self.log.exception(
                "Failed to cleanup multipart upload %s:", upload_id
            )
        raise
615+
534616
def _object_name_from_url(self, url):
535617
'''Get container_name/object_name from the full URL called.
536618
Remove the Swift endpoint from the front of the URL, and remove
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License"); you may
2+
# not use this file except in compliance with the License. You may obtain
3+
# a copy of the License at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# Unless required by applicable law or agreed to in writing, software
8+
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
9+
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
10+
# License for the specific language governing permissions and limitations
11+
# under the License.
12+
import uuid
13+
from otcextensions.tests.functional import base
14+
15+
16+
class TestObsLargeFiles(base.BaseFunctionalTest):
    """Functional test for uploading a large file-like object to OBS.

    The file to upload is taken from the environment variable named by
    ``LARGE_FILE_ENV``. When the variable is unset or does not point to an
    existing file the test is skipped, so the suite does not depend on a
    file that only exists on one developer's machine.
    """

    # Name of the environment variable holding the path of the large file.
    LARGE_FILE_ENV = 'OTCE_OBS_LARGE_FILE'

    uuid_v4 = uuid.uuid4().hex[:8]
    bucket_name = 'obs-test-' + uuid_v4
    container = None

    def setUp(self):
        # Imported locally so this block does not rely on a module-level
        # import that the test module does not have.
        import os

        super(TestObsLargeFiles, self).setUp()

        self.large_file = os.environ.get(self.LARGE_FILE_ENV)
        if not self.large_file or not os.path.isfile(self.large_file):
            # Skip before any bucket is created.
            self.skipTest(
                'Set %s to the path of a large file to run this test'
                % self.LARGE_FILE_ENV)

        self.client = self.conn.obs
        self.container = self.client.create_container(
            name=self.bucket_name,
            storage_acl='public-read-write',
            storage_class='STANDARD'
        )
        self.addCleanup(self.client.delete_container, self.container)

    def test_01_upload_large_file(self):
        # The context manager guarantees the handle is closed even when the
        # upload raises.
        with open(self.large_file, "rb") as fh:
            self.client.create_object(
                container=self.container,
                name='largefile',
                data=fh
            )
        self.object = self.client.get_object_metadata(
            container=self.container,
            obj='largefile'
        )
        self.client.delete_object(self.object)
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
upgrade:
3+
- |
4+
Uploading large file-like objects is now more reliable. When the object passed via
5+
the ``data`` parameter of ``create_object`` is larger than the segment size, the SDK
6+
automatically uploads it as a multipart upload in segment-sized parts, reducing the chance of failures.

0 commit comments

Comments
 (0)