Skip to content

Commit d1328e2

Browse files
committed
Guess Content-Type
1 parent 81c42af commit d1328e2

2 files changed

Lines changed: 39 additions & 3 deletions

File tree

src/tesk_core/filer_s3.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import re
55
import botocore
66
import boto3
7+
import mimetypes
78
from tesk_core.transput import Transput, Type
89

910
class S3Transput(Transput):
@@ -50,10 +51,24 @@ def download_file(self):
5051
os.makedirs(basedir, exist_ok=True)
5152
return self.get_s3_file(self.path, self.file_path)
5253

54+
def get_content_type(self):
    """Guess the Content-Type to store with the uploaded S3 object.

    The guess is based solely on the file name in ``self.path``; the file
    contents are never inspected.

    Returns:
        str: A MIME type string.

        * If the name carries a compression suffix (``.gz``, ``.bz2``,
          ``.xz``, ...), the type of the compression container is
          returned, because the bytes being uploaded are the compressed
          stream and no Content-Encoding is set on the object.
        * If nothing can be guessed, ``'application/octet-stream'``.
        * Textual types (``text/*`` plus JSON, XML and JavaScript) get a
          ``'; charset=utf-8'`` suffix.
        * Any other guessed type is returned unchanged.
    """
    mime, encoding = mimetypes.guess_type(self.path)

    if encoding is not None:
        # guess_type() strips compression suffixes and reports the type of
        # the *inner* file (e.g. 'data.txt.gz' -> 'text/plain', 'gzip').
        # Advertising the inner type without a matching Content-Encoding
        # would make clients treat compressed bytes as plain content.
        compressed_types = {
            'gzip': 'application/gzip',
            'bzip2': 'application/x-bzip2',
            'xz': 'application/x-xz',
        }
        return compressed_types.get(encoding, 'application/octet-stream')

    if mime is None:
        # Unknown extension: fall back to a generic binary stream.
        return 'application/octet-stream'

    textual_application_types = (
        'application/json',
        'application/xml',
        'application/javascript',
    )
    if mime.startswith('text/') or mime in textual_application_types:
        return f'{mime}; charset=utf-8'
    return mime
62+
63+
5364
def upload_file(self):
5465
logging.debug('Uploading s3 object: "%s" Target: %s', self.path, self.bucket + "/" + self.file_path)
66+
content_type = self.get_content_type()
67+
logging.debug('Guessed Content-Type: %s for file: %s', content_type, self.path)
5568
try:
56-
self.bucket_obj.upload_file(Filename=self.path, Key=self.file_path)
69+
# Pass ContentType via ExtraArgs so the object is uploaded with the right MIME type
70+
self.bucket_obj.upload_file(Filename=self.path, Key=self.file_path,
71+
ExtraArgs={'ContentType': content_type})
5772
except (botocore.exceptions.ClientError, OSError) as err:
5873
logging.error("File upload failed for '%s'", self.bucket + "/" + self.file_path)
5974
logging.error(err)

tests/test_s3_filer.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,27 @@ def test_s3_upload_file( moto_boto, path, url, ftype, expected,fs, caplog):
111111
otherwise an exception will be raised.
112112
'''
113113
assert client.Object('tesk', 'folder/file.txt').load() == None
114+
# Check the ContentType metadata is set correctly for text files
115+
head = client.meta.client.head_object(Bucket=trans.bucket, Key=trans.file_path)
116+
assert head['ContentType'] == 'text/plain'
117+
118+
119+
@pytest.mark.parametrize("filename, url, expected_content", [
    # S3Transput.get_content_type() appends '; charset=utf-8' to text/* types,
    # so the stored value for a .txt file includes the charset parameter.
    ("file.txt", "s3://tesk/folder/file.txt", "text/plain; charset=utf-8"),
    ("file.zip", "s3://tesk/folder/file.zip", "application/zip"),
])
def test_s3_upload_file_content_type(moto_boto, filename, url, expected_content, fs):
    """
    Ensure uploaded objects have correct Content-Type metadata based on file extension.

    The file is created on the fake filesystem, uploaded through S3Transput,
    and the stored Content-Type is read back with a HEAD request.
    """
    local_path = f"/home/user/filer_test/{filename}"
    fs.create_file(local_path)
    client = boto3.resource('s3', endpoint_url="http://s3.amazonaws.com")
    trans = S3Transput(local_path, url, "FILE")
    trans.bucket_obj = client.Bucket(trans.bucket)
    # upload_file() is expected to return 0 on success
    assert trans.upload_file() == 0
    head = client.meta.client.head_object(Bucket=trans.bucket, Key=trans.file_path)
    assert head['ContentType'] == expected_content
134+
114135

115136

116137

@@ -133,8 +154,8 @@ def test_s3_upload_directory(path, url, ftype, expected, moto_boto, caplog):
133154
Checking if the file was uploaded, if the object is found load() method will return None
134155
otherwise an exception will be raised.
135156
'''
136-
assert client.Object('tesk', 'folder1/folder2/test_filer.py').load() == None
137-
157+
assert client.Object('tesk', 'folder1/folder2/test_filer.py').load() == None head = client.meta.client.head_object(Bucket=trans.bucket, Key='folder1/folder2/test_filer.py')
158+
assert head['ContentType'].startswith('text/')
138159
def test_upload_directory_for_unknown_file_type(moto_boto, fs, monkeypatch, caplog):
139160
"""
140161
Checking whether an exception is raised when the object type is neither file or directory

0 commit comments

Comments
 (0)