Skip to content

Commit 9740c54

Browse files
authored
Revert "Add case-sensitivity handling to multi-object download operations (aws#9961)"
1 parent c817eb9 commit 9740c54

File tree

20 files changed

+18
-855
lines changed

20 files changed

+18
-855
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"type": "bugfix",
3+
"category": "``s3``",
4+
"description": "Reverts addition of ``--case-conflict`` feature which caused a performance regression when copying from S3 to large local directories"
5+
}

awscli/customizations/s3/filegenerator.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,7 @@ def __init__(self, directory, filename):
9494
class FileStat(object):
9595
def __init__(self, src, dest=None, compare_key=None, size=None,
9696
last_update=None, src_type=None, dest_type=None,
97-
operation_name=None, response_data=None, etag=None,
98-
case_conflict_submitted=None, case_conflict_key=None,):
97+
operation_name=None, response_data=None, etag=None):
9998
self.src = src
10099
self.dest = dest
101100
self.compare_key = compare_key
@@ -106,8 +105,6 @@ def __init__(self, src, dest=None, compare_key=None, size=None,
106105
self.operation_name = operation_name
107106
self.response_data = response_data
108107
self.etag = etag
109-
self.case_conflict_submitted = case_conflict_submitted
110-
self.case_conflict_key = case_conflict_key
111108

112109

113110
class FileGenerator(object):

awscli/customizations/s3/fileinfo.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,7 @@ def __init__(self, src, dest=None, compare_key=None, size=None,
4242
last_update=None, src_type=None, dest_type=None,
4343
operation_name=None, client=None, parameters=None,
4444
source_client=None, is_stream=False,
45-
associated_response_data=None, etag=None,
46-
case_conflict_submitted=None, case_conflict_key=None,):
45+
associated_response_data=None, etag=None):
4746
self.src = src
4847
self.src_type = src_type
4948
self.operation_name = operation_name
@@ -61,8 +60,6 @@ def __init__(self, src, dest=None, compare_key=None, size=None,
6160
self.is_stream = is_stream
6261
self.associated_response_data = associated_response_data
6362
self.etag = etag
64-
self.case_conflict_submitted = case_conflict_submitted
65-
self.case_conflict_key = case_conflict_key
6663

6764
def is_glacier_compatible(self):
6865
"""Determines if a file info object is glacier compatible

awscli/customizations/s3/fileinfobuilder.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,6 @@ def _inject_info(self, file_base):
4646
file_info_attr['is_stream'] = self._is_stream
4747
file_info_attr['associated_response_data'] = file_base.response_data
4848
file_info_attr['etag'] = file_base.etag
49-
file_info_attr['case_conflict_submitted'] = getattr(
50-
file_base, 'case_conflict_submitted', None
51-
)
52-
file_info_attr['case_conflict_key'] = getattr(
53-
file_base, 'case_conflict_key', None
54-
)
5549

5650
# This is a bit quirky. The below conditional hinges on the --delete
5751
# flag being set, which only occurs during a sync command. The source

awscli/customizations/s3/s3handler.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@
4747
from awscli.customizations.s3.utils import DeleteSourceFileSubscriber
4848
from awscli.customizations.s3.utils import DeleteSourceObjectSubscriber
4949
from awscli.customizations.s3.utils import DeleteCopySourceObjectSubscriber
50-
from awscli.customizations.s3.utils import CaseConflictCleanupSubscriber
5150
from awscli.compat import get_binary_stdin
5251

5352

@@ -404,13 +403,6 @@ def _add_additional_subscribers(self, subscribers, fileinfo):
404403
if self._cli_params.get('is_move', False):
405404
subscribers.append(DeleteSourceObjectSubscriber(
406405
fileinfo.source_client))
407-
if fileinfo.case_conflict_submitted is not None:
408-
subscribers.append(
409-
CaseConflictCleanupSubscriber(
410-
fileinfo.case_conflict_submitted,
411-
fileinfo.case_conflict_key,
412-
)
413-
)
414406

415407
def _submit_transfer_request(self, fileinfo, extra_args, subscribers):
416408
bucket, key = find_bucket_key(fileinfo.src)

awscli/customizations/s3/subcommands.py

Lines changed: 5 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,7 @@
3434
S3PathResolver
3535
from awscli.customizations.utils import uni_print
3636
from awscli.customizations.s3.syncstrategy.base import MissingFileSync, \
37-
SizeAndLastModifiedSync, NeverSync, AlwaysSync
38-
from awscli.customizations.s3.syncstrategy.caseconflict import CaseConflictSync
37+
SizeAndLastModifiedSync, NeverSync
3938
from awscli.customizations.s3 import transferconfig
4039
from awscli.utils import resolve_v2_debug_mode
4140

@@ -483,33 +482,6 @@
483482
)
484483
}
485484

486-
CASE_CONFLICT = {
487-
'name': 'case-conflict',
488-
'choices': [
489-
'ignore',
490-
'skip',
491-
'warn',
492-
'error',
493-
],
494-
'default': 'warn',
495-
'help_text': (
496-
"Configures behavior when attempting to download multiple objects "
497-
"whose keys differ only by case, which can cause undefined behavior "
498-
"on case-insensitive filesystems. "
499-
"This parameter only applies for commands that perform multiple S3 "
500-
"to local downloads. "
501-
f"See <a href='{CaseConflictSync.DOC_URI}'>Handling case "
502-
"conflicts</a> for details. Valid values are: "
503-
"<ul>"
504-
"<li>``error`` - Raise an error and abort downloads.</li>"
505-
"<li>``warn`` - The default value. Emit a warning and download "
506-
"the object.</li>"
507-
"<li>``skip`` - Skip downloading the object.</li>"
508-
"<li>``ignore`` - Ignore the conflict and download the object.</li>"
509-
"</ul>"
510-
),
511-
}
512-
513485
TRANSFER_ARGS = [DRYRUN, QUIET, INCLUDE, EXCLUDE, ACL,
514486
FOLLOW_SYMLINKS, NO_FOLLOW_SYMLINKS, NO_GUESS_MIME_TYPE,
515487
SSE, SSE_C, SSE_C_KEY, SSE_KMS_KEY_ID, SSE_C_COPY_SOURCE,
@@ -835,8 +807,7 @@ class CpCommand(S3TransferCommand):
835807
"or <S3Uri> <S3Uri>"
836808
ARG_TABLE = [{'name': 'paths', 'nargs': 2, 'positional_arg': True,
837809
'synopsis': USAGE}] + TRANSFER_ARGS + \
838-
[METADATA, METADATA_DIRECTIVE, EXPECTED_SIZE, RECURSIVE,
839-
CASE_CONFLICT]
810+
[METADATA, METADATA_DIRECTIVE, EXPECTED_SIZE, RECURSIVE]
840811

841812

842813
class MvCommand(S3TransferCommand):
@@ -846,8 +817,7 @@ class MvCommand(S3TransferCommand):
846817
"or <S3Uri> <S3Uri>"
847818
ARG_TABLE = [{'name': 'paths', 'nargs': 2, 'positional_arg': True,
848819
'synopsis': USAGE}] + TRANSFER_ARGS +\
849-
[METADATA, METADATA_DIRECTIVE, RECURSIVE, VALIDATE_SAME_S3_PATHS,
850-
CASE_CONFLICT]
820+
[METADATA, METADATA_DIRECTIVE, RECURSIVE, VALIDATE_SAME_S3_PATHS]
851821

852822

853823
class RmCommand(S3TransferCommand):
@@ -869,7 +839,7 @@ class SyncCommand(S3TransferCommand):
869839
"<LocalPath> or <S3Uri> <S3Uri>"
870840
ARG_TABLE = [{'name': 'paths', 'nargs': 2, 'positional_arg': True,
871841
'synopsis': USAGE}] + TRANSFER_ARGS + \
872-
[METADATA, METADATA_DIRECTIVE, CASE_CONFLICT]
842+
[METADATA, METADATA_DIRECTIVE]
873843

874844

875845
class MbCommand(S3Command):
@@ -1034,16 +1004,7 @@ def choose_sync_strategies(self):
10341004
# Set the default strategies.
10351005
sync_strategies['file_at_src_and_dest_sync_strategy'] = \
10361006
SizeAndLastModifiedSync()
1037-
if self._should_handle_case_conflicts():
1038-
sync_strategies['file_not_at_dest_sync_strategy'] = (
1039-
CaseConflictSync(
1040-
on_case_conflict=self.parameters['case_conflict']
1041-
)
1042-
)
1043-
else:
1044-
sync_strategies['file_not_at_dest_sync_strategy'] = (
1045-
MissingFileSync()
1046-
)
1007+
sync_strategies['file_not_at_dest_sync_strategy'] = MissingFileSync()
10471008
sync_strategies['file_not_at_src_sync_strategy'] = NeverSync()
10481009

10491010
# Determine what strategies to override if any.
@@ -1177,12 +1138,6 @@ def run(self):
11771138
'filters': [create_filter(self.parameters)],
11781139
'file_info_builder': [file_info_builder],
11791140
's3_handler': [s3_transfer_handler]}
1180-
if self._should_handle_case_conflicts():
1181-
self._handle_case_conflicts(
1182-
command_dict,
1183-
rev_files,
1184-
rev_generator,
1185-
)
11861141
elif self.cmd == 'rm':
11871142
command_dict = {'setup': [files],
11881143
'file_generator': [file_generator],
@@ -1195,12 +1150,6 @@ def run(self):
11951150
'filters': [create_filter(self.parameters)],
11961151
'file_info_builder': [file_info_builder],
11971152
's3_handler': [s3_transfer_handler]}
1198-
if self._should_handle_case_conflicts():
1199-
self._handle_case_conflicts(
1200-
command_dict,
1201-
rev_files,
1202-
rev_generator,
1203-
)
12041153

12051154
files = command_dict['setup']
12061155
while self.instructions:
@@ -1266,74 +1215,6 @@ def _map_sse_c_params(self, request_parameters, paths_type):
12661215
}
12671216
)
12681217

1269-
def _should_handle_case_conflicts(self):
1270-
return (
1271-
self.cmd in {'sync', 'cp', 'mv'}
1272-
and self.parameters.get('paths_type') == 's3local'
1273-
and self.parameters['case_conflict'] != 'ignore'
1274-
and self.parameters.get('dir_op')
1275-
)
1276-
1277-
def _handle_case_conflicts(self, command_dict, rev_files, rev_generator):
1278-
# Objects are not returned in lexicographical order when
1279-
# operated on S3 Express directory buckets. This is required
1280-
# for sync operations to behave correctly, which is what
1281-
# recursive copies and moves fall back to so potential case
1282-
# conflicts can be detected and handled.
1283-
if not is_s3express_bucket(
1284-
split_s3_bucket_key(self.parameters['src'])[0]
1285-
):
1286-
self._modify_instructions_for_case_conflicts(
1287-
command_dict, rev_files, rev_generator
1288-
)
1289-
return
1290-
# `skip` and `error` are not valid choices in this case because
1291-
# it's not possible to detect case conflicts.
1292-
if self.parameters['case_conflict'] not in {'ignore', 'warn'}:
1293-
raise ValueError(
1294-
f"`{self.parameters['case_conflict']}` is not a valid value "
1295-
"for `--case-conflict` when operating on S3 Express "
1296-
"directory buckets. Valid values: `warn`, `ignore`."
1297-
)
1298-
msg = (
1299-
"warning: Recursive copies/moves from an S3 Express "
1300-
"directory bucket to a case-insensitive local filesystem "
1301-
"may result in undefined behavior if there are "
1302-
"S3 object key names that differ only by case. To disable "
1303-
"this warning, set the `--case-conflict` parameter to `ignore`. "
1304-
f"For more information, see {CaseConflictSync.DOC_URI}."
1305-
)
1306-
uni_print(msg, sys.stderr)
1307-
1308-
def _modify_instructions_for_case_conflicts(
1309-
self, command_dict, rev_files, rev_generator
1310-
):
1311-
# Command will perform recursive S3 to local downloads.
1312-
# Checking for potential case conflicts requires knowledge
1313-
# of local files. Instead of writing a separate validation
1314-
# mechanism for recursive downloads, we modify the instructions
1315-
# to mimic a sync command.
1316-
sync_strategies = {
1317-
# Local filename exists with exact case match. Always sync
1318-
# because it's a copy operation.
1319-
'file_at_src_and_dest_sync_strategy': AlwaysSync(),
1320-
# Local filename either doesn't exist or differs only by case.
1321-
# Let `CaseConflictSync` determine which it is and handle it
1322-
# according to configured `--case-conflict` parameter.
1323-
'file_not_at_dest_sync_strategy': CaseConflictSync(
1324-
on_case_conflict=self.parameters['case_conflict']
1325-
),
1326-
# Copy is one-way so never sync if not at source.
1327-
'file_not_at_src_sync_strategy': NeverSync(),
1328-
}
1329-
command_dict['setup'].append(rev_files)
1330-
command_dict['file_generator'].append(rev_generator)
1331-
command_dict['filters'].append(create_filter(self.parameters))
1332-
command_dict['comparator'] = [Comparator(**sync_strategies)]
1333-
self.instructions.insert(
1334-
self.instructions.index('file_info_builder'), 'comparator'
1335-
)
1336-
13371218

13381219
class CommandParameters(object):
13391220
"""

awscli/customizations/s3/syncstrategy/base.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -254,12 +254,3 @@ def determine_should_sync(self, src_file, dest_file):
254254
LOG.debug("syncing: %s -> %s, file does not exist at destination",
255255
src_file.src, src_file.dest)
256256
return True
257-
258-
259-
class AlwaysSync(BaseSync):
260-
def __init__(self, sync_type='file_at_src_and_dest'):
261-
super(AlwaysSync, self).__init__(sync_type)
262-
263-
def determine_should_sync(self, src_file, dest_file):
264-
LOG.debug(f"syncing: {src_file.src} -> {src_file.dest}")
265-
return True

awscli/customizations/s3/syncstrategy/caseconflict.py

Lines changed: 0 additions & 92 deletions
This file was deleted.

awscli/customizations/s3/utils.py

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -690,20 +690,6 @@ def _on_failure(self, future, e):
690690
pass
691691

692692

693-
class CaseConflictCleanupSubscriber(BaseSubscriber):
694-
"""
695-
A subscriber which removes object compare key from case conflict set
696-
when download finishes.
697-
"""
698-
699-
def __init__(self, submitted, case_conflict_key):
700-
self._submitted = submitted
701-
self._key = case_conflict_key
702-
703-
def on_done(self, future, **kwargs):
704-
self._submitted.discard(self._key)
705-
706-
707693
class DeleteSourceSubscriber(OnDoneFilteredSubscriber):
708694
"""A subscriber which deletes the source of the transfer."""
709695
def _on_success(self, future):

0 commit comments

Comments
 (0)