Skip to content

Commit 3a27dc2

Browse files
committed
Improving caching of S3 endpoints by omitting unused parameters
1 parent c02348d commit 3a27dc2

File tree

4 files changed

+61
-7
lines changed

4 files changed

+61
-7
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"type": "enhancement",
3+
"category": "``s3``",
4+
"description": "Improve caching of S3 endpoints, which should improve performance of sync and recursive copy commands that interact with multiple keys in the same bucket"
5+
}

awscli/botocore/endpoint_provider.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,10 @@
4747
r"^(?!-)[a-zA-Z\d-]{1,63}(?<!-)$",
4848
)
4949
CACHE_SIZE = 100
50+
# S3 endpoint ruleset parameters that are defined but not currently referenced.
51+
# They are excluded from the cache key to avoid cache thrashing when
52+
# accessing multiple objects in the same bucket.
53+
S3_UNREFERENCED_PARAMS = {'Key', 'Prefix', 'CopySource'}
5054
ARN_PARSER = ArnParser()
5155
STRING_FORMATTER = Formatter()
5256

@@ -700,16 +704,22 @@ def evaluate(self, input_parameters):
700704
class EndpointProvider:
701705
"""Derives endpoints from a RuleSet for given input parameters."""
702706

703-
def __init__(self, ruleset_data, partition_data):
707+
def __init__(self, ruleset_data, partition_data, excluded_params=None):
704708
self.ruleset = RuleSet(**ruleset_data, partitions=partition_data)
709+
self._excluded_params = excluded_params or frozenset()
705710

706-
@lru_cache_weakref(maxsize=CACHE_SIZE)
707711
def resolve_endpoint(self, **input_parameters):
708712
"""Match input parameters to a rule.
709713
710714
:type input_parameters: dict
711715
:rtype: RuleSetEndpoint
712716
"""
717+
for param in self._excluded_params:
718+
input_parameters.pop(param, None)
719+
return self._resolve_endpoint(**input_parameters)
720+
721+
@lru_cache_weakref(maxsize=CACHE_SIZE)
722+
def _resolve_endpoint(self, **input_parameters):
713723
params_for_error = input_parameters.copy()
714724
endpoint = self.ruleset.evaluate(input_parameters)
715725
if endpoint is None:

awscli/botocore/regions.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
import jmespath
2626
from botocore import UNSIGNED, xform_name
2727
from botocore.auth import AUTH_TYPE_MAPS
28-
from botocore.endpoint_provider import EndpointProvider
28+
from botocore.endpoint_provider import S3_UNREFERENCED_PARAMS, EndpointProvider
2929
from botocore.exceptions import (
3030
EndpointProviderError,
3131
EndpointVariantError,
@@ -470,6 +470,11 @@ def __init__(
470470
self._provider = EndpointProvider(
471471
ruleset_data=endpoint_ruleset_data,
472472
partition_data=partition_data,
473+
excluded_params=(
474+
S3_UNREFERENCED_PARAMS
475+
if service_model.service_name == 's3'
476+
else None
477+
),
473478
)
474479
self._param_definitions = self._provider.ruleset.parameters
475480
self._service_model = service_model
@@ -515,9 +520,9 @@ def construct_endpoint(
515520

516521
# The endpoint provider does not support non-secure transport.
517522
if (
518-
not self._use_ssl
519-
and provider_result.url.startswith('https://')
520-
and 'Endpoint' not in provider_params
523+
not self._use_ssl
524+
and provider_result.url.startswith('https://')
525+
and 'Endpoint' not in provider_params
521526
):
522527
provider_result = provider_result._replace(
523528
url=f'http://{provider_result.url[8:]}'

tests/functional/botocore/test_endpoint_rulesets.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,10 @@
1818
import pytest
1919
from botocore import xform_name
2020
from botocore.config import Config
21-
from botocore.endpoint_provider import EndpointProvider
21+
from botocore.endpoint_provider import (
22+
S3_UNREFERENCED_PARAMS,
23+
EndpointProvider,
24+
)
2225
from botocore.exceptions import (
2326
BotoCoreError,
2427
ClientError,
@@ -295,3 +298,34 @@ def builtin_overwriter_handler(builtins, **kwargs):
295298
except (ClientError, ResponseParserError):
296299
pass
297300
assert len(http_stubber.requests) == 0
301+
302+
303+
def _collect_refs_from_rules(rules):
304+
"""Recursively collect all {\"ref\": \"...\"} values from rules."""
305+
refs = set()
306+
if isinstance(rules, dict):
307+
if 'ref' in rules:
308+
refs.add(rules['ref'])
309+
for value in rules.values():
310+
refs.update(_collect_refs_from_rules(value))
311+
elif isinstance(rules, list):
312+
for item in rules:
313+
refs.update(_collect_refs_from_rules(item))
314+
return refs
315+
316+
317+
@pytest.mark.validates_model
318+
def test_s3_unreferenced_endpoint_ruleset_params():
319+
"""Assert that the set of known S3 endpoint ruleset parameters that are
320+
not used in the rules hasn't changed. These are set in the
321+
S3_UNREFERENCED_PARAMS constant to prevent them from influencing the cache.
322+
"""
323+
ruleset = LOADER.load_service_model('s3', 'endpoint-rule-set-1')
324+
all_params = set(ruleset['parameters'].keys())
325+
referenced = _collect_refs_from_rules(ruleset['rules'])
326+
unreferenced = all_params - referenced
327+
assert unreferenced == S3_UNREFERENCED_PARAMS, (
328+
"The set of unused S3 endpoints parameters have changed, examine the "
329+
"implication of this on endpoint caching behavior before updating"
330+
"S3_UNREFERENCED_PARAMS."
331+
)

0 commit comments

Comments
 (0)