From a9182314280a3e564f6bb9ae0110ff111c584d0c Mon Sep 17 00:00:00 2001 From: Bob Strahan Date: Mon, 22 Jun 2026 16:26:29 +0000 Subject: [PATCH] fix(config): paginate list_config_versions DynamoDB scan (#354) ConfigurationManager.list_config_versions() did a single unpaginated table.scan(). DynamoDB scans return at most 1MB per call, so on deployments with many config versions (230+) only the first ~58 were returned. Configs uploaded via CLI or the autotune agent were invisible in the UI's View/Edit Configuration page and the upload-document config-version dropdown, though they still worked when referenced by name in processing runs. Add a pagination loop over LastEvaluatedKey so all versions are returned. Fixes every caller (update_configuration, the AppSync configuration_resolver, rules_discovery, and the SDK). Add unit tests covering single-page and multi-page (paginated) scans. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 2 + .../config/configuration_manager.py | 50 +++++++++++-------- .../unit/config/test_configuration_manager.py | 48 ++++++++++++++++++ 3 files changed, 80 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e699cfe73..6b003bf6e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,8 @@ SPDX-License-Identifier: MIT-0 ### Fixed +- **Configuration version list silently truncated past the first page (#354)** — `ConfigurationManager.list_config_versions()` performed a single unpaginated `table.scan()` on the ConfigurationTable. Because a DynamoDB scan returns at most 1 MB per call, deployments with many config versions (e.g. 230+) only ever saw the ~58 that fit on the first page — uploaded-via-CLI and autotune-agent configs were invisible in the UI's View/Edit Configuration page and the upload-document config-version dropdown (the configs still worked when referenced by name). The method now paginates through `LastEvaluatedKey` so every version is returned. 
Fixes all callers (`update_configuration`, the AppSync `configuration_resolver`, `rules_discovery`, and the SDK). + - **Build Info "update available" indicator broke against the public release bucket** — The `getLatestPublishedVersion` resolver discovered the newest published version by calling `ListObjectsV2` on the public artifacts bucket and parsing `idp-main_<version>.yaml` keys. That bucket grants `GetObject` only (no listing), so the check failed on real public deployments. `idp-cli publish` now writes a small pointer object — `<prefix>/idp-main-latest.json` (`{version, templateUrl}`) — at the version-stripped prefix on every release, and the resolver reads that one known key with a single `GetObject` (unsigned, falling back to signed), with a conventional `idp-main_<version>.yaml` URL fallback if the pointer omits one. No version parsing or `ListObjectsV2`. The check stays disabled when `PUBLIC_ARTIFACTS_BUCKET` is unset. - **Private AppSync unreachable from browser clients (WorkSpaces, VPN, bastion)** — `scripts/vpc-endpoints.yaml` `VpcEndpointSecurityGroup` previously allowed inbound HTTPS (port 443) only from the Lambda security group. Browsers inside the VPC send AppSync GraphQL requests directly to the `appsync-api` VPC Interface Endpoint (not through the ALB), so all queries, mutations, and subscriptions hung indefinitely — the Configuration page showed "Loading configuration..." forever, the Document List never populated, and the Upload Documents page showed "Input bucket not configured". Fixed by adding a `VpcCidr` parameter and a second ingress rule for the VPC CIDR block. `deploy-vpc-endpoints.py` now auto-looks up the VPC primary CIDR via `ec2:DescribeVpcs` and passes it automatically — no CLI changes required. Re-run `deploy-vpc-endpoints.py` against an existing deployment to apply the fix. 
diff --git a/lib/idp_common_pkg/idp_common/config/configuration_manager.py b/lib/idp_common_pkg/idp_common/config/configuration_manager.py index 9ac1e055a..d70dd8b09 100644 --- a/lib/idp_common_pkg/idp_common/config/configuration_manager.py +++ b/lib/idp_common_pkg/idp_common/config/configuration_manager.py @@ -417,28 +417,38 @@ def list_config_versions(self) -> List[Dict[str, Any]]: description, bdaProjectArn, bdaSyncStatus, bdaLastSyncedAt """ try: - response = self.table.scan( - FilterExpression="begins_with(Configuration, :config_prefix)", - ExpressionAttributeValues={":config_prefix": f"{CONFIG_TYPE_CONFIG}#"}, - ProjectionExpression="Configuration, IsActive, CreatedAt, UpdatedAt, Description, BdaProjectArn, BdaSyncStatus, BdaLastSyncedAt, Managed" - ) + # DynamoDB scan returns at most 1MB per call, so paginate through + # LastEvaluatedKey to ensure every config version is returned. Without + # this loop, versions beyond the first page are silently dropped. + scan_kwargs = { + "FilterExpression": "begins_with(Configuration, :config_prefix)", + "ExpressionAttributeValues": {":config_prefix": f"{CONFIG_TYPE_CONFIG}#"}, + "ProjectionExpression": "Configuration, IsActive, CreatedAt, UpdatedAt, Description, BdaProjectArn, BdaSyncStatus, BdaLastSyncedAt, Managed", + } versions = [] - for item in response.get('Items', []): - config_key = item.get('Configuration', '') - if "#" in config_key: - _, version = config_key.split("#", 1) - versions.append({ - "versionName": version, - "isActive": item.get('IsActive'), - "createdAt": item.get('CreatedAt'), - "updatedAt": item.get('UpdatedAt'), - "description": item.get('Description', ""), - "bdaProjectArn": item.get('BdaProjectArn'), - "bdaSyncStatus": item.get('BdaSyncStatus'), - "bdaLastSyncedAt": item.get('BdaLastSyncedAt'), - "managed": item.get('Managed', False), - }) + while True: + response = self.table.scan(**scan_kwargs) + for item in response.get('Items', []): + config_key = item.get('Configuration', '') + if "#" in 
config_key: + _, version = config_key.split("#", 1) + versions.append({ + "versionName": version, + "isActive": item.get('IsActive'), + "createdAt": item.get('CreatedAt'), + "updatedAt": item.get('UpdatedAt'), + "description": item.get('Description', ""), + "bdaProjectArn": item.get('BdaProjectArn'), + "bdaSyncStatus": item.get('BdaSyncStatus'), + "bdaLastSyncedAt": item.get('BdaLastSyncedAt'), + "managed": item.get('Managed', False), + }) + + last_evaluated_key = response.get('LastEvaluatedKey') + if not last_evaluated_key: + break + scan_kwargs['ExclusiveStartKey'] = last_evaluated_key return versions diff --git a/lib/idp_common_pkg/tests/unit/config/test_configuration_manager.py b/lib/idp_common_pkg/tests/unit/config/test_configuration_manager.py index 4fa8f8e9e..1f1763806 100644 --- a/lib/idp_common_pkg/tests/unit/config/test_configuration_manager.py +++ b/lib/idp_common_pkg/tests/unit/config/test_configuration_manager.py @@ -45,3 +45,51 @@ def test_activate_version_not_found(self, mock_boto3): with pytest.raises(ValueError, match="Config version test-version not found"): manager.activate_version("test-version") + + +@pytest.mark.unit +class TestConfigurationManagerListConfigVersions: + """Test list_config_versions pagination.""" + + @patch("idp_common.config.configuration_manager.boto3") + def test_list_config_versions_single_page(self, mock_boto3): + """All versions on a single page are returned.""" + mock_table = Mock() + mock_boto3.resource.return_value.Table.return_value = mock_table + mock_table.scan.return_value = { + "Items": [ + {"Configuration": "config#v1", "IsActive": True}, + {"Configuration": "config#v2", "IsActive": False}, + ] + } + + manager = ConfigurationManager(table_name="test-table") + versions = manager.list_config_versions() + + assert mock_table.scan.call_count == 1 + assert [v["versionName"] for v in versions] == ["v1", "v2"] + + @patch("idp_common.config.configuration_manager.boto3") + def test_list_config_versions_paginates(self, 
mock_boto3): + """Versions beyond the first scan page are still returned.""" + mock_table = Mock() + mock_boto3.resource.return_value.Table.return_value = mock_table + # First page returns a LastEvaluatedKey, second page does not. + mock_table.scan.side_effect = [ + { + "Items": [{"Configuration": "config#v1", "IsActive": False}], + "LastEvaluatedKey": {"Configuration": "config#v1"}, + }, + { + "Items": [{"Configuration": "config#v2", "IsActive": True}], + }, + ] + + manager = ConfigurationManager(table_name="test-table") + versions = manager.list_config_versions() + + assert mock_table.scan.call_count == 2 + # Second scan must continue from the prior page's LastEvaluatedKey. + _, second_call_kwargs = mock_table.scan.call_args_list[1] + assert second_call_kwargs["ExclusiveStartKey"] == {"Configuration": "config#v1"} + assert [v["versionName"] for v in versions] == ["v1", "v2"]