Skip to content

Commit 492737e

Browse files
committed
Merge branch 'fix/discovery' into 'develop'
Discovery no longer injects default config classes into target version See merge request genaiic-reusable-assets/engagement-artifacts/genaiic-idp-accelerator!570
2 parents 676a17c + 9004ad4 commit 492737e

4 files changed

Lines changed: 134 additions & 120 deletions

File tree

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ SPDX-License-Identifier: MIT-0
1515

1616
- **CLI: Remove deprecated `--pattern` references** — Updated `idp-cli.md` and CLI code to reflect the unified pattern architecture. Removed `--pattern` from all deploy and config command examples/options.
1717

18+
- **Discovery no longer injects default config classes into target version** — Previously, running Discovery on a configuration version would merge all classes from the `default` version into the target version alongside the newly discovered class. Now Discovery only adds/updates the discovered class within the target version's own class list, keeping the version's classes exactly as the user curated them.
19+
1820
- **Documentation: Comprehensive review and cleanup** — Fixed outdated references, broken links, and missing content across documentation files.
1921

2022
## [0.5.1]

lib/idp_common_pkg/idp_common/discovery/classes_discovery.py

Lines changed: 23 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,7 @@ def discovery_classes_with_document(self, input_bucket: str, input_prefix: str):
9898
# No need to transform - it's already in the right format
9999
current_class = model_response
100100

101-
# Merge the new class with existing Default + Custom classes
102-
# and save to Custom config
101+
# Add the discovered class to the target version's config
103102
self._merge_and_save_class(current_class)
104103

105104
return {"status": "SUCCESS"}
@@ -151,8 +150,7 @@ def discovery_classes_with_document_and_ground_truth(
151150
# No need to transform - it's already in the right format
152151
current_class = model_response
153152

154-
# Merge the new class with existing Default + Custom classes
155-
# and save to Custom config
153+
# Add the discovered class to the target version's config
156154
self._merge_and_save_class(current_class)
157155

158156
return {"status": "SUCCESS"}
@@ -166,18 +164,17 @@ def discovery_classes_with_document_and_ground_truth(
166164

167165
def _merge_and_save_class(self, new_class: Dict[str, Any]) -> None:
168166
"""
169-
Merge a new discovered class with existing Default + Custom classes and save to Custom.
167+
Merge a new discovered class into the target version's configuration.
170168
171-
This method ensures that discovered classes are ADDITIVE to existing classes:
172-
1. Read Default classes (base classes from deployment)
173-
2. Read existing Custom classes (previous user customizations)
174-
3. Build a merged list starting from Default, overriding with Custom
175-
4. Add/update the new discovered class
176-
5. Save the complete merged list to Custom
169+
This method only adds/updates the discovered class within the target version's
170+
own class list. It does NOT pull in classes from the default configuration version,
171+
keeping the target version's classes exactly as the user curated them, plus the
172+
newly discovered class.
177173
178-
This is necessary because the Default+Custom merge uses array replacement,
179-
not array concatenation. By saving the complete class list to Custom,
180-
we ensure no classes are lost during the merge.
174+
Steps:
175+
1. Read existing classes from the target version
176+
2. Add/update the new discovered class (deduplicate by $id)
177+
3. Save back to the target version
181178
182179
Args:
183180
new_class: The newly discovered class schema to add/update
@@ -186,65 +183,36 @@ def _merge_and_save_class(self, new_class: Dict[str, Any]) -> None:
186183
new_class_id = new_class.get("$id") or new_class.get("x-aws-idp-document-type")
187184
logger.info(f"Merging discovered class: {new_class_id}")
188185

189-
# Step 1: Read Default classes (base classes from deployment)
190-
default_config = self.config_manager.get_configuration("Config", "default")
191-
default_classes: list = []
192-
if (
193-
default_config
194-
and isinstance(default_config, IDPConfig)
195-
and default_config.classes
196-
):
197-
# Convert to list of dicts for easier manipulation
198-
default_classes = [
199-
cls
200-
if isinstance(cls, dict)
201-
else cls.model_dump()
202-
if hasattr(cls, "model_dump")
203-
else dict(cls)
204-
for cls in default_config.classes
205-
]
206-
logger.info(f"Found {len(default_classes)} classes in Default config")
207-
208-
# Step 2: Read existing Custom config (raw, no Pydantic defaults)
186+
# Read existing config for the target version (raw, no Pydantic defaults)
209187
existing_custom = (
210188
self.config_manager.get_raw_configuration("Config", version=self.version)
211189
or {}
212190
)
213191
custom_classes = list(existing_custom.get("classes", []))
214-
logger.info(f"Found {len(custom_classes)} classes in Custom config")
215-
216-
# Step 3: Build merged class list - start with Default, override with Custom
217-
# Use a dict keyed by class ID for efficient deduplication
218-
merged_classes_by_id: Dict[str, Dict[str, Any]] = {}
219-
220-
# Add Default classes first
221-
for cls in default_classes:
222-
cls_id = cls.get("$id") or cls.get("x-aws-idp-document-type")
223-
if cls_id:
224-
merged_classes_by_id[cls_id] = cls
192+
logger.info(
193+
f"Found {len(custom_classes)} existing classes in version '{self.version}'"
194+
)
225195

226-
# Override/add Custom classes
196+
# Build class list from existing version classes, deduplicating by ID
197+
classes_by_id: Dict[str, Dict[str, Any]] = {}
227198
for cls in custom_classes:
228199
cls_id = cls.get("$id") or cls.get("x-aws-idp-document-type")
229200
if cls_id:
230-
merged_classes_by_id[cls_id] = cls
201+
classes_by_id[cls_id] = cls
231202

232-
# Step 4: Add/update the new discovered class
203+
# Add/update the new discovered class
233204
if new_class_id:
234-
merged_classes_by_id[new_class_id] = new_class
205+
classes_by_id[new_class_id] = new_class
235206

236207
# Convert back to list
237-
merged_classes = list(merged_classes_by_id.values())
238-
logger.info(f"Merged class list has {len(merged_classes)} classes")
208+
merged_classes = list(classes_by_id.values())
209+
logger.info(f"Saving {len(merged_classes)} classes to version '{self.version}'")
239210

240-
# Step 5: Save to Custom config
241-
# The merged list will replace Default.classes during runtime merge,
242-
# ensuring all classes (Default + Custom + new) are preserved
211+
# Save to version config
243212
existing_custom["classes"] = merged_classes
244213
self.config_manager.save_raw_configuration(
245214
"Config", existing_custom, version=self.version
246215
)
247-
logger.info(f"Saved {len(merged_classes)} classes to Custom config")
248216

249217
def _validate_json_schema(self, schema: Dict[str, Any]) -> tuple[bool, str]:
250218
"""

lib/idp_common_pkg/tests/unit/discovery/test_classes_discovery.py

Lines changed: 79 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -639,32 +639,7 @@ def test_sample_output_format(self, service):
639639
def test_discovery_classes_with_document_updates_existing_class(
640640
self, service, mock_configuration_item
641641
):
642-
"""Test that discovery updates existing class configuration."""
643-
# Mock existing Default configuration with classes in JSON Schema format
644-
# Use actual IDPConfig so isinstance check passes
645-
existing_default_config = IDPConfig(
646-
classes=[
647-
{
648-
"$schema": "http://json-schema.org/draft-07/schema#",
649-
"$id": "w4",
650-
"type": "object",
651-
"title": "W-4",
652-
"description": "Old description",
653-
"x-aws-idp-document-type": "W-4",
654-
"properties": {},
655-
},
656-
{
657-
"$schema": "http://json-schema.org/draft-07/schema#",
658-
"$id": "other_form",
659-
"type": "object",
660-
"title": "Other-Form",
661-
"description": "Other form",
662-
"x-aws-idp-document-type": "Other-Form",
663-
"properties": {},
664-
},
665-
]
666-
)
667-
642+
"""Test that discovery updates existing class in the target version only."""
668643
with (
669644
patch("idp_common.utils.s3util.S3Util.get_bytes") as mock_get_bytes,
670645
patch("idp_common.bedrock.extract_text_from_response") as mock_extract_text,
@@ -685,12 +660,29 @@ def test_discovery_classes_with_document_updates_existing_class(
685660
"response": {"output": {"message": {"content": [{"text": "{}"}]}}},
686661
"metering": {"tokens": 500},
687662
}
688-
# get_configuration returns Default config (for reading Default classes)
689-
service.config_manager.get_configuration.return_value = (
690-
existing_default_config
691-
)
692-
# get_raw_configuration returns existing Custom (no existing Custom classes in this case)
693-
service.config_manager.get_raw_configuration.return_value = {}
663+
# Target version already has an old W-4 and another class
664+
service.config_manager.get_raw_configuration.return_value = {
665+
"classes": [
666+
{
667+
"$schema": "http://json-schema.org/draft-07/schema#",
668+
"$id": "w4",
669+
"type": "object",
670+
"title": "W-4",
671+
"description": "Old description",
672+
"x-aws-idp-document-type": "W-4",
673+
"properties": {},
674+
},
675+
{
676+
"$schema": "http://json-schema.org/draft-07/schema#",
677+
"$id": "other_form",
678+
"type": "object",
679+
"title": "Other-Form",
680+
"description": "Other form",
681+
"x-aws-idp-document-type": "Other-Form",
682+
"properties": {},
683+
},
684+
]
685+
}
694686

695687
result = service.discovery_classes_with_document(
696688
"test-bucket", "test-document.pdf"
@@ -704,7 +696,7 @@ def test_discovery_classes_with_document_updates_existing_class(
704696
assert call_args[0][0] == "Config" # Config type
705697
updated_classes = call_args[0][1]["classes"] # Classes from saved config
706698

707-
# Should have 2 classes (Other-Form from Default + updated W-4)
699+
# Should have 2 classes (existing Other-Form + updated W-4 from version)
708700
assert len(updated_classes) == 2
709701

710702
# Find the W-4 class and verify it was updated (by $id)
@@ -714,6 +706,60 @@ def test_discovery_classes_with_document_updates_existing_class(
714706
assert w4_class is not None
715707
assert w4_class["description"] == "Updated description"
716708

709+
# Verify Other-Form is preserved from the version
710+
other_class = next(
711+
(cls for cls in updated_classes if cls.get("$id") == "other_form"), None
712+
)
713+
assert other_class is not None
714+
assert other_class["description"] == "Other form"
715+
716+
def test_discovery_does_not_pull_default_classes_into_version(self, service):
717+
"""Test that discovery does NOT inject default config classes into the target version.
718+
719+
When a user runs discovery on a version, only the discovered class should be
720+
added. Classes from the 'default' config version should NOT be merged in.
721+
"""
722+
with (
723+
patch("idp_common.utils.s3util.S3Util.get_bytes") as mock_get_bytes,
724+
patch("idp_common.bedrock.extract_text_from_response") as mock_extract_text,
725+
):
726+
mock_get_bytes.return_value = b"fake_content"
727+
mock_extract_text.return_value = json.dumps(
728+
{
729+
"$schema": "http://json-schema.org/draft-07/schema#",
730+
"$id": "w4",
731+
"type": "object",
732+
"title": "W-4",
733+
"description": "Discovered W-4",
734+
"x-aws-idp-document-type": "W-4",
735+
"properties": {},
736+
}
737+
)
738+
service._mock_bedrock_client.return_value = {
739+
"response": {"output": {"message": {"content": [{"text": "{}"}]}}},
740+
"metering": {"tokens": 500},
741+
}
742+
# Target version has NO classes yet (empty version)
743+
service.config_manager.get_raw_configuration.return_value = {}
744+
745+
result = service.discovery_classes_with_document(
746+
"test-bucket", "test-document.pdf"
747+
)
748+
749+
assert result["status"] == "SUCCESS"
750+
751+
# Verify only the discovered class is saved — no default classes injected
752+
service.config_manager.save_raw_configuration.assert_called_once()
753+
call_args = service.config_manager.save_raw_configuration.call_args
754+
updated_classes = call_args[0][1]["classes"]
755+
756+
assert len(updated_classes) == 1
757+
assert updated_classes[0]["$id"] == "w4"
758+
assert updated_classes[0]["description"] == "Discovered W-4"
759+
760+
# Verify get_configuration was NOT called (no default config reading)
761+
service.config_manager.get_configuration.assert_not_called()
762+
717763
def test_discovery_classes_with_document_no_existing_config(self, service):
718764
"""Test discovery when no existing configuration exists."""
719765
with (

lib/idp_common_pkg/tests/unit/discovery/test_classes_discovery_integration.py

Lines changed: 30 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717

1818
# Import application modules
1919
from idp_common.discovery.classes_discovery import ClassesDiscovery
20-
from idp_common.config.models import IDPConfig
2120

2221

2322
@pytest.mark.unit
@@ -371,7 +370,11 @@ def test_discovery_updates_existing_configuration(
371370
service_with_mocks,
372371
mock_w4_bedrock_response,
373372
):
374-
"""Test that discovery properly updates existing configuration."""
373+
"""Test that discovery properly updates existing version configuration.
374+
375+
Discovery should only add/update the discovered class within the target
376+
version's own classes — it should NOT pull in classes from the default version.
377+
"""
375378
# Mock S3 file content
376379
mock_get_bytes.return_value = b"fake content"
377380

@@ -381,35 +384,30 @@ def test_discovery_updates_existing_configuration(
381384
]["content"][0]["text"]
382385
service_with_mocks._mock_bedrock_client.return_value = mock_w4_bedrock_response
383386

384-
# Mock existing Default configuration with different forms in JSON Schema format
385-
existing_default = IDPConfig(
386-
classes=[
387-
{
388-
"$schema": "http://json-schema.org/draft-07/schema#",
389-
"$id": "i9",
390-
"type": "object",
391-
"title": "I-9",
392-
"description": "Employment Eligibility Verification",
393-
"x-aws-idp-document-type": "I-9",
394-
"properties": {},
395-
},
396-
{
397-
"$schema": "http://json-schema.org/draft-07/schema#",
398-
"$id": "w4",
399-
"type": "object",
400-
"title": "W-4",
401-
"description": "Old W-4 description",
402-
"x-aws-idp-document-type": "W-4",
403-
"properties": {},
404-
},
405-
]
406-
)
407-
# Create mocks for config_manager methods
408-
service_with_mocks.config_manager.get_configuration = MagicMock(
409-
return_value=existing_default
410-
)
387+
# Target version already has an I-9 and an old W-4
411388
service_with_mocks.config_manager.get_raw_configuration = MagicMock(
412-
return_value={} # No existing Custom
389+
return_value={
390+
"classes": [
391+
{
392+
"$schema": "http://json-schema.org/draft-07/schema#",
393+
"$id": "i9",
394+
"type": "object",
395+
"title": "I-9",
396+
"description": "Employment Eligibility Verification",
397+
"x-aws-idp-document-type": "I-9",
398+
"properties": {},
399+
},
400+
{
401+
"$schema": "http://json-schema.org/draft-07/schema#",
402+
"$id": "w4",
403+
"type": "object",
404+
"title": "W-4",
405+
"description": "Old W-4 description",
406+
"x-aws-idp-document-type": "W-4",
407+
"properties": {},
408+
},
409+
]
410+
}
413411
)
414412
service_with_mocks.config_manager.save_raw_configuration = MagicMock()
415413

@@ -429,7 +427,7 @@ def test_discovery_updates_existing_configuration(
429427
assert save_config_args[0] == "Config" # First arg is config type
430428
updated_classes = save_config_args[1]["classes"] # Second arg is config dict
431429

432-
# Should have 2 classes: I-9 (from Default) + W-4 (updated)
430+
# Should have 2 classes: I-9 (existing in version) + W-4 (updated by discovery)
433431
assert len(updated_classes) == 2
434432

435433
# Find and verify the updated W-4 class (JSON Schema format)
@@ -440,7 +438,7 @@ def test_discovery_updates_existing_configuration(
440438
)
441439
assert len(w4_class["properties"]) == 3
442440

443-
# Verify I-9 class is still present (JSON Schema format)
441+
# Verify I-9 class is still present from the version (JSON Schema format)
444442
i9_class = next(cls for cls in updated_classes if cls["$id"] == "i9")
445443
assert i9_class["description"] == "Employment Eligibility Verification"
446444

0 commit comments

Comments
 (0)