Skip to content

Commit ec27c18

Browse files
author
Bob Strahan
committed
feat(idp_cli): add .uploading marker to prevent resolver race condition
1 parent b1b4310 commit ec27c18

2 files changed

Lines changed: 45 additions & 1 deletion

File tree

  • lib/idp_cli_pkg/idp_cli
  • nested/appsync/src/lambda/test_set_resolver

lib/idp_cli_pkg/idp_cli/cli.py

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2432,6 +2432,15 @@ def generate_manifest(
24322432
f"[yellow]Warning: Could not clear existing files: {e}[/yellow]"
24332433
)
24342434

2435+
# Place .uploading marker to prevent resolver race condition
2436+
# The test set resolver's auto-detection skips folders with this marker,
2437+
# preventing premature validation before all files are uploaded.
2438+
# See: https://github.com/aws-solutions-library-samples/accelerated-intelligent-document-processing-on-aws/issues/193
2439+
marker_key = f"{test_set}/.uploading"
2440+
s3_client.put_object(
2441+
Bucket=test_set_bucket, Key=marker_key, Body=b"upload-in-progress"
2442+
)
2443+
24352444
# Upload input documents
24362445
for i, doc in enumerate(documents):
24372446
doc_path = doc["document_path"]
@@ -2487,6 +2496,15 @@ def generate_manifest(
24872496
console.print()
24882497

24892498
if test_set:
2499+
# Remove .uploading marker now that all files are uploaded
2500+
marker_key = f"{test_set}/.uploading"
2501+
try:
2502+
s3_client.delete_object(Bucket=test_set_bucket, Key=marker_key)
2503+
except Exception as e:
2504+
console.print(
2505+
f"[yellow]Warning: Could not remove upload marker: {e}[/yellow]"
2506+
)
2507+
24902508
# Auto-register test set in tracking table
24912509
from idp_cli.stack_info import StackInfo
24922510

@@ -2929,6 +2947,12 @@ def _create_test_set_from_manifest(
29292947
except Exception as e:
29302948
console.print(f"[yellow]Warning: Could not clear existing files: {e}[/yellow]")
29312949

2950+
# Place .uploading marker to prevent resolver race condition (issue #193)
2951+
marker_key = f"{test_set_name}/.uploading"
2952+
s3_client.put_object(
2953+
Bucket=test_set_bucket, Key=marker_key, Body=b"upload-in-progress"
2954+
)
2955+
29322956
# Copy input files
29332957
for _, row in df.iterrows():
29342958
source_path = row["document_path"]
@@ -2966,6 +2990,12 @@ def _create_test_set_from_manifest(
29662990
s3_key = f"{test_set_name}/baseline/{filename}/{rel_path}"
29672991
s3_client.upload_file(baseline_file, test_set_bucket, s3_key)
29682992

2993+
# Remove .uploading marker now that all files are uploaded (issue #193)
2994+
try:
2995+
s3_client.delete_object(Bucket=test_set_bucket, Key=marker_key)
2996+
except Exception as e:
2997+
console.print(f"[yellow]Warning: Could not remove upload marker: {e}[/yellow]")
2998+
29692999
console.print(
29703000
f"[green]✓ Test set '{test_set_name}' created with {len(df)} files[/green]"
29713001
)

nested/appsync/src/lambda/test_set_resolver/index.py

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -413,8 +413,22 @@ def get_test_sets():
413413
return result
414414

415415
def _is_valid_test_set_structure(s3_client, bucket, prefix):
416-
"""Check if prefix contains input/ and baseline/ folders"""
416+
"""Check if prefix contains input/ and baseline/ folders.
417+
418+
Also checks for a .uploading marker file which indicates the CLI is still
419+
uploading files. This prevents a race condition where the resolver auto-detects
420+
and validates a test set before all files (especially baselines) are uploaded.
421+
See: https://github.com/aws-solutions-library-samples/accelerated-intelligent-document-processing-on-aws/issues/193
422+
"""
417423
try:
424+
# Check for upload-in-progress marker
425+
try:
426+
s3_client.head_object(Bucket=bucket, Key=f"{prefix}/.uploading")
427+
logger.info(f"Skipping {prefix} - upload in progress (.uploading marker found)")
428+
return False
429+
except Exception:
430+
pass # No marker = not uploading, proceed with validation
431+
418432
# Check for input/ folder
419433
input_response = s3_client.list_objects_v2(
420434
Bucket=bucket,

0 commit comments

Comments
 (0)