Skip to content

Commit e57de39

Browse files
devin-ai-integration[bot]bot_apk
andcommitted
feat: make max_done_report_age_hours configurable via user config
Add max_done_report_age_hours to connector spec (integer, min=0, max=24, default=0). When 0 (default), DONE reports are never reused — new reports are always created. When set to a positive value, DONE reports younger than that threshold are reused. IN_PROGRESS/IN_QUEUE/CANCELLED/FATAL reports are unaffected by this setting. Updated 48 unit tests covering all configurable threshold scenarios. Co-Authored-By: bot_apk <apk@cognition.ai>
1 parent 6747652 commit e57de39

4 files changed

Lines changed: 173 additions & 34 deletions

File tree

airbyte-integrations/connectors/source-amazon-seller-partner/components.py

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -569,10 +569,11 @@ class ReportCreationRequester(HttpRequester):
569569
1. Before creating a new report via POST, call GET /reports to check for existing reports
570570
of the same reportType, matching date range, and marketplaceIds.
571571
2. If matching reports are found, select the most recently created one (by createdTime).
572-
DONE reports older than 1 day are skipped since their data snapshot may be stale and
573-
the report document may have expired. Status filtering (e.g. CANCELLED, FATAL) is NOT
574-
done here — the manifest's status_mapping is the single source of truth for which
575-
statuses are retryable, skippable, or terminal.
572+
DONE reports older than `max_done_report_age_hours` (from config, default 0) are skipped
573+
since their data snapshot may be stale and the report document may have expired. When the
574+
config value is 0 (default), DONE reports are never reused. Status filtering (e.g.
575+
CANCELLED, FATAL) is NOT done here — the manifest's status_mapping is the single source
576+
of truth for which statuses are retryable, skippable, or terminal.
576577
3. If no suitable report is found, fall through to super().send_request() to create a new one.
577578
"""
578579

@@ -682,7 +683,8 @@ def _find_existing_report(
682683
# DONE reports older than this threshold are considered stale (data snapshot may be outdated,
683684
# report document may have expired). IN_QUEUE and IN_PROGRESS reports are not subject to
684685
# this check because they are still being processed.
685-
max_done_report_age_hours = 24
686+
# When max_done_report_age_hours is 0 (default), DONE reports are never reused.
687+
max_done_report_age_hours = self.config.get("max_done_report_age_hours", 0)
686688
now = ab_datetime_now()
687689

688690
# Collect all matching candidates, then pick the most recently created one
@@ -699,22 +701,31 @@ def _find_existing_report(
699701
if not self._date_ranges_match(requested_start, requested_end, report_start, report_end):
700702
continue
701703

702-
# Check createdTime freshness for DONE reports
704+
# Check createdTime freshness for DONE reports.
705+
# When max_done_report_age_hours is 0 (default), DONE reports are never reused.
703706
report_status = report.get("processingStatus", "")
704707
created_time_str = report.get("createdTime", "")
705-
if report_status == "DONE" and created_time_str:
706-
try:
707-
created_time = ab_datetime_parse(created_time_str)
708-
age_hours = (now - created_time).total_seconds() / 3600
709-
if age_hours > max_done_report_age_hours:
710-
report_id = report.get("reportId", "")
711-
logger.info(
712-
f"Skipping stale DONE report {report_id} (created {age_hours:.1f}h ago) "
713-
f"for {report_type}. Will look for a newer one."
714-
)
715-
continue
716-
except (ValueError, TypeError):
717-
pass # If we can't parse createdTime, don't skip — still usable
708+
if report_status == "DONE":
709+
if max_done_report_age_hours == 0:
710+
report_id = report.get("reportId", "")
711+
logger.info(
712+
f"Skipping DONE report {report_id} for {report_type} "
713+
f"because max_done_report_age_hours is 0 (always create new reports)."
714+
)
715+
continue
716+
if created_time_str:
717+
try:
718+
created_time = ab_datetime_parse(created_time_str)
719+
age_hours = (now - created_time).total_seconds() / 3600
720+
if age_hours > max_done_report_age_hours:
721+
report_id = report.get("reportId", "")
722+
logger.info(
723+
f"Skipping stale DONE report {report_id} (created {age_hours:.1f}h ago) "
724+
f"for {report_type}. Will look for a newer one."
725+
)
726+
continue
727+
except (ValueError, TypeError):
728+
pass # If we can't parse createdTime, don't skip — still usable
718729

719730
# Parse createdTime to compare candidates
720731
report_created_time = None

airbyte-integrations/connectors/source-amazon-seller-partner/manifest.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,18 @@ spec:
242242
default: false
243243
order: 13
244244
airbyte_hidden: true
245+
max_done_report_age_hours:
246+
title: "Max Age of Completed Reports to Reuse (Hours)"
247+
type: integer
248+
description: "When the connector finds an existing completed (DONE) report
249+
matching the same date range and marketplace, it can reuse that report
250+
instead of creating a new one. This setting controls how old (in hours)
251+
a completed report can be and still be reused. Set to 0 to always create
252+
new reports. Maximum is 24 hours."
253+
default: 0
254+
minimum: 0
255+
maximum: 24
256+
order: 14
245257
financial_events_step:
246258
title: Financial Events Step Size
247259
description: 'The time window size for fetching financial events data in chunks

airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/integration/config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,5 +48,9 @@ def with_asin_granularity(self, granularity: str) -> ConfigBuilder:
4848
self._config["sales_and_traffic_report_asin_granularity"] = granularity
4949
return self
5050

51+
def with_max_done_report_age_hours(self, hours: int) -> ConfigBuilder:
52+
self._config["max_done_report_age_hours"] = hours
53+
return self
54+
5155
def build(self) -> Dict[str, str]:
5256
return self._config

airbyte-integrations/connectors/source-amazon-seller-partner/unit_tests/test_report_creation_requester.py

Lines changed: 127 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,13 @@ def _make_report(
4646
return report
4747

4848

49-
def _make_requester() -> ReportCreationRequester:
49+
def _make_requester(config: dict = None) -> ReportCreationRequester:
5050
"""Create a ReportCreationRequester instance with mocked internals for unit testing."""
5151
requester = object.__new__(ReportCreationRequester)
5252

53+
# Set config — defaults to empty dict (max_done_report_age_hours defaults to 0 in component)
54+
requester.config = config or {}
55+
5356
# Mock _request_body_json to return a controlled body
5457
requester._request_body_json = MagicMock()
5558

@@ -270,7 +273,7 @@ class TestFindExistingReport:
270273

271274
def test_returns_matching_report(self):
272275
requester = _make_requester()
273-
matching_report = _make_report(report_id="rpt-match", marketplace_ids=["ATVPDKIKX0DER"])
276+
matching_report = _make_report(report_id="rpt-match", status="IN_PROGRESS", marketplace_ids=["ATVPDKIKX0DER"])
274277
get_response = _make_get_reports_response([matching_report])
275278
requester._http_client.send_request.return_value = (None, get_response)
276279

@@ -476,9 +479,28 @@ def test_reuses_in_queue_report(self):
476479
assert result is not None
477480
assert result.json()["reportId"] == "rpt-in-queue"
478481

482+
def test_skips_done_report_when_max_age_is_zero(self):
483+
"""When max_done_report_age_hours is 0 (default), DONE reports should never be reused."""
484+
requester = _make_requester() # default config, max_done_report_age_hours=0
485+
now = datetime.now(tz=timezone.utc)
486+
report = _make_report(report_id="rpt-done", status="DONE", created_time=now.isoformat())
487+
get_response = _make_get_reports_response([report])
488+
requester._http_client.send_request.return_value = (None, get_response)
489+
490+
result = requester._find_existing_report(
491+
stream_state=None,
492+
stream_slice=None,
493+
report_type="GET_AMAZON_FULFILLED_SHIPMENTS_DATA_GENERAL",
494+
requested_start="2023-01-01T00:00:00Z",
495+
requested_end="2023-01-30T00:00:00Z",
496+
requested_marketplace_ids=["ATVPDKIKX0DER"],
497+
)
498+
499+
assert result is None
500+
479501
def test_skips_stale_done_report(self):
480-
"""DONE reports older than 24h should be skipped."""
481-
requester = _make_requester()
502+
"""DONE reports older than max_done_report_age_hours should be skipped."""
503+
requester = _make_requester(config={"max_done_report_age_hours": 24})
482504
# Report created 48 hours ago
483505
old_time = datetime(2023, 1, 1, 0, 0, 0, tzinfo=timezone.utc).isoformat()
484506
report = _make_report(report_id="rpt-stale", status="DONE", created_time=old_time)
@@ -496,9 +518,9 @@ def test_skips_stale_done_report(self):
496518

497519
assert result is None
498520

499-
def test_reuses_fresh_done_report(self):
500-
"""DONE reports created within 24h should be reusable."""
501-
requester = _make_requester()
521+
def test_reuses_fresh_done_report_when_max_age_set(self):
522+
"""DONE reports created within max_done_report_age_hours should be reusable."""
523+
requester = _make_requester(config={"max_done_report_age_hours": 24})
502524
# Use a very recent timestamp
503525
now = datetime.now(tz=timezone.utc)
504526
fresh_time = now.isoformat()
@@ -519,8 +541,8 @@ def test_reuses_fresh_done_report(self):
519541
assert result.json()["reportId"] == "rpt-fresh"
520542

521543
def test_in_progress_report_not_subject_to_staleness(self):
522-
"""IN_PROGRESS reports should not be subject to the 24h staleness check."""
523-
requester = _make_requester()
544+
"""IN_PROGRESS reports should not be subject to the staleness check even when max_age is 0."""
545+
requester = _make_requester() # default config, max_done_report_age_hours=0
524546
old_time = datetime(2023, 1, 1, 0, 0, 0, tzinfo=timezone.utc).isoformat()
525547
report = _make_report(report_id="rpt-old-ip", status="IN_PROGRESS", created_time=old_time)
526548
get_response = _make_get_reports_response([report])
@@ -540,7 +562,7 @@ def test_in_progress_report_not_subject_to_staleness(self):
540562

541563
def test_returns_latest_report_by_created_time(self):
542564
"""When multiple matching reports exist, return the most recently created one."""
543-
requester = _make_requester()
565+
requester = _make_requester(config={"max_done_report_age_hours": 24})
544566
now = datetime.now(tz=timezone.utc)
545567
older_time = (now - timedelta(hours=2)).isoformat()
546568
newer_time = (now - timedelta(hours=1)).isoformat()
@@ -565,7 +587,7 @@ def test_returns_latest_report_by_created_time(self):
565587

566588
def test_returns_latest_in_progress_over_older_done(self):
567589
"""A newer IN_PROGRESS report should be preferred over an older DONE report."""
568-
requester = _make_requester()
590+
requester = _make_requester(config={"max_done_report_age_hours": 24})
569591
now = datetime.now(tz=timezone.utc)
570592
done_time = (now - timedelta(hours=10)).isoformat()
571593
ip_time = (now - timedelta(hours=1)).isoformat()
@@ -587,9 +609,9 @@ def test_returns_latest_in_progress_over_older_done(self):
587609
assert result is not None
588610
assert result.json()["reportId"] == "rpt-ip"
589611

590-
def test_done_report_without_created_time_is_still_usable(self):
591-
"""DONE reports without createdTime should still be reusable (no staleness check)."""
592-
requester = _make_requester()
612+
def test_done_report_without_created_time_is_still_usable_when_max_age_set(self):
613+
"""DONE reports without createdTime should still be reusable when max_age > 0."""
614+
requester = _make_requester(config={"max_done_report_age_hours": 24})
593615
report = _make_report(report_id="rpt-no-time", status="DONE")
594616
get_response = _make_get_reports_response([report])
595617
requester._http_client.send_request.return_value = (None, get_response)
@@ -606,6 +628,96 @@ def test_done_report_without_created_time_is_still_usable(self):
606628
assert result is not None
607629
assert result.json()["reportId"] == "rpt-no-time"
608630

631+
def test_done_report_without_created_time_skipped_when_max_age_is_zero(self):
632+
"""DONE reports without createdTime should be skipped when max_age is 0."""
633+
requester = _make_requester() # default config, max_done_report_age_hours=0
634+
report = _make_report(report_id="rpt-no-time", status="DONE")
635+
get_response = _make_get_reports_response([report])
636+
requester._http_client.send_request.return_value = (None, get_response)
637+
638+
result = requester._find_existing_report(
639+
stream_state=None,
640+
stream_slice=None,
641+
report_type="GET_AMAZON_FULFILLED_SHIPMENTS_DATA_GENERAL",
642+
requested_start="2023-01-01T00:00:00Z",
643+
requested_end="2023-01-30T00:00:00Z",
644+
requested_marketplace_ids=["ATVPDKIKX0DER"],
645+
)
646+
647+
assert result is None
648+
649+
def test_reuses_done_report_with_custom_max_age(self):
650+
"""DONE report created 3h ago should be reused when max_done_report_age_hours=6."""
651+
requester = _make_requester(config={"max_done_report_age_hours": 6})
652+
now = datetime.now(tz=timezone.utc)
653+
report = _make_report(
654+
report_id="rpt-3h",
655+
status="DONE",
656+
created_time=(now - timedelta(hours=3)).isoformat(),
657+
)
658+
get_response = _make_get_reports_response([report])
659+
requester._http_client.send_request.return_value = (None, get_response)
660+
661+
result = requester._find_existing_report(
662+
stream_state=None,
663+
stream_slice=None,
664+
report_type="GET_AMAZON_FULFILLED_SHIPMENTS_DATA_GENERAL",
665+
requested_start="2023-01-01T00:00:00Z",
666+
requested_end="2023-01-30T00:00:00Z",
667+
requested_marketplace_ids=["ATVPDKIKX0DER"],
668+
)
669+
670+
assert result is not None
671+
assert result.json()["reportId"] == "rpt-3h"
672+
673+
def test_skips_done_report_exceeding_custom_max_age(self):
674+
"""DONE report created 10h ago should be skipped when max_done_report_age_hours=6."""
675+
requester = _make_requester(config={"max_done_report_age_hours": 6})
676+
now = datetime.now(tz=timezone.utc)
677+
report = _make_report(
678+
report_id="rpt-10h",
679+
status="DONE",
680+
created_time=(now - timedelta(hours=10)).isoformat(),
681+
)
682+
get_response = _make_get_reports_response([report])
683+
requester._http_client.send_request.return_value = (None, get_response)
684+
685+
result = requester._find_existing_report(
686+
stream_state=None,
687+
stream_slice=None,
688+
report_type="GET_AMAZON_FULFILLED_SHIPMENTS_DATA_GENERAL",
689+
requested_start="2023-01-01T00:00:00Z",
690+
requested_end="2023-01-30T00:00:00Z",
691+
requested_marketplace_ids=["ATVPDKIKX0DER"],
692+
)
693+
694+
assert result is None
695+
696+
def test_default_config_skips_done_but_reuses_in_progress(self):
697+
"""With default config (max_age=0), DONE reports are skipped but IN_PROGRESS is reused."""
698+
requester = _make_requester() # default config, max_done_report_age_hours=0
699+
now = datetime.now(tz=timezone.utc)
700+
done_report = _make_report(report_id="rpt-done", status="DONE", created_time=now.isoformat())
701+
ip_report = _make_report(
702+
report_id="rpt-ip",
703+
status="IN_PROGRESS",
704+
created_time=(now - timedelta(hours=1)).isoformat(),
705+
)
706+
get_response = _make_get_reports_response([done_report, ip_report])
707+
requester._http_client.send_request.return_value = (None, get_response)
708+
709+
result = requester._find_existing_report(
710+
stream_state=None,
711+
stream_slice=None,
712+
report_type="GET_AMAZON_FULFILLED_SHIPMENTS_DATA_GENERAL",
713+
requested_start="2023-01-01T00:00:00Z",
714+
requested_end="2023-01-30T00:00:00Z",
715+
requested_marketplace_ids=["ATVPDKIKX0DER"],
716+
)
717+
718+
assert result is not None
719+
assert result.json()["reportId"] == "rpt-ip"
720+
609721

610722
class TestSendRequest:
611723
"""Tests for ReportCreationRequester.send_request integration."""
@@ -618,7 +730,7 @@ def test_reuses_existing_report_when_found(self):
618730
"dataEndTime": "2023-01-30T00:00:00Z",
619731
"marketplaceIds": ["ATVPDKIKX0DER"],
620732
}
621-
matching_report = _make_report(report_id="rpt-existing")
733+
matching_report = _make_report(report_id="rpt-existing", status="IN_PROGRESS")
622734
get_response = _make_get_reports_response([matching_report])
623735
requester._http_client.send_request.return_value = (None, get_response)
624736

0 commit comments

Comments
 (0)