@@ -603,3 +603,215 @@ def test_final_state_cursor_skips_retention_check_and_uses_incremental():
603603
604604 records = get_records (source , _CONFIG , configured_catalog , state )
605605 assert len (records ) == 1
606+
607+
# Declarative manifest template shared by the parent/child retention tests.
#
# Layout:
#   * ``ParentStream`` is a ``StateDelegatingStream`` with two variants:
#       - ``full_refresh_stream`` reads ``/parents`` and sends no cursor
#         request options.
#       - ``incremental_stream`` reads ``/parents_incremental`` in 15-day
#         steps and injects ``start``/``end`` as request parameters.
#   * ``ChildStream`` is the only stream exposed under ``streams``; it is
#     partitioned over ``ParentStream`` through a ``SubstreamPartitionRouter``
#     with ``incremental_dependency`` enabled.
#
# The template itself carries no ``api_retention_period``; tests add one to a
# deep copy so this module-level object is never mutated.
_PARENT_CHILD_MANIFEST: dict = {
    "version": "6.0.0",
    "type": "DeclarativeSource",
    "check": {"type": "CheckStream", "stream_names": ["ChildStream"]},
    "definitions": {
        "ParentStream": {
            "type": "StateDelegatingStream",
            "name": "ParentStream",
            # Variant that reads the plain ``/parents`` endpoint.
            "full_refresh_stream": {
                "type": "DeclarativeStream",
                "name": "ParentStream",
                "primary_key": [],
                "schema_loader": {
                    "type": "InlineSchemaLoader",
                    "schema": {
                        "$schema": "http://json-schema.org/schema#",
                        "properties": {},
                        "type": "object",
                    },
                },
                "retriever": {
                    "type": "SimpleRetriever",
                    "requester": {
                        "type": "HttpRequester",
                        "url_base": "https://api.test.com",
                        "path": "/parents",
                        "http_method": "GET",
                    },
                    "record_selector": {
                        "type": "RecordSelector",
                        "extractor": {"type": "DpathExtractor", "field_path": []},
                    },
                },
                "incremental_sync": {
                    "type": "DatetimeBasedCursor",
                    "start_datetime": {
                        "datetime": "{{ format_datetime(config['start_date'], '%Y-%m-%d') }}"
                    },
                    "end_datetime": {"datetime": "{{ now_utc().strftime('%Y-%m-%d') }}"},
                    "datetime_format": "%Y-%m-%d",
                    "cursor_datetime_formats": ["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"],
                    "cursor_field": "updated_at",
                },
            },
            # Variant that reads ``/parents_incremental`` with a windowed cursor.
            "incremental_stream": {
                "type": "DeclarativeStream",
                "name": "ParentStream",
                "primary_key": [],
                "schema_loader": {
                    "type": "InlineSchemaLoader",
                    "schema": {
                        "$schema": "http://json-schema.org/schema#",
                        "properties": {},
                        "type": "object",
                    },
                },
                "retriever": {
                    "type": "SimpleRetriever",
                    "requester": {
                        "type": "HttpRequester",
                        "url_base": "https://api.test.com",
                        "path": "/parents_incremental",
                        "http_method": "GET",
                    },
                    "record_selector": {
                        "type": "RecordSelector",
                        "extractor": {"type": "DpathExtractor", "field_path": []},
                    },
                },
                "incremental_sync": {
                    "type": "DatetimeBasedCursor",
                    "start_datetime": {
                        "datetime": "{{ format_datetime(config['start_date'], '%Y-%m-%d') }}"
                    },
                    "end_datetime": {"datetime": "{{ now_utc().strftime('%Y-%m-%d') }}"},
                    "datetime_format": "%Y-%m-%d",
                    "cursor_datetime_formats": ["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"],
                    "cursor_granularity": "P1D",
                    "step": "P15D",
                    "cursor_field": "updated_at",
                    "start_time_option": {
                        "type": "RequestOption",
                        "field_name": "start",
                        "inject_into": "request_parameter",
                    },
                    "end_time_option": {
                        "type": "RequestOption",
                        "field_name": "end",
                        "inject_into": "request_parameter",
                    },
                },
            },
        },
        "ChildStream": {
            "type": "DeclarativeStream",
            "name": "ChildStream",
            "primary_key": [],
            "schema_loader": {
                "type": "InlineSchemaLoader",
                "schema": {
                    "$schema": "http://json-schema.org/schema#",
                    "properties": {},
                    "type": "object",
                },
            },
            "retriever": {
                "type": "SimpleRetriever",
                "requester": {
                    "type": "HttpRequester",
                    "url_base": "https://api.test.com",
                    # One request per parent record, keyed by the partition field.
                    "path": "/children/{{ stream_slice.parent_id }}",
                    "http_method": "GET",
                },
                "record_selector": {
                    "type": "RecordSelector",
                    "extractor": {"type": "DpathExtractor", "field_path": []},
                },
                "partition_router": {
                    "type": "SubstreamPartitionRouter",
                    "parent_stream_configs": [
                        {
                            "stream": "#/definitions/ParentStream",
                            "parent_key": "id",
                            "partition_field": "parent_id",
                            "incremental_dependency": True,
                        }
                    ],
                },
            },
            "incremental_sync": {
                "type": "DatetimeBasedCursor",
                "start_datetime": {
                    "datetime": "{{ format_datetime(config['start_date'], '%Y-%m-%d') }}"
                },
                "end_datetime": {"datetime": "{{ now_utc().strftime('%Y-%m-%d') }}"},
                "datetime_format": "%Y-%m-%d",
                "cursor_datetime_formats": ["%Y-%m-%d"],
                "cursor_field": "updated_at",
            },
        },
    },
    "streams": [{"$ref": "#/definitions/ChildStream"}],
    "spec": {
        "connection_specification": {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "type": "object",
            "required": [],
            "properties": {},
            "additionalProperties": True,
        },
        "documentation_url": "https://example.org",
        "type": "Spec",
    },
}
762+
763+
def _create_parent_child_manifest_with_retention_period(
    api_retention_period: str,
) -> dict:
    """Return a copy of the parent/child manifest with a retention period on the parent.

    Args:
        api_retention_period: Value stored under the ``api_retention_period``
            key of the ``ParentStream`` definition (e.g. ``"P7D"``).

    Returns:
        A deep copy of ``_PARENT_CHILD_MANIFEST`` with the key set; the
        module-level template is left untouched.
    """
    customized = copy.deepcopy(_PARENT_CHILD_MANIFEST)
    parent_definition = customized["definitions"]["ParentStream"]
    parent_definition["api_retention_period"] = api_retention_period
    return customized
770+
771+
@freezegun.freeze_time("2024-07-15")
def test_parent_state_delegating_stream_retention_falls_back_to_full_refresh():
    """A stale parent cursor stored in the child's state sends the parent to full refresh.

    Setup: time is frozen at 2024-07-15, the parent's retention period is
    ``P7D`` and the parent cursor saved under the child's ``parent_state`` is
    2024-06-01. Only the parent's ``/parents`` endpoint is mocked;
    ``/parents_incremental`` is not registered. The read is expected to yield
    exactly one record.
    """
    manifest = _create_parent_child_manifest_with_retention_period("P7D")

    # Payloads served by the mocked endpoints.
    parent_payload = [{"id": 1, "name": "parent_1", "updated_at": "2024-07-14"}]
    child_payload = [{"id": 10, "name": "child_1", "updated_at": "2024-07-14"}]

    # Child state whose embedded parent cursor (2024-06-01) lies well before
    # the frozen "now" minus the 7-day retention period.
    child_state_blob = AirbyteStateBlob(
        use_global_cursor=False,
        state={"updated_at": "2024-07-14"},
        states=[],
        parent_state={"ParentStream": {"updated_at": "2024-06-01"}},
        lookback_window=0,
    )
    child_descriptor = StreamDescriptor(name="ChildStream", namespace=None)
    state = [
        AirbyteStateMessage(
            type=AirbyteStateType.STREAM,
            stream=AirbyteStreamState(
                stream_descriptor=child_descriptor,
                stream_state=child_state_blob,
            ),
        )
    ]

    with HttpMocker() as http_mocker:
        http_mocker.get(
            HttpRequest(url="https://api.test.com/parents"),
            HttpResponse(body=json.dumps(parent_payload)),
        )
        http_mocker.get(
            HttpRequest(url="https://api.test.com/children/1"),
            HttpResponse(body=json.dumps(child_payload)),
        )

        source = ConcurrentDeclarativeSource(
            source_config=manifest, config=_CONFIG, catalog=None, state=state
        )
        configured_catalog = create_configured_catalog(source, _CONFIG)
        records = get_records(source, _CONFIG, configured_catalog, state)
        assert len(records) == 1
0 commit comments