diff --git a/tdrs-backend/tdpservice/conftest.py b/tdrs-backend/tdpservice/conftest.py index 29850dcf38..7a4461b343 100644 --- a/tdrs-backend/tdpservice/conftest.py +++ b/tdrs-backend/tdpservice/conftest.py @@ -438,6 +438,12 @@ def system_user(): register(OwaspZapScanFactory) +@pytest.fixture(autouse=True) +def disable_keycloak_sync(settings): + """Keep tests from making live Keycloak calls unless explicitly enabled.""" + settings.KEYCLOAK_SYNC_ENABLED = False + + @pytest.fixture(autouse=True) def change_test_dir(monkeypatch, tmp_path): """Change the working directory to a temporary directory for all tests.""" diff --git a/tdrs-backend/tdpservice/core/test/test_admin.py b/tdrs-backend/tdpservice/core/test/test_admin.py index 770db547df..b441a7aa90 100644 --- a/tdrs-backend/tdpservice/core/test/test_admin.py +++ b/tdrs-backend/tdpservice/core/test/test_admin.py @@ -81,7 +81,6 @@ def test_user_change_request_approve(self): requested_value="NewAdmin", status=UserChangeRequestStatus.PENDING, ) - print(f"Change Request ID: {change_request.id}") # Approve the change request response = client.post( f"/admin/users/userchangerequest/{change_request.id}/approve/" diff --git a/tdrs-backend/tdpservice/parsers/test/conftest.py b/tdrs-backend/tdpservice/parsers/test/conftest.py index 6422ce7e21..eefd7ed9af 100644 --- a/tdrs-backend/tdpservice/parsers/test/conftest.py +++ b/tdrs-backend/tdpservice/parsers/test/conftest.py @@ -324,6 +324,7 @@ def tribal_section_1_inconsistency_file(stt_user, stt): stt, "Active Case Data", DataFile.ProgramType.TRIBAL, + year=2020, ) @@ -371,7 +372,7 @@ def tanf_section_4_file_with_errors(stt_user, stt): stt_user, stt, "Stratum Data", - DataFile.ProgramType.TRIBAL, + DataFile.ProgramType.TANF, year=2022, ) @@ -650,10 +651,9 @@ def m3_go_cat2_invalid_68_69_file(): program_type=DataFile.ProgramType.SSP, file__data=( b"HEADER20234A24 SSP1ED\n" - b"M12023101111111112721401400351021331100273000000000000000105400000000000000000000000000000000" - 
b"00222222000000002229 \n" - b"M320231011111111127120110615WTTTP99B#222122222043011000000004201001013333333330000000110000009999" - b"8888\n" + b"M120231011111111127214014003510213311002730000000000000001054" + b"0000000000000000000000000000000000222222000000002229 \n" + b"M320231011111111127120110615WTTTP99B#2221222220430010000000042010010133333333300000001100000099998888\n" b"TRAILER0000002 " ), ) @@ -796,7 +796,7 @@ def tanf_s4_exact_dup_file(): section="Stratum Data", file__name="s4_exact_duplicate.txt", file__section="Stratum Data", - program_type=DataFile.ProgramType.SSP, + program_type=DataFile.ProgramType.TANF, file__data=( b"HEADER20214S06 TAN1 D\n" b"T720214101006853700680540068454103000312400037850003180104000347400036460003583106" diff --git a/tdrs-backend/tdpservice/parsers/test/integration/test_go_parse.py b/tdrs-backend/tdpservice/parsers/test/integration/test_go_parse.py index 0e5be436db..e5d4c469e7 100644 --- a/tdrs-backend/tdpservice/parsers/test/integration/test_go_parse.py +++ b/tdrs-backend/tdpservice/parsers/test/integration/test_go_parse.py @@ -16,9 +16,7 @@ ParserError, ParserErrorCategoryChoices, ) - -# TODO: uncomment when fra tests implemented -# from tdpservice.search_indexes.models.fra import TANF_Exiter1 +from tdpservice.search_indexes.models.fra import TANF_Exiter1 from tdpservice.search_indexes.models.ssp import ( SSP_M1, SSP_M2, @@ -37,17 +35,15 @@ TANF_T6, TANF_T7, ) - -# TODO: uncomment when tribal tests implemented -# from tdpservice.search_indexes.models.tribal import ( -# Tribal_TANF_T1, -# Tribal_TANF_T2, -# Tribal_TANF_T3, -# Tribal_TANF_T4, -# Tribal_TANF_T5, -# Tribal_TANF_T6, -# Tribal_TANF_T7, -# ) +from tdpservice.search_indexes.models.tribal import ( + Tribal_TANF_T1, + Tribal_TANF_T2, + Tribal_TANF_T3, + Tribal_TANF_T4, + Tribal_TANF_T5, + Tribal_TANF_T6, + Tribal_TANF_T7, +) logger = logging.getLogger(__name__) @@ -100,16 +96,16 @@ def parse_datafile(dfs, datafile, timeout_seconds=GO_PARSE_TIMEOUT_SECONDS): class 
TestGoParse: """Tests for parse and validation flows.""" - # @pytest.fixture - # def parsed_small_correct_file(self, small_correct_file, dfs): - # """Return parsed small_correct_file and its DataFileSummary.""" - # small_correct_file.year = 2021 - # small_correct_file.quarter = "Q1" - # small_correct_file.save() + @pytest.fixture + def parsed_small_correct_file(self, small_correct_file, dfs): + """Return parsed small_correct_file and its DataFileSummary.""" + small_correct_file.year = 2021 + small_correct_file.quarter = "Q1" + small_correct_file.save() - # parse_datafile(dfs, small_correct_file) + parse_datafile(dfs, small_correct_file) - # return small_correct_file, dfs + return small_correct_file, dfs # @pytest.fixture # def parsed_bad_trailer_file(self, bad_trailer_file, dfs): @@ -136,197 +132,211 @@ class TestGoParse: # parser_errors = ParserError.objects.filter(file=bad_trailer_file_2) # return bad_trailer_file_2, dfs, parser_errors - # @pytest.mark.django_db(transaction=True) - # def test_small_correct_file_case_consistency_error( - # self, parsed_small_correct_file - # ): - # """Test case consistency errors are recorded for small_correct_file.""" - # datafile, _dfs = parsed_small_correct_file - # errors = ParserError.objects.filter(file=datafile).order_by("id") - # assert errors.count() == 2 - # assert errors.first().error_type == ParserErrorCategoryChoices.CASE_CONSISTENCY + @pytest.mark.django_db(transaction=True) + def test_go_small_correct_file_case_consistency_error( + self, parsed_small_correct_file + ): + """Test case consistency errors are recorded for small_correct_file.""" + datafile, _dfs = parsed_small_correct_file + errors = ParserError.objects.filter(file=datafile).order_by("id") + # Go parser generates cat4 error that the python parser misses + assert errors.count() == 3 + assert errors.first().error_type == ParserErrorCategoryChoices.CASE_CONSISTENCY - # @pytest.mark.django_db(transaction=True) - # def 
test_small_correct_file_case_aggregates_rejected( - # self, parsed_small_correct_file - # ): - # """Test case aggregates for rejected small_correct_file.""" - # _datafile, dfs = parsed_small_correct_file - # dfs.status = dfs.get_status() - # dfs.case_aggregates = aggregates.case_aggregates_by_month(dfs.datafile, dfs.status) - # assert dfs.case_aggregates == { - # "rejected": 1, - # "months": [ - # { - # "accepted_without_errors": "N/A", - # "accepted_with_errors": "N/A", - # "month": "Oct", - # }, - # { - # "accepted_without_errors": "N/A", - # "accepted_with_errors": "N/A", - # "month": "Nov", - # }, - # { - # "accepted_without_errors": "N/A", - # "accepted_with_errors": "N/A", - # "month": "Dec", - # }, - # ], - # } - # assert dfs.get_status() == DataFileSummary.Status.REJECTED + @pytest.mark.django_db(transaction=True) + def test_go_small_correct_file_case_aggregates_rejected( + self, parsed_small_correct_file + ): + """Test case aggregates for rejected small_correct_file.""" + _datafile, dfs = parsed_small_correct_file + dfs.status = dfs.get_status() + dfs.case_aggregates = aggregates.case_aggregates_by_month( + dfs.datafile, dfs.status + ) + assert dfs.case_aggregates == { + "rejected": 1, + "months": [ + { + "accepted_without_errors": "N/A", + "accepted_with_errors": "N/A", + "month": "Oct", + }, + { + "accepted_without_errors": "N/A", + "accepted_with_errors": "N/A", + "month": "Nov", + }, + { + "accepted_without_errors": "N/A", + "accepted_with_errors": "N/A", + "month": "Dec", + }, + ], + } + assert dfs.get_status() == DataFileSummary.Status.REJECTED - # @pytest.mark.django_db(transaction=True) - # def test_small_correct_file_no_records_created(self, parsed_small_correct_file): - # """Test that small_correct_file does not create records when rejected.""" - # _datafile, _dfs = parsed_small_correct_file - # assert TANF_T1.objects.count() == 0 + @pytest.mark.django_db(transaction=True) + def test_go_small_correct_file_no_records_created(self, 
parsed_small_correct_file): + """Test that small_correct_file does not create records when rejected.""" + _datafile, _dfs = parsed_small_correct_file + assert TANF_T1.objects.count() == 0 - # @pytest.mark.django_db(transaction=True) - # @pytest.mark.parametrize( - # "section, expected_message, expected_aggregates, save_dfs", - # [ - # ( - # "Closed Case Data", - # "Data does not match the expected layout for Closed Case Data.", - # { - # "rejected": 1, - # "months": [ - # { - # "accepted_without_errors": "N/A", - # "accepted_with_errors": "N/A", - # "month": "Oct", - # }, - # { - # "accepted_without_errors": "N/A", - # "accepted_with_errors": "N/A", - # "month": "Nov", - # }, - # { - # "accepted_without_errors": "N/A", - # "accepted_with_errors": "N/A", - # "month": "Dec", - # }, - # ], - # }, - # False, - # ), - # ( - # "SSP Active Case Data", - # "Data does not match the expected layout for " - # "SSP Active Case Data.", - # None, - # True, - # ), - # ], - # ) - # def test_go_parse_section_mismatch_variants( - # self, - # small_correct_file, - # dfs, - # section, - # expected_message, - # expected_aggregates, - # save_dfs, - # ): - # """Test parsing when file metadata does not match the raw data layout.""" - # small_correct_file.section = section - # small_correct_file.save() + @pytest.mark.django_db(transaction=True) + @pytest.mark.parametrize( + "program, section, expected_message, expected_aggregates, save_dfs, num_errors", + [ + ( + "TAN", + "Closed Case Data", + "Data does not match the expected layout for Closed Case Data.", + { + "rejected": 1, + "months": [ + { + "accepted_without_errors": "N/A", + "accepted_with_errors": "N/A", + "month": "Oct", + }, + { + "accepted_without_errors": "N/A", + "accepted_with_errors": "N/A", + "month": "Nov", + }, + { + "accepted_without_errors": "N/A", + "accepted_with_errors": "N/A", + "month": "Dec", + }, + ], + }, + False, + 2, + ), + ( + "SSP", + "Active Case Data", + # Go parser is explicitly looking for records 
prefixed with "M" + "Unknown record type was found.", + None, + True, + 2, + ), + ], + ) + def test_go_parse_section_mismatch_variants( + self, + small_correct_file, + dfs, + program, + section, + expected_message, + expected_aggregates, + save_dfs, + num_errors, + ): + """Test parsing when file metadata does not match the raw data layout.""" + small_correct_file.program_type = program + small_correct_file.section = section + small_correct_file.save() - # dfs.datafile = small_correct_file - # if save_dfs: - # dfs.save() + dfs.datafile = small_correct_file + if save_dfs: + dfs.save() - # parse_datafile(dfs, small_correct_file) + parse_datafile(dfs, small_correct_file) - # dfs.status = dfs.get_status() - # assert dfs.status == DataFileSummary.Status.REJECTED - # parser_errors = ParserError.objects.filter(file=small_correct_file) - # assert parser_errors.count() == 1 + dfs.status = dfs.get_status() + assert dfs.status == DataFileSummary.Status.REJECTED + parser_errors = ParserError.objects.filter(file=small_correct_file).order_by( + "-row_number" + ) + assert parser_errors.count() == num_errors - # if expected_aggregates is not None: - # dfs.case_aggregates = aggregates.case_aggregates_by_month( - # dfs.datafile, dfs.status - # ) - # assert dfs.case_aggregates == expected_aggregates + if expected_aggregates is not None: + dfs.case_aggregates = aggregates.case_aggregates_by_month( + dfs.datafile, dfs.status + ) + assert dfs.case_aggregates == expected_aggregates - # err = parser_errors.first() - # assert err.row_number == 1 - # assert err.error_type == ParserErrorCategoryChoices.PRE_CHECK - # assert err.error_message == expected_message - # assert err.content_type is None - # assert err.object_id is None + err = parser_errors.first() + assert err.error_type in ( + ParserErrorCategoryChoices.PRE_CHECK, + ParserErrorCategoryChoices.RECORD_PRE_CHECK, + ) + assert err.error_message == expected_message + assert err.content_type is None + assert err.object_id is None - # 
@pytest.mark.django_db(transaction=True) - # @pytest.mark.parametrize( - # "fixture_name, updates, expected", - # [ - # ( - # "bad_test_file", - # {}, - # { - # "count": 1, - # "row_number": 1, - # "error_message": ( - # "HEADER: record length is 24 characters but must be 23." - # ), - # }, - # ), - # ( - # "bad_file_missing_header", - # {}, - # { - # "count": 2, - # "row_number": 1, - # "error_message": ( - # "HEADER: record length is 14 characters but must be 23." - # ), - # "status": DataFileSummary.Status.REJECTED, - # }, - # ), - # ( - # "bad_file_multiple_headers", - # {"year": 2024, "quarter": "Q1"}, - # { - # "count": 1, - # "row_number": 9, - # "error_message": "Multiple headers found.", - # "status": DataFileSummary.Status.REJECTED, - # }, - # ), - # ( - # "big_bad_test_file", - # {"year": 2022, "quarter": "Q1"}, - # { - # "count": 1, - # "row_number": 3679, - # "error_message": "Multiple headers found.", - # }, - # ), - # ], - # ) - # def test_go_parse_precheck_header_errors(self, request, fixture_name, updates, expected, dfs): - # """Test parsing failures triggered by header/pre-check validation.""" - # datafile = request.getfixturevalue(fixture_name) - # for field, value in updates.items(): - # setattr(datafile, field, value) - # if updates: - # datafile.save() + @pytest.mark.django_db(transaction=True) + @pytest.mark.parametrize( + "fixture_name, updates, expected", + [ + ( + "bad_test_file", + {}, + { + "count": 3, + "row_number": 1, + "error_message": ( + "HEADER: record length is 24 characters but must be 23." 
+ ), + }, + ), + ( + "bad_file_missing_header", + {}, + { + "count": 2, + "row_number": 1, + "error_message": ("Your file does not start with a HEADER."), + "status": DataFileSummary.Status.REJECTED, + }, + ), + ( + "bad_file_multiple_headers", + {"year": 2024, "quarter": "Q1"}, + { + "count": 1, + "row_number": 9, + "error_message": "Multiple headers found.", + "status": DataFileSummary.Status.REJECTED, + }, + ), + ( + "big_bad_test_file", + {"year": 2022, "quarter": "Q1"}, + { + "count": 1, + "row_number": 3679, + "error_message": "Multiple headers found.", + }, + ), + ], + ) + def test_go_parse_precheck_header_errors( + self, request, fixture_name, updates, expected, dfs + ): + """Test parsing failures triggered by header/pre-check validation.""" + datafile = request.getfixturevalue(fixture_name) + for field, value in updates.items(): + setattr(datafile, field, value) + if updates: + datafile.save() - # parse_datafile(dfs, datafile) + parse_datafile(dfs, datafile) - # if expected.get("status"): - # assert dfs.get_status() == expected["status"] + if expected.get("status"): + assert dfs.get_status() == expected["status"] - # parser_errors = ParserError.objects.filter(file=datafile).order_by("id") - # assert parser_errors.count() == expected["count"] + parser_errors = ParserError.objects.filter(file=datafile).order_by("id") + assert parser_errors.count() == expected["count"] - # err = parser_errors.first() - # assert err.row_number == expected["row_number"] - # assert err.error_type == ParserErrorCategoryChoices.PRE_CHECK - # assert err.error_message == expected["error_message"] - # assert err.content_type is None - # assert err.object_id is None + err = parser_errors.first() + assert err.row_number == expected["row_number"] + assert err.error_type == ParserErrorCategoryChoices.PRE_CHECK + assert err.error_message == expected["error_message"] + assert err.content_type is None + assert err.object_id is None # @pytest.mark.django_db(transaction=True) # def 
test_bad_trailer_file_trailer_error(self, parsed_bad_trailer_file): @@ -488,29 +498,6 @@ def test_go_parse_big_file(self, big_file, dfs): assert TANF_T2.objects.filter(datafile=big_file).count() == 882 assert TANF_T3.objects.filter(datafile=big_file).count() == 1376 - @pytest.mark.django_db(transaction=True) - def test_go_parse_big_s1_file_with_rollback(self, big_s1_rollback_file, dfs): - """Test Go parser rollback when a second header is found mid-parse.""" - big_s1_rollback_file.year = 2023 - big_s1_rollback_file.quarter = "Q2" - big_s1_rollback_file.save() - - parse_datafile(dfs, big_s1_rollback_file) - - parser_errors = ParserError.objects.filter(file=big_s1_rollback_file) - assert parser_errors.count() == 1 - - err = parser_errors.first() - assert err.row_number == 13609 - assert err.error_type == ParserErrorCategoryChoices.PRE_CHECK - assert err.error_message == "Multiple headers found." - assert err.content_type is None - assert err.object_id is None - - assert TANF_T1.objects.filter(datafile=big_s1_rollback_file).count() == 0 - assert TANF_T2.objects.filter(datafile=big_s1_rollback_file).count() == 0 - assert TANF_T3.objects.filter(datafile=big_s1_rollback_file).count() == 0 - @pytest.mark.django_db(transaction=True) def test_go_parse_empty_file(self, empty_file, dfs): """Test parsing of empty_file.""" @@ -654,11 +641,20 @@ def test_go_parse_ssp_section1_datafile(self, ssp_section1_datafile, dfs): # We have a few more errors because the go parser separates the the OR'd # category1.validate_fieldYearMonth_with_headerYearQuarter(). and # category1.validateRptMonthYear() into separate checks. 
- assert parser_errors.count() == 31745 + assert parser_errors.count() == 31738 - assert SSP_M1.objects.count() == expected_m1_record_count - assert SSP_M2.objects.count() == expected_m2_record_count - assert SSP_M3.objects.count() == expected_m3_record_count + assert ( + SSP_M1.objects.filter(datafile=ssp_section1_datafile).count() + == expected_m1_record_count + ) + assert ( + SSP_M2.objects.filter(datafile=ssp_section1_datafile).count() + == expected_m2_record_count + ) + assert ( + SSP_M3.objects.filter(datafile=ssp_section1_datafile).count() + == expected_m3_record_count + ) @pytest.mark.django_db(transaction=True) def test_go_parse_tanf_section1_datafile(self, small_tanf_section1_datafile, dfs): @@ -693,9 +689,13 @@ def test_go_parse_tanf_section1_datafile(self, small_tanf_section1_datafile, dfs "rejected": 0, } - assert TANF_T2.objects.count() == 5 + assert ( + TANF_T2.objects.filter(datafile=small_tanf_section1_datafile).count() == 5 + ) - t2_models = TANF_T2.objects.all().order_by("CASE_NUMBER") + t2_models = TANF_T2.objects.filter( + datafile=small_tanf_section1_datafile + ).order_by("CASE_NUMBER") t2 = t2_models[0] assert t2.RPT_MONTH_YEAR == 202010 @@ -719,9 +719,15 @@ def test_go_parse_tanf_section1_datafile_obj_counts( parse_datafile(dfs, small_tanf_section1_datafile) - assert TANF_T1.objects.count() == 5 - assert TANF_T2.objects.count() == 5 - assert TANF_T3.objects.count() == 6 + assert ( + TANF_T1.objects.filter(datafile=small_tanf_section1_datafile).count() == 5 + ) + assert ( + TANF_T2.objects.filter(datafile=small_tanf_section1_datafile).count() == 5 + ) + assert ( + TANF_T3.objects.filter(datafile=small_tanf_section1_datafile).count() == 6 + ) @pytest.mark.django_db(transaction=True)() def test_go_parse_tanf_section1_datafile_t3s( @@ -733,9 +739,13 @@ def test_go_parse_tanf_section1_datafile_t3s( parse_datafile(dfs, small_tanf_section1_datafile) - assert TANF_T3.objects.count() == 6 + assert ( + 
TANF_T3.objects.filter(datafile=small_tanf_section1_datafile).count() == 6 + ) - t3_models = TANF_T3.objects.all().order_by("CASE_NUMBER") + t3_models = TANF_T3.objects.filter( + datafile=small_tanf_section1_datafile + ).order_by("CASE_NUMBER") t3_1 = t3_models[0] assert t3_1.RPT_MONTH_YEAR == 202010 assert t3_1.CASE_NUMBER == "11111111112" @@ -870,15 +880,15 @@ def test_go_parse_small_tanf_section2_file(self, small_tanf_section2_file, dfs): parse_datafile(dfs, small_tanf_section2_file) - assert TANF_T4.objects.all().count() == 1 - assert TANF_T5.objects.all().count() == 1 + assert TANF_T4.objects.filter(datafile=small_tanf_section2_file).count() == 1 + assert TANF_T5.objects.filter(datafile=small_tanf_section2_file).count() == 1 parser_errors = ParserError.objects.filter(file=small_tanf_section2_file) assert parser_errors.count() == 0 - t4 = TANF_T4.objects.first() - t5 = TANF_T5.objects.first() + t4 = TANF_T4.objects.filter(datafile=small_tanf_section2_file).first() + t5 = TANF_T5.objects.filter(datafile=small_tanf_section2_file).first() assert t4.DISPOSITION == 1 assert t4.REC_SUB_CC == 3 @@ -894,8 +904,8 @@ def test_go_parse_tanf_section2_file(self, tanf_section2_file, dfs): parse_datafile(dfs, tanf_section2_file) - assert TANF_T4.objects.all().count() == 223 - assert TANF_T5.objects.all().count() == 605 + assert TANF_T4.objects.filter(datafile=tanf_section2_file).count() == 223 + assert TANF_T5.objects.filter(datafile=tanf_section2_file).count() == 605 parser_errors = ParserError.objects.filter(file=tanf_section2_file).order_by( "row_number" @@ -928,11 +938,13 @@ def test_go_parse_tanf_section3_file(self, tanf_section3_file, dfs): assert dfs.get_status() == DataFileSummary.Status.ACCEPTED - assert TANF_T6.objects.all().count() == 3 + assert TANF_T6.objects.filter(datafile=tanf_section3_file).count() == 3 assert parser_errors.count() == 0 - t6_objs = TANF_T6.objects.all().order_by("NUM_APPROVED") + t6_objs = 
TANF_T6.objects.filter(datafile=tanf_section3_file).order_by( + "NUM_APPROVED" + ) first = t6_objs.first() second = t6_objs[1] @@ -968,9 +980,9 @@ def test_go_parse_tanf_section1_blanks_file( for error in parser_errors: assert error.error_type == ParserErrorCategoryChoices.VALUE_CONSISTENCY - t1 = TANF_T1.objects.first() - t2 = TANF_T2.objects.first() - t3 = TANF_T3.objects.first() + t1 = TANF_T1.objects.filter(datafile=tanf_section1_file_with_blanks).first() + t2 = TANF_T2.objects.filter(datafile=tanf_section1_file_with_blanks).first() + t3 = TANF_T3.objects.filter(datafile=tanf_section1_file_with_blanks).first() assert t1.FAMILY_SANC_ADULT is None assert t2.MARITAL_STATUS is None @@ -995,12 +1007,14 @@ def test_go_parse_tanf_section4_file(self, tanf_section4_file, dfs): assert dfs.get_status() == DataFileSummary.Status.ACCEPTED - assert TANF_T7.objects.all().count() == 18 + assert TANF_T7.objects.filter(datafile=tanf_section4_file).count() == 18 parser_errors = ParserError.objects.filter(file=tanf_section4_file) assert parser_errors.count() == 0 - t7_objs = TANF_T7.objects.all().order_by("FAMILIES_MONTH") + t7_objs = TANF_T7.objects.filter(datafile=tanf_section4_file).order_by( + "FAMILIES_MONTH" + ) first = t7_objs.first() sixth = t7_objs[5] @@ -1034,7 +1048,7 @@ def test_go_parse_bad_tanf_section4_file(self, bad_tanf_section4_file, dfs): assert dfs.get_status() == DataFileSummary.Status.REJECTED - assert TANF_T7.objects.all().count() == 0 + assert TANF_T7.objects.filter(datafile=bad_tanf_section4_file).count() == 0 parser_errors = ParserError.objects.filter( file=bad_tanf_section4_file @@ -1058,7 +1072,9 @@ def test_go_parse_ssp_section4_file(self, ssp_section4_file, dfs): parse_datafile(dfs, ssp_section4_file) - m7_objs = SSP_M7.objects.all().order_by("FAMILIES_MONTH") + m7_objs = SSP_M7.objects.filter(datafile=ssp_section4_file).order_by( + "FAMILIES_MONTH" + ) dfs.status = dfs.get_status() dfs.case_aggregates = aggregates.total_errors_by_month(dfs.datafile, 
dfs.status) @@ -1106,14 +1122,22 @@ def test_go_parse_ssp_section2_file(self, ssp_section2_file, dfs): assert dfs_case_aggregate["month"] in ["Oct", "Nov", "Dec"] assert dfs.get_status() == DataFileSummary.Status.PARTIALLY_ACCEPTED - m4_objs = SSP_M4.objects.all() - m5_objs = SSP_M5.objects.all() + m4_objs = SSP_M4.objects.filter(datafile=ssp_section2_file).order_by("id") + m5_objs = SSP_M5.objects.filter(datafile=ssp_section2_file).order_by( + "AMOUNT_EARNED_INCOME" + ) expected_m4_count = 231 expected_m5_count = 703 - assert SSP_M4.objects.count() == expected_m4_count - assert SSP_M5.objects.count() == expected_m5_count + assert ( + SSP_M4.objects.filter(datafile=ssp_section2_file).count() + == expected_m4_count + ) + assert ( + SSP_M5.objects.filter(datafile=ssp_section2_file).count() + == expected_m5_count + ) # Because the go parser inserts into tables in parallel we cant rely on ID ordering m4 = m4_objs.filter(DISPOSITION=1, REC_SUB_CC=3).first() @@ -1148,7 +1172,9 @@ def test_go_parse_ssp_section3_file(self, ssp_section3_file, dfs): assert dfs.get_status() == DataFileSummary.Status.ACCEPTED - m6_objs = SSP_M6.objects.all().order_by("RPT_MONTH_YEAR") + m6_objs = SSP_M6.objects.filter(datafile=ssp_section3_file).order_by( + "RPT_MONTH_YEAR" + ) assert m6_objs.count() == 3 parser_errors = ParserError.objects.filter(file=ssp_section3_file) @@ -1205,166 +1231,199 @@ def test_go_rpt_month_year_mismatch(self, header_datafile, dfs): err = parser_errors.get(error_message=msg) assert err.error_type == ParserErrorCategoryChoices.PRE_CHECK - # @pytest.mark.django_db(transaction=True)() - # def test_go_parse_tribal_section_1_file(self, tribal_section_1_file, dfs): - # """Test parsing Tribal TANF Section 1 submission.""" - # tribal_section_1_file.year = 2022 - # tribal_section_1_file.quarter = "Q1" - # tribal_section_1_file.save() - - # dfs.datafile = tribal_section_1_file - - # parse_datafile(dfs, tribal_section_1_file) - - # dfs.status = dfs.get_status() - # assert 
dfs.status == DataFileSummary.Status.ACCEPTED - # dfs.case_aggregates = aggregates.case_aggregates_by_month(dfs.datafile, dfs.status) - # assert dfs.case_aggregates == { - # "rejected": 0, - # "months": [ - # {"month": "Oct", "accepted_without_errors": 1, "accepted_with_errors": 0}, - # {"month": "Nov", "accepted_without_errors": 0, "accepted_with_errors": 0}, - # {"month": "Dec", "accepted_without_errors": 0, "accepted_with_errors": 0}, - # ], - # } - - # assert Tribal_TANF_T1.objects.all().count() == 1 - # assert Tribal_TANF_T2.objects.all().count() == 1 - # assert Tribal_TANF_T3.objects.all().count() == 2 - - # t1_objs = Tribal_TANF_T1.objects.all().order_by("CASH_AMOUNT") - # t2_objs = Tribal_TANF_T2.objects.all().order_by("MONTHS_FED_TIME_LIMIT") - # t3_objs = Tribal_TANF_T3.objects.all().order_by("EDUCATION_LEVEL") - - # t1 = t1_objs.first() - # t2 = t2_objs.first() - # t3 = t3_objs.last() - - # assert t1.CASH_AMOUNT == 502 - # assert t2.MONTHS_FED_TIME_LIMIT == " 0" - # assert t3.EDUCATION_LEVEL == "98" + @pytest.mark.django_db(transaction=True)() + def test_go_parse_tribal_section_1_file(self, tribal_section_1_file, dfs): + """Test parsing Tribal TANF Section 1 submission.""" + tribal_section_1_file.year = 2022 + tribal_section_1_file.quarter = "Q1" + tribal_section_1_file.save() - # @pytest.mark.django_db(transaction=True)() - # def test_go_parse_tribal_section_1_inconsistency_file( - # self, - # tribal_section_1_inconsistency_file, dfs - # ): - # """Test parsing inconsistent Tribal TANF Section 1 submission.""" - # parse_datafile(dfs, tribal_section_1_inconsistency_file) + dfs.datafile = tribal_section_1_file + + parse_datafile(dfs, tribal_section_1_file) + + dfs.status = dfs.get_status() + assert dfs.status == DataFileSummary.Status.ACCEPTED + dfs.case_aggregates = aggregates.case_aggregates_by_month( + dfs.datafile, dfs.status + ) + assert dfs.case_aggregates == { + "rejected": 0, + "months": [ + { + "month": "Oct", + "accepted_without_errors": 1, + 
"accepted_with_errors": 0, + }, + { + "month": "Nov", + "accepted_without_errors": 0, + "accepted_with_errors": 0, + }, + { + "month": "Dec", + "accepted_without_errors": 0, + "accepted_with_errors": 0, + }, + ], + } - # assert Tribal_TANF_T1.objects.all().count() == 0 + assert Tribal_TANF_T1.objects.all().count() == 1 + assert Tribal_TANF_T2.objects.all().count() == 1 + assert Tribal_TANF_T3.objects.all().count() == 2 - # parser_errors = ParserError.objects.filter(file=tribal_section_1_inconsistency_file) - # assert parser_errors.count() == 1 + t1_objs = Tribal_TANF_T1.objects.all().order_by("CASH_AMOUNT") + t2_objs = Tribal_TANF_T2.objects.all().order_by("MONTHS_FED_TIME_LIMIT") + t3_objs = Tribal_TANF_T3.objects.all().order_by("EDUCATION_LEVEL") - # assert ( - # parser_errors.first().error_message - # == "Tribe Code (142) inconsistency with Program Type (TAN) " - # + "and FIPS Code (01)." - # ) + t1 = t1_objs.first() + t2 = t2_objs.first() + t3 = t3_objs.last() - # @pytest.mark.django_db(transaction=True)() - # def test_go_parse_tribal_section_2_file(self, tribal_section_2_file, dfs): - # """Test parsing Tribal TANF Section 2 submission.""" - # tribal_section_2_file.year = 2020 - # tribal_section_2_file.quarter = "Q1" + assert t1.CASH_AMOUNT == 502 + assert t2.MONTHS_FED_TIME_LIMIT == " 0" + assert t3.EDUCATION_LEVEL == "98" - # dfs.datafile = tribal_section_2_file + @pytest.mark.django_db(transaction=True)() + def test_go_parse_tribal_section_1_inconsistency_file( + self, tribal_section_1_inconsistency_file, dfs + ): + """Test parsing inconsistent Tribal TANF Section 1 submission.""" + parse_datafile(dfs, tribal_section_1_inconsistency_file) - # parse_datafile(dfs, tribal_section_2_file) + assert Tribal_TANF_T1.objects.all().count() == 0 + + parser_errors = ParserError.objects.filter( + file=tribal_section_1_inconsistency_file + ) + # Extra error for no records created + assert parser_errors.count() == 2 - # dfs.status = dfs.get_status() - # dfs.case_aggregates 
= aggregates.case_aggregates_by_month(dfs.datafile, dfs.status) - # assert dfs.case_aggregates == { - # "rejected": 0, - # "months": [ - # {"accepted_without_errors": 3, "accepted_with_errors": 0, "month": "Oct"}, - # {"accepted_without_errors": 3, "accepted_with_errors": 0, "month": "Nov"}, - # {"accepted_without_errors": 0, "accepted_with_errors": 0, "month": "Dec"}, - # ], - # } + assert ( + parser_errors.first().error_message + == "Tribe Code (142) inconsistency with Program Type (TAN) " + + "and FIPS Code (01)." + ) - # assert dfs.get_status() == DataFileSummary.Status.ACCEPTED + @pytest.mark.django_db(transaction=True)() + def test_go_parse_tribal_section_2_file(self, tribal_section_2_file, dfs): + """Test parsing Tribal TANF Section 2 submission.""" + tribal_section_2_file.year = 2020 + tribal_section_2_file.quarter = "Q1" + tribal_section_2_file.save() - # assert Tribal_TANF_T4.objects.all().count() == 6 - # assert Tribal_TANF_T5.objects.all().count() == 13 + dfs.datafile = tribal_section_2_file - # t4_objs = Tribal_TANF_T4.objects.all().order_by("CLOSURE_REASON") - # t5_objs = Tribal_TANF_T5.objects.all().order_by("COUNTABLE_MONTH_FED_TIME") + parse_datafile(dfs, tribal_section_2_file) - # t4 = t4_objs.first() - # t5 = t5_objs.last() + dfs.status = dfs.get_status() + dfs.case_aggregates = aggregates.case_aggregates_by_month( + dfs.datafile, dfs.status + ) + assert dfs.case_aggregates == { + "rejected": 0, + "months": [ + { + "accepted_without_errors": 3, + "accepted_with_errors": 0, + "month": "Oct", + }, + { + "accepted_without_errors": 3, + "accepted_with_errors": 0, + "month": "Nov", + }, + { + "accepted_without_errors": 0, + "accepted_with_errors": 0, + "month": "Dec", + }, + ], + } - # assert t4.CLOSURE_REASON == "15" - # assert t5.COUNTABLE_MONTH_FED_TIME == " 8" + assert dfs.get_status() == DataFileSummary.Status.ACCEPTED - # @pytest.mark.django_db(transaction=True)() - # def test_go_parse_tribal_section_3_file(self, tribal_section_3_file, dfs): - # 
"""Test parsing Tribal TANF Section 3 submission.""" - # tribal_section_3_file.year = 2022 - # tribal_section_3_file.quarter = "Q1" - - # dfs.datafile = tribal_section_3_file - - # parse_datafile(dfs, tribal_section_3_file) - - # dfs.status = dfs.get_status() - # dfs.case_aggregates = aggregates.total_errors_by_month(dfs.datafile, dfs.status) - # assert dfs.case_aggregates == { - # "months": [ - # {"month": "Oct", "total_errors": 0}, - # {"month": "Nov", "total_errors": 0}, - # {"month": "Dec", "total_errors": 0}, - # ] - # } + assert Tribal_TANF_T4.objects.all().count() == 6 + assert Tribal_TANF_T5.objects.all().count() == 13 - # assert dfs.get_status() == DataFileSummary.Status.ACCEPTED + t4_objs = Tribal_TANF_T4.objects.all().order_by("CLOSURE_REASON") + t5_objs = Tribal_TANF_T5.objects.all().order_by("COUNTABLE_MONTH_FED_TIME") - # assert Tribal_TANF_T6.objects.all().count() == 3 + t4 = t4_objs.first() + t5 = t5_objs.last() - # t6_objs = Tribal_TANF_T6.objects.all().order_by("NUM_APPLICATIONS") + assert t4.CLOSURE_REASON == "15" + assert t5.COUNTABLE_MONTH_FED_TIME == " 8" - # t6 = t6_objs.first() + @pytest.mark.django_db(transaction=True)() + def test_go_parse_tribal_section_3_file(self, tribal_section_3_file, dfs): + """Test parsing Tribal TANF Section 3 submission.""" + tribal_section_3_file.year = 2022 + tribal_section_3_file.quarter = "Q1" + tribal_section_3_file.save() - # assert t6.NUM_APPLICATIONS == 1 - # assert t6.NUM_FAMILIES == 41 - # assert t6.NUM_CLOSED_CASES == 3 + dfs.datafile = tribal_section_3_file - # @pytest.mark.django_db(transaction=True)() - # def test_go_parse_tribal_section_4_file(self, tribal_section_4_file, dfs): - # """Test parsing Tribal TANF Section 4 submission.""" - # tribal_section_4_file.year = 2022 - # tribal_section_4_file.quarter = "Q1" - - # dfs.datafile = tribal_section_4_file - - # parse_datafile(dfs, tribal_section_4_file) - - # dfs.status = dfs.get_status() - # dfs.case_aggregates = 
aggregates.total_errors_by_month(dfs.datafile, dfs.status) - # assert dfs.case_aggregates == { - # "months": [ - # {"month": "Oct", "total_errors": 0}, - # {"month": "Nov", "total_errors": 0}, - # {"month": "Dec", "total_errors": 0}, - # ] - # } + parse_datafile(dfs, tribal_section_3_file) - # assert Tribal_TANF_T7.objects.all().count() == 18 + dfs.status = dfs.get_status() + dfs.case_aggregates = aggregates.total_errors_by_month(dfs.datafile, dfs.status) + assert dfs.case_aggregates == { + "months": [ + {"month": "Oct", "total_errors": 0}, + {"month": "Nov", "total_errors": 0}, + {"month": "Dec", "total_errors": 0}, + ] + } - # t7_objs = Tribal_TANF_T7.objects.all().order_by("FAMILIES_MONTH") + assert dfs.get_status() == DataFileSummary.Status.ACCEPTED - # first = t7_objs.first() - # sixth = t7_objs[5] + assert Tribal_TANF_T6.objects.all().count() == 3 - # assert first.RPT_MONTH_YEAR == 202111 - # assert sixth.RPT_MONTH_YEAR == 202112 + t6_objs = Tribal_TANF_T6.objects.all().order_by("NUM_APPLICATIONS") - # assert first.TDRS_SECTION_IND == "2" - # assert sixth.TDRS_SECTION_IND == "2" + t6 = t6_objs.first() - # assert first.FAMILIES_MONTH == 274 - # assert sixth.FAMILIES_MONTH == 499 + assert t6.NUM_APPLICATIONS == 1 + assert t6.NUM_FAMILIES == 41 + assert t6.NUM_CLOSED_CASES == 3 + + @pytest.mark.django_db(transaction=True)() + def test_go_parse_tribal_section_4_file(self, tribal_section_4_file, dfs): + """Test parsing Tribal TANF Section 4 submission.""" + tribal_section_4_file.year = 2022 + tribal_section_4_file.quarter = "Q1" + tribal_section_4_file.save() + + dfs.datafile = tribal_section_4_file + + parse_datafile(dfs, tribal_section_4_file) + + dfs.status = dfs.get_status() + dfs.case_aggregates = aggregates.total_errors_by_month(dfs.datafile, dfs.status) + assert dfs.case_aggregates == { + "months": [ + {"month": "Oct", "total_errors": 0}, + {"month": "Nov", "total_errors": 0}, + {"month": "Dec", "total_errors": 0}, + ] + } + + assert 
Tribal_TANF_T7.objects.all().count() == 18 + + t7_objs = Tribal_TANF_T7.objects.all().order_by("FAMILIES_MONTH") + + first = t7_objs.first() + sixth = t7_objs[5] + + assert first.RPT_MONTH_YEAR == 202111 + assert sixth.RPT_MONTH_YEAR == 202112 + + assert first.TDRS_SECTION_IND == "2" + assert sixth.TDRS_SECTION_IND == "2" + + assert first.FAMILIES_MONTH == 274 + assert sixth.FAMILIES_MONTH == 499 # TODO: this requires more sophisticated segment based validation to gain parity with python parser. I made a test # branch `segment-validation-arch` to see what this could look like. Will explore other options and discuss with @@ -1428,8 +1487,8 @@ def test_go_empty_t4_t5_values(self, t4_t5_empty_values, dfs): dfs.datafile = t4_t5_empty_values parse_datafile(dfs, t4_t5_empty_values) parser_errors = ParserError.objects.filter(file=t4_t5_empty_values) - t4 = TANF_T4.objects.all() - t5 = TANF_T5.objects.all() + t4 = TANF_T4.objects.filter(datafile=t4_t5_empty_values) + t5 = TANF_T5.objects.filter(datafile=t4_t5_empty_values) assert t4.count() == 1 assert t4[0].STRATUM is None logger.info(t4[0].__dict__) @@ -1460,47 +1519,53 @@ def test_go_parse_t2_invalid_dob(self, t2_go_invalid_dob_file, dfs): assert year_error.error_message == "T2 Item 32: year must be larger than 1900" assert digits_error.error_message == "T2 Item 32: must be numeric" - # Tribal file - # @pytest.mark.django_db(transaction=True)() - # def test_go_parse_tanf_section4_file_with_errors(self, tanf_section_4_file_with_errors, dfs): - # """Test parsing TANF Section 4 submission.""" - # tanf_section_4_file_with_errors.year = 2022 - # tanf_section_4_file_with_errors.quarter = "Q1" - # dfs.datafile = tanf_section_4_file_with_errors - - # parse_datafile(dfs, tanf_section_4_file_with_errors) - - # dfs.status = dfs.get_status() - # dfs.case_aggregates = aggregates.total_errors_by_month(dfs.datafile, dfs.status) - # assert dfs.case_aggregates == { - # "months": [ - # {"month": "Oct", "total_errors": 2}, - # {"month": 
"Nov", "total_errors": 3}, - # {"month": "Dec", "total_errors": 2}, - # ] - # } + @pytest.mark.django_db(transaction=True)() + def test_go_parse_tanf_section4_file_with_errors( + self, tanf_section_4_file_with_errors, dfs + ): + """Test parsing TANF Section 4 submission.""" + tanf_section_4_file_with_errors.year = 2022 + tanf_section_4_file_with_errors.quarter = "Q1" + dfs.datafile = tanf_section_4_file_with_errors + + parse_datafile(dfs, tanf_section_4_file_with_errors) + + dfs.status = dfs.get_status() + dfs.case_aggregates = aggregates.total_errors_by_month(dfs.datafile, dfs.status) + assert dfs.case_aggregates == { + "months": [ + {"month": "Oct", "total_errors": 2}, + {"month": "Nov", "total_errors": 3}, + {"month": "Dec", "total_errors": 2}, + ] + } - # assert dfs.get_status() == DataFileSummary.Status.ACCEPTED_WITH_ERRORS + assert dfs.get_status() == DataFileSummary.Status.ACCEPTED_WITH_ERRORS - # assert TANF_T7.objects.all().count() == 18 + assert ( + TANF_T7.objects.filter(datafile=tanf_section_4_file_with_errors).count() + == 18 + ) - # parser_errors = ParserError.objects.filter(file=tanf_section_4_file_with_errors) + parser_errors = ParserError.objects.filter(file=tanf_section_4_file_with_errors) - # assert parser_errors.count() == 7 + assert parser_errors.count() == 7 - # t7_objs = TANF_T7.objects.all().order_by("FAMILIES_MONTH") + t7_objs = TANF_T7.objects.filter( + datafile=tanf_section_4_file_with_errors + ).order_by("FAMILIES_MONTH") - # first = t7_objs.first() - # sixth = t7_objs[5] + first = t7_objs.first() + sixth = t7_objs[5] - # assert first.RPT_MONTH_YEAR == 202111 - # assert sixth.RPT_MONTH_YEAR == 202110 + assert first.RPT_MONTH_YEAR == 202111 + assert sixth.RPT_MONTH_YEAR == 202110 - # assert first.TDRS_SECTION_IND == "1" - # assert sixth.TDRS_SECTION_IND == "1" + assert first.TDRS_SECTION_IND == "1" + assert sixth.TDRS_SECTION_IND == "1" - # assert first.FAMILIES_MONTH == 0 - # assert sixth.FAMILIES_MONTH == 446 + assert 
first.FAMILIES_MONTH == 0 + assert sixth.FAMILIES_MONTH == 446 @pytest.mark.django_db(transaction=True)() def test_go_parse_no_records_file(self, no_records_file, dfs): @@ -1535,26 +1600,26 @@ def test_go_parse_aggregates_rejected_datafile( dfs.case_aggregates = aggregates.case_aggregates_by_month( dfs.datafile, dfs.status ) - # assert dfs.case_aggregates == { - # "months": [ - # { - # "month": "Oct", - # "accepted_without_errors": "N/A", - # "accepted_with_errors": "N/A", - # }, - # { - # "month": "Nov", - # "accepted_without_errors": "N/A", - # "accepted_with_errors": "N/A", - # }, - # { - # "month": "Dec", - # "accepted_without_errors": "N/A", - # "accepted_with_errors": "N/A", - # }, - # ], - # "rejected": 1, - # } + assert dfs.case_aggregates == { + "months": [ + { + "month": "Oct", + "accepted_without_errors": "N/A", + "accepted_with_errors": "N/A", + }, + { + "month": "Nov", + "accepted_without_errors": "N/A", + "accepted_with_errors": "N/A", + }, + { + "month": "Dec", + "accepted_without_errors": "N/A", + "accepted_with_errors": "N/A", + }, + ], + "rejected": 1, + } # Again, group validators run first in the go parser and block other non precheck validation results. So we get # more errors since we always capture cat1/cat4 errors with the go parser. 
@@ -1573,7 +1638,9 @@ def test_go_parse_aggregates_rejected_datafile( assert errors.last().error_type == ParserErrorCategoryChoices.PRE_CHECK - assert TANF_T2.objects.count() == 0 + assert ( + TANF_T2.objects.filter(datafile=aggregates_rejected_datafile).count() == 0 + ) @pytest.mark.django_db(transaction=True)() def test_go_parse_tanf_section_1_file_with_bad_update_indicator( @@ -1600,26 +1667,32 @@ def test_go_parse_tanf_section_1_file_with_bad_update_indicator( in error_messages ) - # @pytest.mark.django_db(transaction=True)() - # def test_go_parse_tribal_section_4_bad_quarter(self, tribal_section_4_bad_quarter, dfs): - # """Test handling invalid quarter value that raises a ValueError exception.""" - # tribal_section_4_bad_quarter.year = 2021 - # tribal_section_4_bad_quarter.quarter = "Q1" - # dfs.datafile = tribal_section_4_bad_quarter - - # parse_datafile(dfs, tribal_section_4_bad_quarter) - # parser_errors = ParserError.objects.filter( - # file=tribal_section_4_bad_quarter - # ).order_by("id") - - # assert parser_errors.count() == 3 - - # parser_errors.first().error_message == ( - # "T7: 2020 is invalid. Calendar Quarter must be a numeric " - # "representing the Calendar Year and Quarter formatted as YYYYQ" - # ) + @pytest.mark.django_db(transaction=True)() + def test_go_parse_tribal_section_4_bad_quarter( + self, tribal_section_4_bad_quarter, dfs + ): + """Test handling invalid quarter value that raises a ValueError exception.""" + tribal_section_4_bad_quarter.year = 2021 + tribal_section_4_bad_quarter.quarter = "Q1" + tribal_section_4_bad_quarter.save() + dfs.datafile = tribal_section_4_bad_quarter + + parse_datafile(dfs, tribal_section_4_bad_quarter) + parser_errors = ParserError.objects.filter( + file=tribal_section_4_bad_quarter + ).order_by("id") + + # We get 37 errors because go treats schema precheck validators as independent over each record/segment whereas + # Python validates based on the raw row. 
There is a ticket in the backlog to enable go to behave like Python if + # we want/need. + assert parser_errors.count() == 37 - # Tribal_TANF_T7.objects.count() == 0 + parser_errors.first().error_message == ( + "T7: 2020 is invalid. Calendar Quarter must be a numeric " + "representing the Calendar Year and Quarter formatted as YYYYQ" + ) + + Tribal_TANF_T7.objects.count() == 0 @pytest.mark.django_db(transaction=True)() def test_go_parse_t3_cat2_invalid_citizenship( @@ -1735,88 +1808,171 @@ def test_go_zero_filled_fips_code_file(self, test_file_zero_filled_fips_code, df in [i.error_message for i in parser_errors] ) - # TODO: Section logic for go parser FRA not working correctly - # @pytest.mark.parametrize( - # "file", - # [ - # ("fra_bad_header_csv"), - # ("fra_bad_header_xlsx"), - # ], - # ) - # @pytest.mark.django_db(transaction=True)() - # def test_go_parse_fra_bad_header(self, request, file, dfs): - # """Test parsing FRA files with bad header data.""" - # datafile = request.getfixturevalue(file) - # datafile.year = 2024 - # datafile.quarter = "Q1" - # datafile.save() - - # dfs.datafile = datafile - # dfs.save() + @pytest.mark.parametrize( + "file", + [ + ("fra_bad_header_csv"), + ("fra_bad_header_xlsx"), + ], + ) + @pytest.mark.django_db(transaction=True)() + def test_go_parse_fra_bad_header(self, request, file, dfs): + """Test parsing FRA files with bad header data.""" + datafile = request.getfixturevalue(file) + datafile.year = 2024 + datafile.quarter = "Q1" + datafile.save() - # parse_datafile(dfs, datafile) + dfs.datafile = datafile + dfs.save() - # assert TANF_Exiter1.objects.all().count() == 0 + parse_datafile(dfs, datafile) - # errors = ParserError.objects.filter(file=datafile).order_by("id") - # assert len(errors) == 1 - # for e in errors: - # assert e.error_message == "File does not begin with FRA data." 
- # assert e.error_type == ParserErrorCategoryChoices.PRE_CHECK - # assert dfs.get_status() == DataFileSummary.Status.REJECTED + assert TANF_Exiter1.objects.all().count() == 0 - # @pytest.mark.parametrize( - # "file", - # [ - # ("fra_empty_first_row_csv"), - # ("fra_empty_first_row_xlsx"), - # ], - # ) - # @pytest.mark.django_db(transaction=True)() - # def test_go_parse_fra_empty_first_row(self, request, file, dfs): - # """Test parsing FRA files with an empty first row/no header data.""" - # datafile = request.getfixturevalue(file) - # datafile.year = 2024 - # datafile.quarter = "Q1" + errors = ParserError.objects.filter(file=datafile).order_by("id") + assert len(errors) == 1 + for e in errors: + assert e.error_message == "File does not begin with FRA data." + assert e.error_type == ParserErrorCategoryChoices.PRE_CHECK + assert dfs.get_status() == DataFileSummary.Status.REJECTED - # dfs.datafile = datafile - # dfs.save() + @pytest.mark.parametrize( + "file", + [ + ("fra_empty_first_row_csv"), + ("fra_empty_first_row_xlsx"), + ], + ) + @pytest.mark.django_db(transaction=True)() + def test_go_parse_fra_empty_first_row(self, request, file, dfs): + """Test parsing FRA files with an empty first row/no header data.""" + datafile = request.getfixturevalue(file) + datafile.year = 2024 + datafile.quarter = "Q1" - # parse_datafile(dfs, datafile) + dfs.datafile = datafile + dfs.save() - # assert TANF_Exiter1.objects.all().count() == 0 + parse_datafile(dfs, datafile) - # errors = ParserError.objects.filter(file=datafile).order_by("id") - # assert len(errors) == 1 - # for e in errors: - # assert e.error_message == "File does not begin with FRA data." 
- # assert e.error_type == ParserErrorCategoryChoices.PRE_CHECK - # assert dfs.get_status() == DataFileSummary.Status.REJECTED + assert TANF_Exiter1.objects.all().count() == 0 - # @pytest.mark.django_db(transaction=True)() - # def test_go_parse_fra_decoder_unknown(self, fra_decoder_unknown, dfs): - # """Test parsing a FRA file with bad encoding.""" - # datafile = fra_decoder_unknown - # datafile.year = 2025 - # datafile.quarter = "Q3" + errors = ParserError.objects.filter(file=datafile).order_by("id") + assert len(errors) == 1 + for e in errors: + assert e.error_message == "File does not begin with FRA data." + assert e.error_type == ParserErrorCategoryChoices.PRE_CHECK + assert dfs.get_status() == DataFileSummary.Status.REJECTED - # dfs.datafile = datafile - # dfs.save() + @pytest.mark.parametrize( + "file", + [ + ("fra_work_outcome_exiter_csv_file"), + ("fra_work_outcome_exiter_xlsx_file"), + ], + ) + @pytest.mark.django_db(transaction=True) + def test_go_parse_fra_work_outcome_exiters(self, request, file, dfs): + """Test parsing FRA Work Outcome Exiters files.""" + datafile = request.getfixturevalue(file) + datafile.year = 2024 + datafile.quarter = "Q2" + datafile.save() - # try: - # parse_datafile(dfs, datafile) - # except util.DecoderUnknownException: - # pass - - # errors = ParserError.objects.filter(file=datafile).order_by("id") - # assert errors.count() == 1 - # assert errors.first().error_type == ParserErrorCategoryChoices.PRE_CHECK - # assert errors.first().error_message == ( - # "Could not determine encoding of FRA file. If the file is an XLSX file, " - # "ensure it can be opened in Excel. If the file is a CSV, ensure it can be " - # "opened in a text editor and is UTF-8 encoded." 
- # ) - # assert dfs.get_status() == DataFileSummary.Status.REJECTED + dfs.datafile = datafile + dfs.save() + + parse_datafile(dfs, datafile) + errors = ParserError.objects.filter(file=datafile).order_by("id") + + assert TANF_Exiter1.objects.all().count() == 5 + + errors = ParserError.objects.filter(file=datafile).order_by("id") + assert errors.count() == 8 + for e in errors: + assert e.error_type == ParserErrorCategoryChoices.CASE_CONSISTENCY + # TODO: need to update go parser to handle updating the DFS' record counts + # assert dfs.total_number_of_records_in_file == 11 + # assert dfs.total_number_of_records_created == 5 + assert dfs.get_status() == DataFileSummary.Status.PARTIALLY_ACCEPTED + + @pytest.mark.parametrize( + "file", + [ + ("fra_ofa_test_csv"), + ("fra_ofa_test_xlsx"), + ], + ) + @pytest.mark.django_db(transaction=True) + def test_go_parse_fra_ofa_test_cases(self, request, file, dfs): + """Test parsing OFA FRA files.""" + datafile = request.getfixturevalue(file) + datafile.year = 2025 + datafile.quarter = "Q3" + datafile.save() + + dfs.datafile = datafile + dfs.save() + + parse_datafile(dfs, datafile) + + errors = ParserError.objects.filter(file=datafile).order_by("row_number") + for e in errors: + assert e.error_type == ParserErrorCategoryChoices.CASE_CONSISTENCY + + # We get one extra duplicate that the Python parser doesn't detect! 
The Python parser hasn't been catching that + # line 13 is a duplicate of line 3 + assert errors.count() == 24 + assert TANF_Exiter1.objects.all().count() == 8 + # assert dfs.total_number_of_records_in_file == 28 + # assert dfs.total_number_of_records_created == 10 + assert dfs.get_status() == DataFileSummary.Status.PARTIALLY_ACCEPTED + + @pytest.mark.django_db(transaction=True) + # TODO: Failing + def test_go_parse_fra_formula_fields(self, fra_formula_fields_test_xlsx, dfs): + """Test parsing a correct FRA file with formula fields.""" + datafile = fra_formula_fields_test_xlsx + datafile.year = 2025 + datafile.quarter = "Q3" + datafile.save() + + dfs.datafile = datafile + dfs.save() + + parse_datafile(dfs, datafile) + + errors = ParserError.objects.filter(file=datafile).order_by("id") + assert errors.count() == 0 + assert TANF_Exiter1.objects.all().count() == 8 + # See above TODO + # assert dfs.total_number_of_records_in_file == 8 + # assert dfs.total_number_of_records_created == 8 + assert dfs.get_status() == DataFileSummary.Status.ACCEPTED + + @pytest.mark.django_db(transaction=True)() + def test_go_parse_fra_decoder_unknown(self, fra_decoder_unknown, dfs): + """Test parsing a FRA file with bad encoding.""" + datafile = fra_decoder_unknown + datafile.year = 2025 + datafile.quarter = "Q3" + datafile.save() + + dfs.datafile = datafile + dfs.save() + + parse_datafile(dfs, datafile) + + errors = ParserError.objects.filter(file=datafile).order_by("id") + assert errors.count() == 1 + assert errors.first().error_type == ParserErrorCategoryChoices.PRE_CHECK + assert errors.first().error_message == ( + "Could not determine encoding of FRA file. If the file is an XLSX file, " + "ensure it can be opened in Excel. If the file is a CSV, ensure it can be " + "opened in a text editor and is UTF-8 encoded." 
+ ) + assert dfs.get_status() == DataFileSummary.Status.REJECTED @pytest.mark.django_db(transaction=True)() def test_go_parse_section2_no_records(self, section2_no_records, dfs): @@ -1889,6 +2045,229 @@ def test_go_parse_case_aggregates_edge_case(self, case_aggregates_edge_case, dfs "rejected": 1, # Rejected is 1 for go parser since it doesn't worry about trailer errors } - assert TANF_T1.objects.count() == 3 - assert TANF_T2.objects.count() == 3 - assert TANF_T3.objects.count() == 6 + assert TANF_T1.objects.filter(datafile=case_aggregates_edge_case).count() == 3 + assert TANF_T2.objects.filter(datafile=case_aggregates_edge_case).count() == 3 + assert TANF_T3.objects.filter(datafile=case_aggregates_edge_case).count() == 6 + + @pytest.mark.django_db(transaction=True) + def test_go_parse_super_big_s1_file(self, super_big_s1_file, dfs): + """Test parsing super_big_s1_file and validate all records are created.""" + super_big_s1_file.year = 2023 + super_big_s1_file.quarter = "Q2" + super_big_s1_file.save() + + dfs.datafile = super_big_s1_file + dfs.save() + + parse_datafile(dfs, super_big_s1_file) + expected_t1_record_count = 96497 + expected_t2_record_count = 112622 + expected_t3_record_count = 172552 + + assert TANF_T1.objects.count() == expected_t1_record_count + assert TANF_T2.objects.count() == expected_t2_record_count + assert TANF_T3.objects.count() == expected_t3_record_count + + @pytest.mark.django_db(transaction=True) + def test_go_parse_big_s1_file_with_rollback(self, big_s1_rollback_file, dfs): + """Test parsing big_s1_rollback_file with rollback on error.""" + big_s1_rollback_file.year = 2023 + big_s1_rollback_file.quarter = "Q2" + big_s1_rollback_file.save() + + dfs.datafile = big_s1_rollback_file + dfs.save() + + parse_datafile(dfs, big_s1_rollback_file) + + parser_errors = ParserError.objects.filter(file=big_s1_rollback_file) + assert parser_errors.count() == 1 + + err = parser_errors.first() + + assert err.row_number == 13609 + assert err.error_type == 
ParserErrorCategoryChoices.PRE_CHECK + assert err.error_message == "Multiple headers found." + assert err.content_type is None + assert err.object_id is None + + assert TANF_T1.objects.count() == 0 + assert TANF_T2.objects.count() == 0 + assert TANF_T3.objects.count() == 0 + + @pytest.mark.parametrize( + "file, batch_size, model, record_type, num_errors", + [ + ("tanf_s1_exact_dup_file", 10000, TANF_T1, "T1", 5), + ("tanf_s1_exact_dup_file", 1, TANF_T1, "T1", 5), + ("tanf_s2_exact_dup_file", 10000, TANF_T4, "T4", 3), + ("tanf_s2_exact_dup_file", 1, TANF_T4, "T4", 3), + ("tanf_s3_exact_dup_file", 10000, TANF_T6, "T6", 3), + ("tanf_s3_exact_dup_file", 1, TANF_T6, "T6", 3), + ("tanf_s4_exact_dup_file", 10000, TANF_T7, "T7", 18), + ("tanf_s4_exact_dup_file", 1, TANF_T7, "T7", 18), + ("ssp_s1_exact_dup_file", 10000, SSP_M1, "M1", 5), + ("ssp_s1_exact_dup_file", 1, SSP_M1, "M1", 5), + ("ssp_s2_exact_dup_file", 10000, SSP_M4, "M4", 3), + ("ssp_s2_exact_dup_file", 1, SSP_M4, "M4", 3), + ("ssp_s3_exact_dup_file", 10000, SSP_M6, "M6", 3), + ("ssp_s3_exact_dup_file", 1, SSP_M6, "M6", 3), + ("ssp_s4_exact_dup_file", 10000, SSP_M7, "M7", 12), + ("ssp_s4_exact_dup_file", 1, SSP_M7, "M7", 12), + ], + ) + @pytest.mark.django_db(transaction=True) + def test_go_parse_duplicate( + self, file, batch_size, model, record_type, num_errors, dfs, request + ): + """Test cases for datafiles that have exact duplicate records.""" + datafile = request.getfixturevalue(file) + dfs.datafile = datafile + + parse_datafile(dfs, datafile) + + parser_errors = ParserError.objects.filter( + file=datafile, error_type=ParserErrorCategoryChoices.CASE_CONSISTENCY + ).order_by("error_message") + + for e in parser_errors: + assert e.error_type == ParserErrorCategoryChoices.CASE_CONSISTENCY + assert parser_errors.count() == num_errors + + dup_error = parser_errors.first() + + assert ( + dup_error.error_message + == f"Duplicate record detected with record type {record_type} at line 3." 
+ ) + + model.objects.count() == 0 + + @pytest.mark.parametrize( + "file, batch_size, model, record_type, num_errors, err_msg", + [ + ( + "tanf_s1_partial_dup_file", + 10000, + TANF_T1, + "T1", + 5, + "Partial duplicate record detected with record type T1 at line 3.", + ), + ( + "tanf_s1_partial_dup_file", + 1, + TANF_T1, + "T1", + 5, + "Partial duplicate record detected with record type T1 at line 3.", + ), + ( + "tanf_s2_partial_dup_file", + 10000, + TANF_T5, + "T5", + 3, + "Partial duplicate record detected with record type T5 at line 3.", + ), + ( + "tanf_s2_partial_dup_file", + 1, + TANF_T5, + "T5", + 3, + "Partial duplicate record detected with record type T5 at line 3.", + ), + ( + "ssp_s1_partial_dup_file", + 10000, + SSP_M1, + "M1", + 5, + "Partial duplicate record detected with record type M1 at line 3.", + ), + ( + "ssp_s1_partial_dup_file", + 1, + SSP_M1, + "M1", + 5, + "Partial duplicate record detected with record type M1 at line 3.", + ), + ( + "ssp_s2_partial_dup_file", + 10000, + SSP_M5, + "M5", + 3, + "Partial duplicate record detected with record type M5 at line 3.", + ), + ( + "ssp_s2_partial_dup_file", + 1, + SSP_M5, + "M5", + 3, + "Partial duplicate record detected with record type M5 at line 3.", + ), + ], + ) + @pytest.mark.django_db(transaction=True) + def test_go_parse_partial_duplicate( + self, file, batch_size, model, record_type, num_errors, err_msg, dfs, request + ): + """Test cases for datafiles that have partial duplicate records.""" + datafile = request.getfixturevalue(file) + expected_error_msg = err_msg + + dfs.datafile = datafile + + parse_datafile(dfs, datafile) + + parser_errors = ParserError.objects.filter( + file=datafile, error_type=ParserErrorCategoryChoices.CASE_CONSISTENCY + ).order_by("-error_message") + for e in parser_errors: + assert e.error_type == ParserErrorCategoryChoices.CASE_CONSISTENCY + assert parser_errors.count() == num_errors + + dup_error = parser_errors.first() + assert ( + 
expected_error_msg.format(record_type=record_type) + in dup_error.error_message + ) + + model.objects.count() == 0 + + @pytest.mark.django_db(transaction=True) + def test_go_parse_cat_4_edge_case_file(self, cat4_edge_case_file, dfs): + """Test parsing file with a cat4 error edge case submission.""" + cat4_edge_case_file.year = 2024 + cat4_edge_case_file.quarter = "Q1" + cat4_edge_case_file.save() + + dfs.datafile = cat4_edge_case_file + dfs.save() + + parse_datafile(dfs, cat4_edge_case_file) + + parser_errors = ( + ParserError.objects.filter(file=cat4_edge_case_file) + .filter(error_type=ParserErrorCategoryChoices.CASE_CONSISTENCY) + .order_by("row_number") + ) + + assert TANF_T1.objects.all().count() == 2 + assert TANF_T2.objects.all().count() == 2 + assert TANF_T3.objects.all().count() == 4 + + # TODO + # assert dfs.total_number_of_records_in_file == 17 + # assert dfs.total_number_of_records_created == 8 + + err = parser_errors.first() + assert err.error_message == ( + "Every T1 record should have at least one corresponding T2 or T3 record " + "with the same RPT_MONTH_YEAR and CASE_NUMBER" + ) + assert dfs.get_status() == DataFileSummary.Status.PARTIALLY_ACCEPTED diff --git a/tdrs-backend/tdpservice/parsers/test/test_parse.py b/tdrs-backend/tdpservice/parsers/test/test_parse.py index 4de83bd5dc..4713970d5e 100644 --- a/tdrs-backend/tdpservice/parsers/test/test_parse.py +++ b/tdrs-backend/tdpservice/parsers/test/test_parse.py @@ -135,11 +135,16 @@ def test_small_correct_file_no_records_created(self, parsed_small_correct_file): _datafile, _dfs = parsed_small_correct_file assert TANF_T1.objects.count() == 0 + # This test was using out of date parameters but still somehow passing. However, this shows now that the go and + # python parsers don't cross check the datafile's program type against the datafile model's program type which + # can lead to confusing errors. 
I wrote an enhancement ticket to address this for both parsers since we will soon + # allow api submissions and shouldn't strictly rely on client side validation of this. @pytest.mark.django_db @pytest.mark.parametrize( - "section, expected_message, expected_aggregates, save_dfs", + "program, section, expected_message, expected_aggregates, save_dfs, num_errors", [ ( + "TAN", "Closed Case Data", "Data does not match the expected layout for Closed Case Data.", { @@ -163,12 +168,16 @@ def test_small_correct_file_no_records_created(self, parsed_small_correct_file): ], }, False, + 1, ), + # This is the proof that server side model to file program type validation is not occurring. ( - "SSP Active Case Data", - "Data does not match the expected layout for " "SSP Active Case Data.", + "SSP", + "Active Case Data", + "No records created.", None, True, + 2, ), ], ) @@ -176,12 +185,15 @@ def test_parse_section_mismatch_variants( self, small_correct_file, dfs, + program, section, expected_message, expected_aggregates, save_dfs, + num_errors, ): """Test parsing when file metadata does not match the raw data layout.""" + small_correct_file.program_type = program small_correct_file.section = section small_correct_file.save() @@ -194,7 +206,9 @@ def test_parse_section_mismatch_variants( dfs.status = dfs.get_status() assert dfs.status == DataFileSummary.Status.REJECTED parser_errors = ParserError.objects.filter(file=small_correct_file) - assert parser_errors.count() == 1 + + # Extra error for No records created error and other cat4 errors + assert parser_errors.count() == num_errors if expected_aggregates is not None: dfs.case_aggregates = aggregates.case_aggregates_by_month( @@ -202,8 +216,7 @@ def test_parse_section_mismatch_variants( ) assert dfs.case_aggregates == expected_aggregates - err = parser_errors.first() - assert err.row_number == 1 + err = parser_errors.order_by("error_type").first() assert err.error_type == ParserErrorCategoryChoices.PRE_CHECK assert err.error_message 
== expected_message assert err.content_type is None @@ -1529,8 +1542,6 @@ def test_parse_aggregates_rejected_datafile( aggregates_rejected_datafile.year = 2021 aggregates_rejected_datafile.quarter = "Q1" - print(aggregates_rejected_datafile) - dfs.datafile = aggregates_rejected_datafile parse_datafile(dfs, aggregates_rejected_datafile) @@ -1873,7 +1884,6 @@ def test_parse_case_aggregates_edge_case(self, case_aggregates_edge_case, dfs): dfs.case_aggregates = aggregates.case_aggregates_by_month( dfs.datafile, dfs.status ) - print(dfs.case_aggregates) assert dfs.case_aggregates == { "months": [ { diff --git a/tdrs-backend/tdpservice/parsers/test/test_parse_large_files.py b/tdrs-backend/tdpservice/parsers/test/test_parse_large_files.py index a29fcc9b02..fe8e0c8323 100644 --- a/tdrs-backend/tdpservice/parsers/test/test_parse_large_files.py +++ b/tdrs-backend/tdpservice/parsers/test/test_parse_large_files.py @@ -75,9 +75,9 @@ def test_parse_super_big_s1_file(self, super_big_s1_file, dfs): dfs.save() parse_datafile(dfs, super_big_s1_file) - expected_t1_record_count = 96607 - expected_t2_record_count = 112753 - expected_t3_record_count = 172525 + expected_t1_record_count = 96497 + expected_t2_record_count = 112622 + expected_t3_record_count = 172552 assert TANF_T1.objects.count() == expected_t1_record_count assert TANF_T2.objects.count() == expected_t2_record_count diff --git a/tdrs-backend/tdpservice/users/test/test_keycloak_sync.py b/tdrs-backend/tdpservice/users/test/test_keycloak_sync.py index 353514dc1f..f6d7d05f9d 100644 --- a/tdrs-backend/tdpservice/users/test/test_keycloak_sync.py +++ b/tdrs-backend/tdpservice/users/test/test_keycloak_sync.py @@ -176,6 +176,17 @@ def test_django_to_kc_group_mapping_complete(): # --- Signal handler tests --- +@patch("tdpservice.users.keycloak_sync._get_client") +@pytest.mark.django_db +def test_post_save_signal_noop_by_default_in_tests(mock_get_client): + """Test default pytest settings disable live Keycloak sync.""" + user = 
UserFactory.create() + user.first_name = "Updated" + user.save() + + mock_get_client.assert_not_called() + + @override_settings(KEYCLOAK_SYNC_ENABLED=True) @patch("tdpservice.users.keycloak_sync._get_client") @pytest.mark.django_db diff --git a/tdrs-services/parser/cmd/docgen/main.go b/tdrs-services/parser/cmd/docgen/main.go index 64ae6dcc58..1801e25819 100644 --- a/tdrs-services/parser/cmd/docgen/main.go +++ b/tdrs-services/parser/cmd/docgen/main.go @@ -249,7 +249,7 @@ func buildRecordDoc(cs *schema.CompiledSchema, schemaPath string, validators *va Description: cs.Description, } - for _, cv := range validators.GetRecordValidators(cs.RecordType) { + for _, cv := range validators.GetRecordValidators(schemaPath) { vDoc := validatorToDoc(cv, cs.RecordType, nil) if cv.ErrorType == validation.ErrorTypeRecordPreCheck { recDoc.PreCheck = append(recDoc.PreCheck, vDoc) @@ -259,19 +259,19 @@ func buildRecordDoc(cs *schema.CompiledSchema, schemaPath string, validators *va } for _, field := range cs.Shared { - recDoc.Fields = append(recDoc.Fields, buildFieldDoc(field, cs.RecordType, validators)) + recDoc.Fields = append(recDoc.Fields, buildFieldDoc(field, cs.RecordType, schemaPath, validators)) } if len(cs.Segments) > 0 { for _, field := range cs.Segments[0].Fields { - recDoc.Fields = append(recDoc.Fields, buildFieldDoc(field, cs.RecordType, validators)) + recDoc.Fields = append(recDoc.Fields, buildFieldDoc(field, cs.RecordType, schemaPath, validators)) } } return recDoc } -func buildFieldDoc(field schema.FieldDef, recordType string, validators *validation.ValidatorRegistry) FieldDoc { +func buildFieldDoc(field schema.FieldDef, recordType string, schemaPath string, validators *validation.ValidatorRegistry) FieldDoc { fDoc := FieldDoc{ Name: field.Name, Item: field.Item, @@ -283,7 +283,7 @@ func buildFieldDoc(field schema.FieldDef, recordType string, validators *validat // Deduplicate validators — multi-segment records (e.g. 
T3) register the same // validators once per segment, producing duplicates for the same field name. seen := make(map[string]bool) - for _, cv := range validators.GetFieldValidators(recordType, field.Name) { + for _, cv := range validators.GetFieldValidators(schemaPath, field.Name) { key := cv.ID + "|" + cv.Expr.Expr if seen[key] { continue @@ -344,9 +344,9 @@ func htmlFuncMap() template.FuncMap { } return "No" }, - "joinFields": func(fields []string) string { return strings.Join(fields, ", ") }, - "friendlyError": friendlyErrorType, - "hasValidators": func(f FieldDoc) bool { return len(f.Validators) > 0 }, - "hasFields": func(v ValidatorDoc) bool { return len(v.Fields) > 0 }, + "joinFields": func(fields []string) string { return strings.Join(fields, ", ") }, + "friendlyError": friendlyErrorType, + "hasValidators": func(f FieldDoc) bool { return len(f.Validators) > 0 }, + "hasFields": func(v ValidatorDoc) bool { return len(v.Fields) > 0 }, } } diff --git a/tdrs-services/parser/config/filespecs/fra/s1.yaml b/tdrs-services/parser/config/filespecs/fra/s1.yaml index 602fde48da..7f937d647a 100644 --- a/tdrs-services/parser/config/filespecs/fra/s1.yaml +++ b/tdrs-services/parser/config/filespecs/fra/s1.yaml @@ -50,13 +50,42 @@ record_type_detection: # Configuration for how records are collected and grouped for processing. # # FRA files don't have case-based grouping like TANF/SSP. -# Each record is independent (one exiter per row). # Duplicates are detected by EXIT_DATE + SSN combination. accumulator: - - # No key_fields for FRA - records are not grouped by case + # For columnar files, start is interpreted as the column index. + key_fields: + fields: + - name: exit_date + start: 0 + end: 1 + - name: ssn + start: 1 + end: 2 batch_size: 1 - # No grouping needed for FRA records - grouped_schemas: [] + # Sort by EXIT_DATE + SSN before accumulation so duplicate records are adjacent. 
+ presort: true + + grouped_schemas: + - fra/te1 + +validation_orchestrator: + categories: + - id: 1 + name: Record pre-check + default_error_type: CASE_CONSISTENCY + - id: 2 + name: Field validation + default_error_type: CASE_CONSISTENCY + - id: 3 + name: Cross-field + default_error_type: CASE_CONSISTENCY + - id: 4 + name: Group validation + default_error_type: CASE_CONSISTENCY + +group_validators: + - id: exact_duplicates + params: + record_type: TE1 diff --git a/tdrs-services/parser/config/filespecs/ssp/s1.yaml b/tdrs-services/parser/config/filespecs/ssp/s1.yaml index 56697237c1..2bc3196e9d 100644 --- a/tdrs-services/parser/config/filespecs/ssp/s1.yaml +++ b/tdrs-services/parser/config/filespecs/ssp/s1.yaml @@ -77,12 +77,13 @@ accumulator: # These byte positions are used BEFORE full parsing to quickly # determine which case a record belongs to. key_fields: - rpt_month_year: - start: 2 - end: 8 - case_number: - start: 8 - end: 19 + fields: + - name: rpt_month_year + start: 2 + end: 8 + - name: case_number + start: 8 + end: 19 batch_size: 1 @@ -145,17 +146,17 @@ group_validators: message: "Every M1 record should have at least one corresponding M2 or M3 record with FAMILY_AFFILIATION == 1" # M2 records require at least one M1 record in the same case - - id: requires_corresponding_record + - id: requires_related_record params: record_type: M2 - required_record_type: M1 + related_record_types: [M1] message: "Every M2 record should have at least one corresponding M1 record with the same RPT_MONTH_YEAR and CASE_NUMBER" # M3 records require at least one M1 record in the same case - - id: requires_corresponding_record + - id: requires_related_record params: record_type: M3 - required_record_type: M1 + related_record_types: [M1] message: "Every M3 record should have at least one corresponding M1 record with the same RPT_MONTH_YEAR and CASE_NUMBER" # Federally funded recipients diff --git a/tdrs-services/parser/config/filespecs/ssp/s2.yaml 
b/tdrs-services/parser/config/filespecs/ssp/s2.yaml index f82ffe42d4..5c5abaa546 100644 --- a/tdrs-services/parser/config/filespecs/ssp/s2.yaml +++ b/tdrs-services/parser/config/filespecs/ssp/s2.yaml @@ -74,12 +74,13 @@ accumulator: # These byte positions are used BEFORE full parsing to quickly # determine which case a record belongs to. key_fields: - rpt_month_year: - start: 2 - end: 8 - case_number: - start: 8 - end: 19 + fields: + - name: rpt_month_year + start: 2 + end: 8 + - name: case_number + start: 8 + end: 19 batch_size: 1 @@ -124,17 +125,17 @@ validation_orchestrator: group_validators: # M4 records require at least one M5 record in the same case - - id: requires_corresponding_record + - id: requires_related_record params: record_type: M4 - required_record_type: M5 + related_record_types: [M5] message: "Every M4 record should have at least one corresponding M5 record with the same RPT_MONTH_YEAR and CASE_NUMBER" # M5 records require at least one M4 record in the same case - - id: requires_corresponding_record + - id: requires_related_record params: record_type: M5 - required_record_type: M4 + related_record_types: [M4] message: "Every M5 record should have at least one corresponding M4 record with the same RPT_MONTH_YEAR and CASE_NUMBER" # --- Exact duplicate detection --- diff --git a/tdrs-services/parser/config/filespecs/ssp/s3.yaml b/tdrs-services/parser/config/filespecs/ssp/s3.yaml index ed284ea1f2..d6ca649849 100644 --- a/tdrs-services/parser/config/filespecs/ssp/s3.yaml +++ b/tdrs-services/parser/config/filespecs/ssp/s3.yaml @@ -57,15 +57,22 @@ record_type_detection: # # SSP Section 3 files contain aggregate data (not case-level data). # M6 records are file-level summaries without case numbers, -# so no key-based grouping is needed. +# but still require grouping for duplicate checks. # -# Each record is processed independently. 
accumulator: - # No key_fields for aggregate data - records don't belong to cases - # key_fields: (none) + key_fields: + fields: + - name: record_type + start: 0 + end: 2 batch_size: 1 - # All record types are processed individually (no grouping needed) - grouped_schemas: [] + grouped_schemas: + - ssp/m6 + +group_validators: + - id: exact_duplicates + params: + record_type: M6 diff --git a/tdrs-services/parser/config/filespecs/ssp/s4.yaml b/tdrs-services/parser/config/filespecs/ssp/s4.yaml index 3766c7efa5..71fbabf98e 100644 --- a/tdrs-services/parser/config/filespecs/ssp/s4.yaml +++ b/tdrs-services/parser/config/filespecs/ssp/s4.yaml @@ -56,16 +56,22 @@ record_type_detection: # Configuration for how records are collected and grouped for processing. # # SSP Section 4 files contain stratum data (not case-level data). -# M7 records are file-level summaries without case numbers, -# so no key-based grouping is needed. -# -# Each record is processed independently. +# M7 records are file-level summaries without case numbers, +# but still require grouping for duplicate checks. accumulator: - # No key_fields for stratum data - records don't belong to cases - # key_fields: (none) + key_fields: + fields: + - name: record_type + start: 0 + end: 2 batch_size: 1 - # All record types are processed individually (no grouping needed) - grouped_schemas: [] + grouped_schemas: + - ssp/m7 + +group_validators: + - id: exact_duplicates + params: + record_type: M7 diff --git a/tdrs-services/parser/config/filespecs/tanf/s1.yaml b/tdrs-services/parser/config/filespecs/tanf/s1.yaml index 3447e11c2c..c29924c0a3 100644 --- a/tdrs-services/parser/config/filespecs/tanf/s1.yaml +++ b/tdrs-services/parser/config/filespecs/tanf/s1.yaml @@ -76,12 +76,13 @@ accumulator: # These byte positions are used BEFORE full parsing to quickly # determine which case a record belongs to.
key_fields: - rpt_month_year: - start: 2 - end: 8 - case_number: - start: 8 - end: 19 + fields: + - name: rpt_month_year + start: 2 + end: 8 + - name: case_number + start: 8 + end: 19 batch_size: 1 @@ -151,17 +152,17 @@ group_validators: message: "Every T1 record should have at least one corresponding T2 or T3 record with FAMILY_AFFILIATION == 1" # T2 records require at least one T1 record in the same case - - id: requires_corresponding_record + - id: requires_related_record params: record_type: T2 - required_record_type: T1 + related_record_types: [T1] message: "Every T2 record should have at least one corresponding T1 record with the same RPT_MONTH_YEAR and CASE_NUMBER" # T3 records require at least one T1 record in the same case - - id: requires_corresponding_record + - id: requires_related_record params: record_type: T3 - required_record_type: T1 + related_record_types: [T1] message: "Every T3 record should have at least one corresponding T1 record with the same RPT_MONTH_YEAR and CASE_NUMBER" # Federally funded recipients diff --git a/tdrs-services/parser/config/filespecs/tanf/s2.yaml b/tdrs-services/parser/config/filespecs/tanf/s2.yaml index 2e8e981b14..ace3db3294 100644 --- a/tdrs-services/parser/config/filespecs/tanf/s2.yaml +++ b/tdrs-services/parser/config/filespecs/tanf/s2.yaml @@ -73,12 +73,13 @@ accumulator: # These byte positions are used BEFORE full parsing to quickly # determine which case a record belongs to. 
key_fields: - rpt_month_year: - start: 2 - end: 8 - case_number: - start: 8 - end: 19 + fields: + - name: rpt_month_year + start: 2 + end: 8 + - name: case_number + start: 8 + end: 19 batch_size: 1 @@ -137,18 +138,18 @@ group_validators: # T4 records require at least one T5 record in the same case # Error is reported on T4 records when no T5 exists - - id: requires_corresponding_record + - id: requires_related_record params: record_type: T4 - required_record_type: T5 + related_record_types: [T5] message: "Every T4 record should have at least one corresponding T5 record with the same RPT_MONTH_YEAR and CASE_NUMBER" # T5 records require at least one T4 record in the same case # Error is reported on T5 records when no T4 exists - - id: requires_corresponding_record + - id: requires_related_record params: record_type: T5 - required_record_type: T4 + related_record_types: [T4] message: "Every T5 record should have at least one corresponding T4 record with the same RPT_MONTH_YEAR and CASE_NUMBER" # --- Exact duplicate detection --- diff --git a/tdrs-services/parser/config/filespecs/tanf/s3.yaml b/tdrs-services/parser/config/filespecs/tanf/s3.yaml index 47386a9ce5..acddf10f0f 100644 --- a/tdrs-services/parser/config/filespecs/tanf/s3.yaml +++ b/tdrs-services/parser/config/filespecs/tanf/s3.yaml @@ -57,15 +57,22 @@ record_type_detection: # # TANF Section 3 files contain aggregate data (not case-level data). # T6 records are file-level summaries without case numbers, -# so no key-based grouping is needed. +# but still require grouping for duplicate checks. # -# Each record is processed independently. 
accumulator: - # No key_fields for aggregate data - records don't belong to cases - # key_fields: (none) + key_fields: + fields: + - name: record_type + start: 0 + end: 2 batch_size: 1 - # All record types are processed individually (no grouping needed) - grouped_schemas: [] + grouped_schemas: + - tanf/t6 + +group_validators: + - id: exact_duplicates + params: + record_type: T6 diff --git a/tdrs-services/parser/config/filespecs/tanf/s4.yaml b/tdrs-services/parser/config/filespecs/tanf/s4.yaml index adf87bffaf..0753441fb6 100644 --- a/tdrs-services/parser/config/filespecs/tanf/s4.yaml +++ b/tdrs-services/parser/config/filespecs/tanf/s4.yaml @@ -57,15 +57,21 @@ record_type_detection: # # TANF Section 4 files contain stratum data (not case-level data). # T7 records are file-level summaries without case numbers, -# so no key-based grouping is needed. -# -# Each record is processed independently. +# but still require grouping for duplicate checks. accumulator: - # No key_fields for stratum data - records don't belong to cases - # key_fields: (none) + key_fields: + fields: + - name: record_type + start: 0 + end: 2 batch_size: 1 - # All record types are processed individually (no grouping needed) - grouped_schemas: [] + grouped_schemas: + - tanf/t7 + +group_validators: + - id: exact_duplicates + params: + record_type: T7 diff --git a/tdrs-services/parser/config/filespecs/tribal/s1.yaml b/tdrs-services/parser/config/filespecs/tribal/s1.yaml index 9ee9f40930..92010e4ec5 100644 --- a/tdrs-services/parser/config/filespecs/tribal/s1.yaml +++ b/tdrs-services/parser/config/filespecs/tribal/s1.yaml @@ -70,12 +70,13 @@ accumulator: # These byte positions are used BEFORE full parsing to quickly # determine which case a record belongs to.
key_fields: - rpt_month_year: - start: 2 - end: 8 - case_number: - start: 8 - end: 19 + fields: + - name: rpt_month_year + start: 2 + end: 8 + - name: case_number + start: 8 + end: 19 batch_size: 1 @@ -137,17 +138,17 @@ group_validators: message: "Every T1 record should have at least one corresponding T2 or T3 record with FAMILY_AFFILIATION == 1" # T2 records require at least one T1 record in the same case - - id: requires_corresponding_record + - id: requires_related_record params: record_type: T2 - required_record_type: T1 + related_record_types: [T1] message: "Every T2 record should have at least one corresponding T1 record with the same RPT_MONTH_YEAR and CASE_NUMBER" # T3 records require at least one T1 record in the same case - - id: requires_corresponding_record + - id: requires_related_record params: record_type: T3 - required_record_type: T1 + related_record_types: [T1] message: "Every T3 record should have at least one corresponding T1 record with the same RPT_MONTH_YEAR and CASE_NUMBER" # Federally funded recipients diff --git a/tdrs-services/parser/config/filespecs/tribal/s2.yaml b/tdrs-services/parser/config/filespecs/tribal/s2.yaml index 4d757ab8b3..e9522346eb 100644 --- a/tdrs-services/parser/config/filespecs/tribal/s2.yaml +++ b/tdrs-services/parser/config/filespecs/tribal/s2.yaml @@ -65,12 +65,13 @@ accumulator: # Fields to extract the grouping key from raw row data. 
key_fields: - rpt_month_year: - start: 2 - end: 8 - case_number: - start: 8 - end: 19 + fields: + - name: rpt_month_year + start: 2 + end: 8 + - name: case_number + start: 8 + end: 19 batch_size: 1 @@ -113,17 +114,17 @@ validation_orchestrator: group_validators: # T4 records require at least one T5 record in the same case - - id: requires_corresponding_record + - id: requires_related_record params: record_type: T4 - required_record_type: T5 + related_record_types: [T5] message: "Every T4 record should have at least one corresponding T5 record with the same RPT_MONTH_YEAR and CASE_NUMBER" # T5 records require at least one T4 record in the same case - - id: requires_corresponding_record + - id: requires_related_record params: record_type: T5 - required_record_type: T4 + related_record_types: [T4] message: "Every T5 record should have at least one corresponding T4 record with the same RPT_MONTH_YEAR and CASE_NUMBER" # --- Exact duplicate detection --- diff --git a/tdrs-services/parser/config/filespecs/tribal/s3.yaml b/tdrs-services/parser/config/filespecs/tribal/s3.yaml index 7cd841142f..da470a07a9 100644 --- a/tdrs-services/parser/config/filespecs/tribal/s3.yaml +++ b/tdrs-services/parser/config/filespecs/tribal/s3.yaml @@ -57,12 +57,35 @@ record_type_detection: # # Tribal TANF Section 3 files contain aggregate data (not case-level data). # T6 records are file-level summaries without case numbers, -# so no key-based grouping is needed. +# but still require grouping for duplicate checks. 
accumulator: - - # No key_fields for aggregate data - records don't belong to cases + key_fields: + fields: + - name: record_type + start: 0 + end: 2 batch_size: 1 - # All record types are processed individually (no grouping needed) - grouped_schemas: [] + grouped_schemas: + - tribal_tanf/t6 + +group_validators: + - id: exact_duplicates + params: + record_type: T6 + +validation_orchestrator: + categories: + - id: 1 + name: Record pre-check + default_error_type: RECORD_PRE_CHECK + - id: 2 + name: Field validation + default_error_type: FIELD_VALUE + - id: 3 + name: Cross-field + default_error_type: VALUE_CONSISTENCY + - id: 4 + name: Group validation + default_error_type: CASE_CONSISTENCY diff --git a/tdrs-services/parser/config/filespecs/tribal/s4.yaml b/tdrs-services/parser/config/filespecs/tribal/s4.yaml index c2adbe4b53..e9d9ef84b5 100644 --- a/tdrs-services/parser/config/filespecs/tribal/s4.yaml +++ b/tdrs-services/parser/config/filespecs/tribal/s4.yaml @@ -57,12 +57,35 @@ record_type_detection: # # Tribal TANF Section 4 files contain stratum data (not case-level data). # T7 records are file-level summaries without case numbers, -# so no key-based grouping is needed. +# but still require grouping for duplicate checks. 
accumulator: - - # No key_fields for stratum data - records don't belong to cases + key_fields: + fields: + - name: record_type + start: 0 + end: 2 batch_size: 1 - # All record types are processed individually (no grouping needed) - grouped_schemas: [] + grouped_schemas: + - tribal_tanf/t7 + +group_validators: + - id: exact_duplicates + params: + record_type: T7 + +validation_orchestrator: + categories: + - id: 1 + name: Record pre-check + default_error_type: RECORD_PRE_CHECK + - id: 2 + name: Field validation + default_error_type: FIELD_VALUE + - id: 3 + name: Cross-field + default_error_type: VALUE_CONSISTENCY + - id: 4 + name: Group validation + default_error_type: CASE_CONSISTENCY diff --git a/tdrs-services/parser/config/schemas/fra/te1.yaml b/tdrs-services/parser/config/schemas/fra/te1.yaml index 4aab646391..422dc18e2e 100644 --- a/tdrs-services/parser/config/schemas/fra/te1.yaml +++ b/tdrs-services/parser/config/schemas/fra/te1.yaml @@ -12,6 +12,9 @@ section: 1 document: "FRA Work Outcome TANF Exiters" format: csv +record_validators: + - id: exit_date_matches_fiscal_period + # RecordType is a constant field (always "TE1") - not present in CSV data # but required in the database table shared: @@ -31,6 +34,8 @@ segments: column: 0 type: integer required: true + transform: + name: fra_exit_date - name: SSN friendly_name: Social Security Number @@ -38,3 +43,5 @@ segments: column: 1 type: string required: true + field_validators: + - id: fra_ssn diff --git a/tdrs-services/parser/config/schemas/ssp/m3.yaml b/tdrs-services/parser/config/schemas/ssp/m3.yaml index cfb2ecdeb6..ae3cef2ced 100644 --- a/tdrs-services/parser/config/schemas/ssp/m3.yaml +++ b/tdrs-services/parser/config/schemas/ssp/m3.yaml @@ -142,14 +142,6 @@ shared: field: - id: not_empty - - name: FIPS_CODE - friendly_name: State FIPS Code - item: "1" - start: 19 - end: 21 - type: string - required: true - # Each segment produces one output row (combined with shared fields) # M3 has 2 segments: Child 1 
and Child 2 segments: diff --git a/tdrs-services/parser/config/schemas/ssp/m6.yaml b/tdrs-services/parser/config/schemas/ssp/m6.yaml index 14e0fe5551..dadb18d6a8 100644 --- a/tdrs-services/parser/config/schemas/ssp/m6.yaml +++ b/tdrs-services/parser/config/schemas/ssp/m6.yaml @@ -16,6 +16,10 @@ record_validators: - id: record_length_min params: min: 259 + - id: rpt_month_year_matches_header_year_quarter + - id: calendar_quarter_is_valid + error_type: RECORD_PRE_CHECK + # SSPMOE_FAMILIES must equal sum of 2-parent + 1-parent + no-parent families - id: sum_equals params: diff --git a/tdrs-services/parser/config/schemas/ssp/m7.yaml b/tdrs-services/parser/config/schemas/ssp/m7.yaml index d568430b31..50cdd0e433 100644 --- a/tdrs-services/parser/config/schemas/ssp/m7.yaml +++ b/tdrs-services/parser/config/schemas/ssp/m7.yaml @@ -15,6 +15,9 @@ record_validators: - id: record_length_min params: min: 247 + - id: rpt_month_year_matches_header_year_quarter + - id: calendar_quarter_is_valid + error_type: RECORD_PRE_CHECK # Fields shared across all segments (included in every output row) shared: diff --git a/tdrs-services/parser/config/schemas/tanf/t6.yaml b/tdrs-services/parser/config/schemas/tanf/t6.yaml index 95656691de..dacdc09b55 100644 --- a/tdrs-services/parser/config/schemas/tanf/t6.yaml +++ b/tdrs-services/parser/config/schemas/tanf/t6.yaml @@ -16,6 +16,10 @@ record_validators: - id: record_length_min params: min: 379 + - id: rpt_month_year_matches_header_year_quarter + - id: calendar_quarter_is_valid + error_type: RECORD_PRE_CHECK + # Sum validators - id: sum_equals params: diff --git a/tdrs-services/parser/config/schemas/tanf/t7.yaml b/tdrs-services/parser/config/schemas/tanf/t7.yaml index 35f8fc7ac7..bcbe741a8e 100644 --- a/tdrs-services/parser/config/schemas/tanf/t7.yaml +++ b/tdrs-services/parser/config/schemas/tanf/t7.yaml @@ -16,6 +16,9 @@ record_validators: - id: record_length_min params: min: 247 + - id: rpt_month_year_matches_header_year_quarter + - id: 
calendar_quarter_is_valid + error_type: RECORD_PRE_CHECK # Fields shared across all segments (included in every output row) shared: @@ -213,7 +216,7 @@ segments: start: 31 end: 32 type: string - required: false + required: true field_validators: - id: in_values params: @@ -225,11 +228,11 @@ segments: start: 32 end: 34 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -238,11 +241,11 @@ segments: start: 34 end: 41 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 2, Month B @@ -264,7 +267,7 @@ segments: start: 31 end: 32 type: string - required: false + required: true field_validators: - id: in_values params: @@ -276,11 +279,11 @@ segments: start: 32 end: 34 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -289,11 +292,11 @@ segments: start: 41 end: 48 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 2, Month C @@ -315,7 +318,7 @@ segments: start: 31 end: 32 type: string - required: false + required: true field_validators: - id: in_values params: @@ -327,11 +330,11 @@ segments: start: 32 end: 34 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -340,11 +343,11 @@ segments: start: 48 end: 55 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 3, Month A @@ -366,7 +369,7 @@ segments: start: 55 end: 56 type: string - required: false + required: true field_validators: - id: in_values params: @@ -378,11 +381,11 @@ segments: start: 56 end: 58 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 
+ min: 1 max: 99 - name: FAMILIES_MONTH @@ -391,11 +394,11 @@ segments: start: 58 end: 65 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 3, Month B @@ -417,7 +420,7 @@ segments: start: 55 end: 56 type: string - required: false + required: true field_validators: - id: in_values params: @@ -429,11 +432,11 @@ segments: start: 56 end: 58 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -442,11 +445,11 @@ segments: start: 65 end: 72 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 3, Month C @@ -468,7 +471,7 @@ segments: start: 55 end: 56 type: string - required: false + required: true field_validators: - id: in_values params: @@ -480,11 +483,11 @@ segments: start: 56 end: 58 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -493,11 +496,11 @@ segments: start: 72 end: 79 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 4, Month A @@ -519,7 +522,7 @@ segments: start: 79 end: 80 type: string - required: false + required: true field_validators: - id: in_values params: @@ -531,11 +534,11 @@ segments: start: 80 end: 82 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -544,11 +547,11 @@ segments: start: 82 end: 89 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 4, Month B @@ -570,7 +573,7 @@ segments: start: 79 end: 80 type: string - required: false + required: true field_validators: - id: in_values params: @@ -582,11 +585,11 @@ segments: start: 80 end: 
82 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -595,11 +598,11 @@ segments: start: 89 end: 96 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 4, Month C @@ -621,7 +624,7 @@ segments: start: 79 end: 80 type: string - required: false + required: true field_validators: - id: in_values params: @@ -633,11 +636,11 @@ segments: start: 80 end: 82 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -646,11 +649,11 @@ segments: start: 96 end: 103 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 5, Month A @@ -672,7 +675,7 @@ segments: start: 103 end: 104 type: string - required: false + required: true field_validators: - id: in_values params: @@ -684,11 +687,11 @@ segments: start: 104 end: 106 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -697,11 +700,11 @@ segments: start: 106 end: 113 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 5, Month B @@ -723,7 +726,7 @@ segments: start: 103 end: 104 type: string - required: false + required: true field_validators: - id: in_values params: @@ -735,11 +738,11 @@ segments: start: 104 end: 106 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -748,11 +751,11 @@ segments: start: 113 end: 120 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 5, Month C @@ -774,7 +777,7 @@ segments: start: 103 end: 104 type: string - 
required: false + required: true field_validators: - id: in_values params: @@ -786,11 +789,11 @@ segments: start: 104 end: 106 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -799,11 +802,11 @@ segments: start: 120 end: 127 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 6, Month A @@ -825,7 +828,7 @@ segments: start: 127 end: 128 type: string - required: false + required: true field_validators: - id: in_values params: @@ -837,11 +840,11 @@ segments: start: 128 end: 130 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -850,11 +853,11 @@ segments: start: 130 end: 137 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 6, Month B @@ -876,7 +879,7 @@ segments: start: 127 end: 128 type: string - required: false + required: true field_validators: - id: in_values params: @@ -888,11 +891,11 @@ segments: start: 128 end: 130 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -901,11 +904,11 @@ segments: start: 137 end: 144 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 6, Month C @@ -927,7 +930,7 @@ segments: start: 127 end: 128 type: string - required: false + required: true field_validators: - id: in_values params: @@ -939,11 +942,11 @@ segments: start: 128 end: 130 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -952,11 +955,11 @@ segments: start: 144 end: 151 type: integer - required: false + required: true field_validators: - id: 
in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 7, Month A @@ -978,7 +981,7 @@ segments: start: 151 end: 152 type: string - required: false + required: true field_validators: - id: in_values params: @@ -990,11 +993,11 @@ segments: start: 152 end: 154 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -1003,11 +1006,11 @@ segments: start: 154 end: 161 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 7, Month B @@ -1029,7 +1032,7 @@ segments: start: 151 end: 152 type: string - required: false + required: true field_validators: - id: in_values params: @@ -1041,11 +1044,11 @@ segments: start: 152 end: 154 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -1054,11 +1057,11 @@ segments: start: 161 end: 168 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 7, Month C @@ -1080,7 +1083,7 @@ segments: start: 151 end: 152 type: string - required: false + required: true field_validators: - id: in_values params: @@ -1092,11 +1095,11 @@ segments: start: 152 end: 154 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -1105,11 +1108,11 @@ segments: start: 168 end: 175 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 8, Month A @@ -1131,7 +1134,7 @@ segments: start: 175 end: 176 type: string - required: false + required: true field_validators: - id: in_values params: @@ -1143,11 +1146,11 @@ segments: start: 176 end: 178 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - 
name: FAMILIES_MONTH @@ -1156,11 +1159,11 @@ segments: start: 178 end: 185 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 8, Month B @@ -1182,7 +1185,7 @@ segments: start: 175 end: 176 type: string - required: false + required: true field_validators: - id: in_values params: @@ -1194,11 +1197,11 @@ segments: start: 176 end: 178 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -1207,11 +1210,11 @@ segments: start: 185 end: 192 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 8, Month C @@ -1233,7 +1236,7 @@ segments: start: 175 end: 176 type: string - required: false + required: true field_validators: - id: in_values params: @@ -1245,11 +1248,11 @@ segments: start: 176 end: 178 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -1258,11 +1261,11 @@ segments: start: 192 end: 199 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 9, Month A @@ -1284,7 +1287,7 @@ segments: start: 199 end: 200 type: string - required: false + required: true field_validators: - id: in_values params: @@ -1296,11 +1299,11 @@ segments: start: 200 end: 202 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -1309,11 +1312,11 @@ segments: start: 202 end: 209 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 9, Month B @@ -1335,7 +1338,7 @@ segments: start: 199 end: 200 type: string - required: false + required: true field_validators: - id: in_values params: @@ -1347,11 +1350,11 
@@ segments: start: 200 end: 202 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -1360,11 +1363,11 @@ segments: start: 209 end: 216 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 9, Month C @@ -1386,7 +1389,7 @@ segments: start: 199 end: 200 type: string - required: false + required: true field_validators: - id: in_values params: @@ -1398,11 +1401,11 @@ segments: start: 200 end: 202 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -1411,11 +1414,11 @@ segments: start: 216 end: 223 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 10, Month A @@ -1437,7 +1440,7 @@ segments: start: 223 end: 224 type: string - required: false + required: true field_validators: - id: in_values params: @@ -1449,11 +1452,11 @@ segments: start: 224 end: 226 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -1462,11 +1465,11 @@ segments: start: 226 end: 233 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 10, Month B @@ -1488,7 +1491,7 @@ segments: start: 223 end: 224 type: string - required: false + required: true field_validators: - id: in_values params: @@ -1500,11 +1503,11 @@ segments: start: 224 end: 226 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -1513,11 +1516,11 @@ segments: start: 233 end: 240 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 # Stratum 10, Month C @@ 
-1539,7 +1542,7 @@ segments: start: 223 end: 224 type: string - required: false + required: true field_validators: - id: in_values params: @@ -1551,11 +1554,11 @@ segments: start: 224 end: 226 type: string - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 99 - name: FAMILIES_MONTH @@ -1564,9 +1567,9 @@ segments: start: 240 end: 247 type: integer - required: false + required: true field_validators: - id: in_range_int params: - min: 0 + min: 1 max: 9999999 diff --git a/tdrs-services/parser/config/schemas/tribal_tanf/t1.yaml b/tdrs-services/parser/config/schemas/tribal_tanf/t1.yaml index bc1c6f73a1..d5b1e5edcf 100644 --- a/tdrs-services/parser/config/schemas/tribal_tanf/t1.yaml +++ b/tdrs-services/parser/config/schemas/tribal_tanf/t1.yaml @@ -11,6 +11,90 @@ section: 1 document: "Tribal TANF Active Case Data" format: positional +record_validators: + - id: record_length_min + params: + min: 117 + - id: case_number_not_empty + - id: rpt_month_year_is_valid + - id: rpt_month_year_matches_header_year_quarter + + # Record validators with VALUE_CONSISTENCY error type (cross-field validation) + # Amount requires positive validators + - id: amount_requires_positive + params: + amount_field: CASH_AMOUNT + required_field: NBR_MONTHS + - id: amount_requires_positive + params: + amount_field: CC_AMOUNT + required_field: CHILDREN_COVERED + - id: amount_requires_positive + params: + amount_field: CC_AMOUNT + required_field: CC_NBR_MONTHS + - id: amount_requires_positive + params: + amount_field: TRANSP_AMOUNT + required_field: TRANSP_NBR_MONTHS + - id: amount_requires_positive + params: + amount_field: TRANSITION_SERVICES_AMOUNT + required_field: TRANSITION_NBR_MONTHS + - id: amount_requires_positive + params: + amount_field: OTHER_AMOUNT + required_field: OTHER_NBR_MONTHS + # Sanction reduction amount validators + - id: amount_requires_value_in + params: + amount_field: SANC_REDUCTION_AMT + required_field: WORK_REQ_SANCTION + 
values: [1, 2] + - id: amount_requires_value_in + params: + amount_field: SANC_REDUCTION_AMT + required_field: FAMILY_SANC_ADULT + values: [1, 2] + - id: amount_requires_value_in + params: + amount_field: SANC_REDUCTION_AMT + required_field: SANC_TEEN_PARENT + values: [1, 2] + - id: amount_requires_value_in + params: + amount_field: SANC_REDUCTION_AMT + required_field: NON_COOPERATION_CSE + values: [1, 2] + - id: amount_requires_value_in + params: + amount_field: SANC_REDUCTION_AMT + required_field: FAILURE_TO_COMPLY + values: [1, 2] + - id: amount_requires_value_in + params: + amount_field: SANC_REDUCTION_AMT + required_field: OTHER_SANCTION + values: [1, 2] + # Other total reductions validators + - id: amount_requires_value_in + params: + amount_field: OTHER_TOTAL_REDUCTIONS + required_field: FAMILY_CAP + values: [1, 2] + - id: amount_requires_value_in + params: + amount_field: OTHER_TOTAL_REDUCTIONS + required_field: REDUCTIONS_ON_RECEIPTS + values: [1, 2] + - id: amount_requires_value_in + params: + amount_field: OTHER_TOTAL_REDUCTIONS + required_field: OTHER_NON_SANCTION + values: [1, 2] + # Sum validator + - id: t1_sum_assistance_positive + # Fields shared across all segments (included in every output row) shared: - name: RecordType @@ -47,6 +131,8 @@ segments: end: 22 type: string required: false + field_validators: + - id: is_numeric transform: name: zero_pad params: @@ -59,6 +145,11 @@ segments: end: 24 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: ZIP_CODE friendly_name: ZIP Code @@ -67,6 +158,8 @@ segments: end: 29 type: string required: true + field_validators: + - id: is_numeric - name: FUNDING_STREAM friendly_name: Funding Stream @@ -75,6 +168,10 @@ segments: end: 30 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2] - name: DISPOSITION friendly_name: Disposition @@ -83,6 +180,10 @@ segments: end: 31 type: integer required: true + field_validators: 
+ - id: equals + params: + value: 1 - name: NEW_APPLICANT friendly_name: New Applicant @@ -91,6 +192,10 @@ segments: end: 32 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2] - name: NBR_FAMILY_MEMBERS friendly_name: Number of Family Members @@ -99,6 +204,11 @@ segments: end: 34 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 1 + max: 99 - name: FAMILY_TYPE friendly_name: Type of Family for Work Participation @@ -107,6 +217,10 @@ segments: end: 35 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2, 3] - name: RECEIVES_SUB_HOUSING friendly_name: Receives Subsidized Housing @@ -115,6 +229,11 @@ segments: end: 36 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 1 + max: 3 - name: RECEIVES_MED_ASSISTANCE friendly_name: Receives Medical Assistance @@ -123,6 +242,10 @@ segments: end: 37 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2] - name: RECEIVES_FOOD_STAMPS friendly_name: Receives Food Stamps @@ -131,6 +254,10 @@ segments: end: 38 type: integer required: false + field_validators: + - id: in_values + params: + values: [0, 1, 2] - name: AMT_FOOD_STAMP_ASSISTANCE friendly_name: Food Stamp Assistance @@ -139,6 +266,11 @@ segments: end: 42 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 - name: RECEIVES_SUB_CC friendly_name: Receives Subsidized Child Care @@ -147,6 +279,10 @@ segments: end: 43 type: integer required: false + field_validators: + - id: in_values + params: + values: [0, 1, 2, 3] - name: AMT_SUB_CC friendly_name: Subsidized Child Care @@ -155,6 +291,11 @@ segments: end: 47 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 - name: CHILD_SUPPORT_AMT friendly_name: Amount of Child Support @@ -163,6 +304,11 @@ segments: end: 51 type: integer required: 
true + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 - name: FAMILY_CASH_RESOURCES friendly_name: Amount of the Family's Cash Resources @@ -171,6 +317,11 @@ segments: end: 55 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 - name: CASH_AMOUNT friendly_name: Cash @@ -179,6 +330,11 @@ segments: end: 59 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 - name: NBR_MONTHS friendly_name: Number of Months @@ -187,6 +343,11 @@ segments: end: 62 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 999 - name: CC_AMOUNT friendly_name: TANF Child Care @@ -195,6 +356,11 @@ segments: end: 66 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 - name: CHILDREN_COVERED friendly_name: TANF Child Care Number of Children Covered @@ -203,6 +369,11 @@ segments: end: 68 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: CC_NBR_MONTHS friendly_name: TANF Child Care Number of Months @@ -211,6 +382,11 @@ segments: end: 71 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 999 - name: TRANSP_AMOUNT friendly_name: Transportation @@ -219,6 +395,11 @@ segments: end: 75 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 - name: TRANSP_NBR_MONTHS friendly_name: Transportation Number of Months @@ -227,6 +408,11 @@ segments: end: 78 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 999 - name: TRANSITION_SERVICES_AMOUNT friendly_name: Transitional Services @@ -235,6 +421,11 @@ segments: end: 82 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 - name: TRANSITION_NBR_MONTHS friendly_name: Transitional Services Number of Months @@ -243,6 
+434,11 @@ segments: end: 85 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 999 - name: OTHER_AMOUNT friendly_name: Other @@ -251,6 +447,11 @@ segments: end: 89 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 - name: OTHER_NBR_MONTHS friendly_name: Other Number of Months @@ -259,6 +460,11 @@ segments: end: 92 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 999 - name: SANC_REDUCTION_AMT friendly_name: Total Dollar Amount of Reductions due to Sanctions @@ -267,6 +473,11 @@ segments: end: 96 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 - name: WORK_REQ_SANCTION friendly_name: Work Requirements Sanction @@ -275,6 +486,10 @@ segments: end: 97 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2] - name: FAMILY_SANC_ADULT friendly_name: Family Sanction for Adult with No High School Diploma @@ -283,6 +498,10 @@ segments: end: 98 type: integer required: false + field_validators: + - id: in_values + params: + values: [0, 1, 2] - name: SANC_TEEN_PARENT friendly_name: Sanction for Teen Parent not Attending School @@ -291,6 +510,10 @@ segments: end: 99 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2] - name: NON_COOPERATION_CSE friendly_name: Non-Cooperation with Child Support @@ -299,6 +522,10 @@ segments: end: 100 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2] - name: FAILURE_TO_COMPLY friendly_name: Failure to comply with Individual Responsibility Plan @@ -307,6 +534,10 @@ segments: end: 101 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2] - name: OTHER_SANCTION friendly_name: Other Sanction @@ -315,6 +546,10 @@ segments: end: 102 type: integer required: true + field_validators: + - id: 
in_values + params: + values: [1, 2] - name: RECOUPMENT_PRIOR_OVRPMT friendly_name: Recoupment of Prior Overpayment @@ -323,6 +558,11 @@ segments: end: 106 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 - name: OTHER_TOTAL_REDUCTIONS friendly_name: Total Dollar Amount of Reductions due to Other Reasons @@ -331,6 +571,11 @@ segments: end: 110 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 - name: FAMILY_CAP friendly_name: Family Cap @@ -339,6 +584,10 @@ segments: end: 111 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2] - name: REDUCTIONS_ON_RECEIPTS friendly_name: Reduction Based on Length of Receipt of Assistance @@ -347,6 +596,10 @@ segments: end: 112 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2] - name: OTHER_NON_SANCTION friendly_name: Other, Non-sanction @@ -355,6 +608,10 @@ segments: end: 113 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2] - name: WAIVER_EVAL_CONTROL_GRPS friendly_name: Waiver Evaluation Control Groups @@ -363,6 +620,11 @@ segments: end: 114 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 9 - name: FAMILY_EXEMPT_TIME_LIMITS friendly_name: Exempt from Tribal Time-Limit Provisions @@ -371,6 +633,11 @@ segments: end: 116 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 9 - name: FAMILY_NEW_CHILD friendly_name: A New Child-Only Family @@ -379,3 +646,7 @@ segments: end: 117 type: integer required: false + field_validators: + - id: in_values + params: + values: [0, 1, 2] diff --git a/tdrs-services/parser/config/schemas/tribal_tanf/t2.yaml b/tdrs-services/parser/config/schemas/tribal_tanf/t2.yaml index 511ca2b002..5ab14ed065 100644 --- a/tdrs-services/parser/config/schemas/tribal_tanf/t2.yaml +++ 
b/tdrs-services/parser/config/schemas/tribal_tanf/t2.yaml @@ -11,6 +11,115 @@ section: 1 document: "Tribal TANF Active Case Data" format: positional +record_validators: + - id: record_length_min + params: + min: 122 + - id: case_number_not_empty + - id: rpt_month_year_is_valid + - id: rpt_month_year_matches_header_year_quarter + + # Record validators with VALUE_CONSISTENCY error type (cross-field validation) + # Race fields when FAMILY_AFFILIATION is 1-3 + - id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 3 + target_field: RACE_HISPANIC + target_min: 1 + target_max: 2 + - id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 3 + target_field: RACE_AMER_INDIAN + target_min: 1 + target_max: 2 + - id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 3 + target_field: RACE_ASIAN + target_min: 1 + target_max: 2 + - id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 3 + target_field: RACE_BLACK + target_min: 1 + target_max: 2 + - id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 3 + target_field: RACE_HAWAIIAN + target_min: 1 + target_max: 2 + - id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 3 + target_field: RACE_WHITE + target_min: 1 + target_max: 2 + # MARITAL_STATUS when FAMILY_AFFILIATION is 1-3 + - id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 3 + target_field: MARITAL_STATUS + target_min: 1 + target_max: 5 + # PARENT_MINOR_CHILD when FAMILY_AFFILIATION is 1-2 + - id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 2 + target_field: PARENT_MINOR_CHILD + 
target_min: 1 + target_max: 3 + - id: t2_family_affil_2_3_education_level + - id: ifthenalso_range_to_not_values + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 1 + target_field: EDUCATION_LEVEL + excluded_values: [0, 99] + - id: ifthenalso_range_to_values + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 1 + target_field: CITIZENSHIP_STATUS + values: [1, 2] + - id: ifthenalso_range_to_values + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 3 + target_field: COOPERATION_CHILD_SUPPORT + values: [1, 2, 9] + # EMPLOYMENT_STATUS when FAMILY_AFFILIATION is 1-3 + - id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 3 + target_field: EMPLOYMENT_STATUS + target_min: 1 + target_max: 3 + - id: tribal_t2_family_affil_1_2_work_part_status + # Fields shared across all segments (included in every output row) shared: - name: RecordType @@ -47,6 +156,10 @@ segments: end: 20 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2, 3, 5] - name: NONCUSTODIAL_PARENT friendly_name: Noncustodial Parent Indicator @@ -55,6 +168,10 @@ segments: end: 21 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2] - name: DATE_OF_BIRTH friendly_name: Date of Birth @@ -63,6 +180,14 @@ segments: end: 29 type: string required: true + field_validators: + - id: length + params: + n: 8 + - id: valid_year + - id: valid_month + - id: valid_day + - id: is_numeric - name: SSN friendly_name: Social Security Number @@ -71,6 +196,8 @@ segments: end: 38 type: string required: true + field_validators: + - id: is_numeric transform: name: ssn_decrypt @@ -81,6 +208,11 @@ segments: end: 39 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 2 - name: RACE_AMER_INDIAN friendly_name: American Indian or Alaska Native @@ -89,6 +221,11 
@@ segments: end: 40 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 2 - name: RACE_ASIAN friendly_name: Asian @@ -97,6 +234,11 @@ segments: end: 41 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 2 - name: RACE_BLACK friendly_name: Black or African American @@ -105,6 +247,11 @@ segments: end: 42 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 2 - name: RACE_HAWAIIAN friendly_name: Native Hawaiian or Pacific Islander @@ -113,6 +260,11 @@ segments: end: 43 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 2 - name: RACE_WHITE friendly_name: White @@ -121,6 +273,11 @@ segments: end: 44 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 2 - name: SEX friendly_name: Sex @@ -129,6 +286,8 @@ segments: end: 45 type: integer required: false + field_validators: + - id: not_negative - name: FED_OASDI_PROGRAM friendly_name: Receives Disability Benefits OASDI Program @@ -137,6 +296,10 @@ segments: end: 46 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2] - name: FED_DISABILITY_STATUS friendly_name: Receives Disability Benefits Other Federal Disability Status @@ -145,6 +308,10 @@ segments: end: 47 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2] - name: DISABLED_TITLE_XIVAPDT friendly_name: Receives Disability Benefits Permanently and Totally Disabled @@ -153,6 +320,10 @@ segments: end: 48 type: string required: true + field_validators: + - id: in_values_or_blank + params: + values: ["1", "2"] - name: AID_AGED_BLIND friendly_name: Receives Disability Benefits AABD @@ -161,6 +332,11 @@ segments: end: 49 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 2 - name: RECEIVE_SSI friendly_name: Receives Disability 
Benefits SSI @@ -169,6 +345,10 @@ segments: end: 50 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2] - name: MARITAL_STATUS friendly_name: Marital Status @@ -177,6 +357,11 @@ segments: end: 51 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 5 - name: RELATIONSHIP_HOH friendly_name: Relationship to Head-of-Household @@ -185,6 +370,11 @@ segments: end: 53 type: string required: true + field_validators: + - id: in_range_int + params: + min: 1 + max: 10 - name: PARENT_MINOR_CHILD friendly_name: Parent with Minor Child in the Family @@ -193,6 +383,11 @@ segments: end: 54 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 3 - name: NEEDS_PREGNANT_WOMAN friendly_name: Needs of a Pregnant Woman @@ -201,6 +396,11 @@ segments: end: 55 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 2 - name: EDUCATION_LEVEL friendly_name: Educational Level @@ -209,6 +409,8 @@ segments: end: 57 type: string required: false + field_validators: + - id: education_level - name: CITIZENSHIP_STATUS friendly_name: Citizenship/Alienage @@ -217,6 +419,10 @@ segments: end: 58 type: integer required: false + field_validators: + - id: in_values + params: + values: [1, 2, 9] - name: COOPERATION_CHILD_SUPPORT friendly_name: Cooperation with Child Support @@ -225,6 +431,10 @@ segments: end: 59 type: integer required: false + field_validators: + - id: in_values + params: + values: [0, 1, 2, 9] - name: MONTHS_FED_TIME_LIMIT friendly_name: Number of Months Countable toward Tribal Time Limit @@ -233,6 +443,11 @@ segments: end: 62 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 999 - name: MONTHS_STATE_TIME_LIMIT friendly_name: Number of Countable Months Remaining Under Tribe's Time Limit @@ -241,6 +456,11 @@ segments: end: 64 type: string required: false + field_validators: + - 
id: in_range_int + params: + min: 0 + max: 99 - name: CURRENT_MONTH_STATE_EXEMPT friendly_name: Current Month Exempt from Tribe's Time Limit @@ -249,6 +469,11 @@ segments: end: 65 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 2 - name: EMPLOYMENT_STATUS friendly_name: Employment Status @@ -257,6 +482,11 @@ segments: end: 66 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 3 - name: WORK_PART_STATUS friendly_name: Work Participation Status @@ -265,6 +495,10 @@ segments: end: 68 type: string required: false + field_validators: + - id: in_values + params: + values: ["00", "01", "02", "03", "05", "06", "07", "08", "09", "11", "12", "13", "14", "15", "16", "17", "18", "19", "99"] - name: UNSUB_EMPLOYMENT friendly_name: Unsubsidized Employment @@ -273,6 +507,11 @@ segments: end: 70 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: SUB_PRIVATE_EMPLOYMENT friendly_name: Subsidized Private-Sector Employment @@ -281,6 +520,11 @@ segments: end: 72 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: SUB_PUBLIC_EMPLOYMENT friendly_name: Subsidized Public-Sector Employment @@ -289,6 +533,11 @@ segments: end: 74 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: WORK_EXPERIENCE friendly_name: Work Experience @@ -297,6 +546,11 @@ segments: end: 76 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: OJT friendly_name: On-the-job Training @@ -305,6 +559,11 @@ segments: end: 78 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: JOB_SEARCH friendly_name: Job Search and Job Readiness @@ -313,6 +572,11 @@ segments: end: 80 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + 
max: 99 - name: COMM_SERVICES friendly_name: Community Service Programs @@ -321,6 +585,11 @@ segments: end: 82 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: VOCATIONAL_ED_TRAINING friendly_name: Vocational Educational Training @@ -329,6 +598,11 @@ segments: end: 84 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: JOB_SKILLS_TRAINING friendly_name: Job Skills Training @@ -337,6 +611,11 @@ segments: end: 86 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: ED_NO_HIGH_SCHOOL_DIPLOMA friendly_name: Education Directly Related to Employment @@ -345,6 +624,11 @@ segments: end: 88 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: SCHOOL_ATTENDENCE friendly_name: Satisfactory School Attendance @@ -353,6 +637,11 @@ segments: end: 90 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: PROVIDE_CC friendly_name: Providing Child Care @@ -361,6 +650,11 @@ segments: end: 92 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: ADD_WORK_ACTIVITIES friendly_name: Additional Work Activities @@ -369,6 +663,10 @@ segments: end: 94 type: string required: false + field_validators: + - id: equals + params: + value: "00" transform: name: zero_pad params: @@ -381,6 +679,11 @@ segments: end: 96 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: REQ_HRS_WAIVER_DEMO friendly_name: Required Hours of Work under Waiver Demonstration @@ -389,6 +692,11 @@ segments: end: 98 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: EARNED_INCOME friendly_name: Amount of Earned Income @@ -397,6 +705,11 @@ segments: end: 102 type: string required: false + 
field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 - name: UNEARNED_INCOME_TAX_CREDIT friendly_name: Amount of Unearned Income @@ -405,6 +718,11 @@ segments: end: 106 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 - name: UNEARNED_SOCIAL_SECURITY friendly_name: Amount of Unearned Income Social Security @@ -413,6 +731,11 @@ segments: end: 110 type: string required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 - name: UNEARNED_SSI friendly_name: Amount of Unearned Income SSI @@ -421,6 +744,11 @@ segments: end: 114 type: string required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 - name: UNEARNED_WORKERS_COMP friendly_name: Amount of Unearned Income Workers Compensation @@ -429,6 +757,11 @@ segments: end: 118 type: string required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 - name: OTHER_UNEARNED_INCOME friendly_name: Amount of Unearned Income Other @@ -437,3 +770,8 @@ segments: end: 122 type: string required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 diff --git a/tdrs-services/parser/config/schemas/tribal_tanf/t3.yaml b/tdrs-services/parser/config/schemas/tribal_tanf/t3.yaml index 7d022e4bc8..638fd0f6cc 100644 --- a/tdrs-services/parser/config/schemas/tribal_tanf/t3.yaml +++ b/tdrs-services/parser/config/schemas/tribal_tanf/t3.yaml @@ -11,6 +11,103 @@ section: 1 document: "Tribal TANF Active Case Data" format: positional +record_validators: + - id: case_number_not_empty + - id: rpt_month_year_is_valid + - id: rpt_month_year_matches_header_year_quarter + + # Record validators with VALUE_CONSISTENCY error type (cross-field validation) + # Race fields when FAMILY_AFFILIATION is 1-2 + - id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 2 + target_field: RACE_HISPANIC + target_min: 1 + target_max: 2 + - 
id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 2 + target_field: RACE_AMER_INDIAN + target_min: 1 + target_max: 2 + - id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 2 + target_field: RACE_ASIAN + target_min: 1 + target_max: 2 + - id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 2 + target_field: RACE_BLACK + target_min: 1 + target_max: 2 + - id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 2 + target_field: RACE_HAWAIIAN + target_min: 1 + target_max: 2 + - id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 2 + target_field: RACE_WHITE + target_min: 1 + target_max: 2 + # RELATIONSHIP_HOH when FAMILY_AFFILIATION is 1-2 + - id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 2 + target_field: RELATIONSHIP_HOH + target_min: 4 + target_max: 9 + # PARENT_MINOR_CHILD when FAMILY_AFFILIATION is 1-2 + - id: ifthenalso_range_to_values + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 2 + target_field: PARENT_MINOR_CHILD + values: [2, 3] + # EDUCATION_LEVEL when FAMILY_AFFILIATION is 1 must not be 99 + - id: ifthenalso_range_to_not_values + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 1 + target_field: EDUCATION_LEVEL + excluded_values: [99] + # CITIZENSHIP_STATUS when FAMILY_AFFILIATION is 1 + - id: ifthenalso_range_to_values + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 1 + target_field: CITIZENSHIP_STATUS + values: [1, 2] + # CITIZENSHIP_STATUS when FAMILY_AFFILIATION is 2 + - id: ifthenalso_range_to_values + params: + condition_field: FAMILY_AFFILIATION + condition_min: 2 + 
condition_max: 2 + target_field: CITIZENSHIP_STATUS + values: [1, 2, 9] + # Fields shared across all segments (included in every output row) shared: - name: RecordType @@ -49,6 +146,10 @@ segments: end: 20 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2, 4] - name: DATE_OF_BIRTH friendly_name: Date of Birth @@ -57,6 +158,14 @@ segments: end: 28 type: string required: true + field_validators: + - id: length + params: + n: 8 + - id: valid_year + - id: valid_month + - id: valid_day + - id: is_numeric - name: SSN friendly_name: Social Security Number @@ -65,6 +174,8 @@ segments: end: 37 type: string required: true + field_validators: + - id: is_numeric transform: name: ssn_decrypt @@ -75,6 +186,8 @@ segments: end: 38 type: integer required: false + field_validators: + - id: validate_race - name: RACE_AMER_INDIAN friendly_name: American Indian or Alaska Native @@ -83,6 +196,8 @@ segments: end: 39 type: integer required: false + field_validators: + - id: validate_race - name: RACE_ASIAN friendly_name: Asian @@ -91,6 +206,8 @@ segments: end: 40 type: integer required: false + field_validators: + - id: validate_race - name: RACE_BLACK friendly_name: Black or African American @@ -99,6 +216,8 @@ segments: end: 41 type: integer required: false + field_validators: + - id: validate_race - name: RACE_HAWAIIAN friendly_name: Native Hawaiian or Pacific Islander @@ -107,6 +226,8 @@ segments: end: 42 type: integer required: false + field_validators: + - id: validate_race - name: RACE_WHITE friendly_name: White @@ -115,6 +236,8 @@ segments: end: 43 type: integer required: false + field_validators: + - id: validate_race - name: SEX friendly_name: Sex @@ -123,6 +246,11 @@ segments: end: 44 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 9 - name: RECEIVE_NONSSA_BENEFITS friendly_name: Receives Federal Disability Benefits @@ -131,6 +259,10 @@ segments: end: 45 type: integer required: true + 
field_validators: + - id: in_values + params: + values: [1, 2] - name: RECEIVE_SSI friendly_name: Receives SSI @@ -139,6 +271,10 @@ segments: end: 46 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2] - name: RELATIONSHIP_HOH friendly_name: Relationship to Head-of-Household @@ -147,6 +283,11 @@ segments: end: 48 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 10 - name: PARENT_MINOR_CHILD friendly_name: Parent with Minor Child in the Family @@ -155,6 +296,10 @@ segments: end: 49 type: integer required: false + field_validators: + - id: in_values + params: + values: [0, 2, 3] - name: EDUCATION_LEVEL friendly_name: Educational Level @@ -163,6 +308,8 @@ segments: end: 51 type: string required: true + field_validators: + - id: education_level - name: CITIZENSHIP_STATUS friendly_name: Citizenship/Alienage @@ -171,6 +318,10 @@ segments: end: 52 type: integer required: false + field_validators: + - id: in_values + params: + values: [1, 2, 9] - name: UNEARNED_SSI friendly_name: Amount of Unearned Income SSI @@ -179,6 +330,11 @@ segments: end: 56 type: string required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 - name: OTHER_UNEARNED_INCOME friendly_name: Amount of Unearned Income Other @@ -187,7 +343,11 @@ segments: end: 60 type: string required: true - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 # Child 2 (optional - only if present in the line) - fields: - name: FAMILY_AFFILIATION @@ -197,6 +357,10 @@ segments: end: 61 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2, 4] - name: DATE_OF_BIRTH friendly_name: Date of Birth @@ -205,6 +369,14 @@ segments: end: 69 type: string required: true + field_validators: + - id: length + params: + n: 8 + - id: valid_year + - id: valid_month + - id: valid_day + - id: is_numeric - name: SSN friendly_name: Social Security Number @@ -213,6 +385,8 
@@ segments: end: 78 type: string required: true + field_validators: + - id: is_numeric transform: name: ssn_decrypt @@ -223,6 +397,8 @@ segments: end: 79 type: integer required: false + field_validators: + - id: validate_race - name: RACE_AMER_INDIAN friendly_name: American Indian or Alaska Native @@ -231,6 +407,8 @@ segments: end: 80 type: integer required: false + field_validators: + - id: validate_race - name: RACE_ASIAN friendly_name: Asian @@ -239,6 +417,8 @@ segments: end: 81 type: integer required: false + field_validators: + - id: validate_race - name: RACE_BLACK friendly_name: Black or African American @@ -247,6 +427,8 @@ segments: end: 82 type: integer required: false + field_validators: + - id: validate_race - name: RACE_HAWAIIAN friendly_name: Native Hawaiian or Pacific Islander @@ -255,6 +437,8 @@ segments: end: 83 type: integer required: false + field_validators: + - id: validate_race - name: RACE_WHITE friendly_name: White @@ -263,6 +447,8 @@ segments: end: 84 type: integer required: false + field_validators: + - id: validate_race - name: SEX friendly_name: Sex @@ -271,6 +457,11 @@ segments: end: 85 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 9 - name: RECEIVE_NONSSA_BENEFITS friendly_name: Receives Federal Disability Benefits @@ -279,6 +470,10 @@ segments: end: 86 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2] - name: RECEIVE_SSI friendly_name: Receives SSI @@ -287,6 +482,10 @@ segments: end: 87 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2] - name: RELATIONSHIP_HOH friendly_name: Relationship to Head-of-Household @@ -295,6 +494,11 @@ segments: end: 89 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 10 - name: PARENT_MINOR_CHILD friendly_name: Parent with Minor Child in the Family @@ -303,6 +507,10 @@ segments: end: 90 type: integer required: false + 
field_validators: + - id: in_values + params: + values: [0, 2, 3] - name: EDUCATION_LEVEL friendly_name: Educational Level @@ -311,6 +519,8 @@ segments: end: 92 type: string required: true + field_validators: + - id: education_level - name: CITIZENSHIP_STATUS friendly_name: Citizenship/Alienage @@ -319,6 +529,10 @@ segments: end: 93 type: integer required: false + field_validators: + - id: in_values + params: + values: [1, 2, 9] - name: UNEARNED_SSI friendly_name: Amount of Unearned Income SSI @@ -327,6 +541,11 @@ segments: end: 97 type: string required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 - name: OTHER_UNEARNED_INCOME friendly_name: Amount of Unearned Income Other @@ -335,3 +554,8 @@ segments: end: 101 type: string required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 diff --git a/tdrs-services/parser/config/schemas/tribal_tanf/t4.yaml b/tdrs-services/parser/config/schemas/tribal_tanf/t4.yaml index 47f67a545b..880ab9d482 100644 --- a/tdrs-services/parser/config/schemas/tribal_tanf/t4.yaml +++ b/tdrs-services/parser/config/schemas/tribal_tanf/t4.yaml @@ -10,6 +10,14 @@ section: 2 document: "Tribal TANF Closed Case Data" format: positional +record_validators: + - id: record_length_min + params: + min: 36 + - id: case_number_not_empty + - id: rpt_month_year_is_valid + - id: rpt_month_year_matches_header_year_quarter + # Fields shared across all segments (included in every output row) shared: - name: RecordType @@ -27,6 +35,8 @@ shared: end: 8 type: integer required: true + field_validators: + - id: year_after_1998 - name: CASE_NUMBER friendly_name: Case Number--TANF @@ -46,6 +56,8 @@ segments: end: 22 type: string required: false + field_validators: + - id: is_numeric transform: name: zero_pad params: @@ -58,6 +70,11 @@ segments: end: 24 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: ZIP_CODE friendly_name: ZIP Code @@ -66,6 +83,11 @@ 
segments: end: 29 type: string required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999 - name: DISPOSITION friendly_name: Disposition @@ -74,6 +96,10 @@ segments: end: 30 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2] - name: CLOSURE_REASON friendly_name: Reason for Closure @@ -82,6 +108,8 @@ segments: end: 32 type: string required: true + field_validators: + - id: tribal_closure_reason - name: REC_SUB_HOUSING friendly_name: Received Subsidized Housing @@ -90,6 +118,11 @@ segments: end: 33 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 1 + max: 3 - name: REC_MED_ASSIST friendly_name: Received Medical Assistance @@ -98,6 +131,10 @@ segments: end: 34 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2] - name: REC_FOOD_STAMPS friendly_name: Received SNAP Assistance @@ -106,6 +143,10 @@ segments: end: 35 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2] - name: REC_SUB_CC friendly_name: Received Subsidized Child Care @@ -114,3 +155,7 @@ segments: end: 36 type: integer required: true + field_validators: + - id: in_values + params: + values: [1, 2, 3] diff --git a/tdrs-services/parser/config/schemas/tribal_tanf/t5.yaml b/tdrs-services/parser/config/schemas/tribal_tanf/t5.yaml index 1cf1a664c0..2f2cfeda20 100644 --- a/tdrs-services/parser/config/schemas/tribal_tanf/t5.yaml +++ b/tdrs-services/parser/config/schemas/tribal_tanf/t5.yaml @@ -10,6 +10,102 @@ section: 2 document: "Tribal TANF Closed Case Data" format: positional +record_validators: + - id: record_length_min + params: + min: 71 + - id: case_number_not_empty + - id: rpt_month_year_is_valid + - id: rpt_month_year_matches_header_year_quarter + + # Record validators with VALUE_CONSISTENCY error type (cross-field validation) + # Race fields when FAMILY_AFFILIATION is 1-3 + - id: ifthenalso_range_to_range + params: + 
condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 3 + target_field: RACE_HISPANIC + target_min: 1 + target_max: 2 + - id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 3 + target_field: RACE_AMER_INDIAN + target_min: 1 + target_max: 2 + - id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 3 + target_field: RACE_ASIAN + target_min: 1 + target_max: 2 + - id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 3 + target_field: RACE_BLACK + target_min: 1 + target_max: 2 + - id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 3 + target_field: RACE_HAWAIIAN + target_min: 1 + target_max: 2 + - id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 3 + target_field: RACE_WHITE + target_min: 1 + target_max: 2 + # MARITAL_STATUS when FAMILY_AFFILIATION is 1-3 + - id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 3 + target_field: MARITAL_STATUS + target_min: 1 + target_max: 5 + # PARENT_MINOR_CHILD when FAMILY_AFFILIATION is 1-2 + - id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 2 + target_field: PARENT_MINOR_CHILD + target_min: 1 + target_max: 3 + # EDUCATION_LEVEL when FAMILY_AFFILIATION is 1-3 must be 1-16 or 98-99 + - id: m5_family_affil_1_3_education_level + # CITIZENSHIP_STATUS when FAMILY_AFFILIATION is 1 + - id: ifthenalso_range_to_values + params: + condition_field: FAMILY_AFFILIATION + condition_min: 1 + condition_max: 1 + target_field: CITIZENSHIP_STATUS + values: [1, 2] + # REC_FEDERAL_DISABILITY when FAMILY_AFFILIATION is 1 + - id: ifthenalso_range_to_range + params: + condition_field: FAMILY_AFFILIATION + 
condition_min: 1 + condition_max: 1 + target_field: REC_FEDERAL_DISABILITY + target_min: 1 + target_max: 2 + # Fields shared across all segments (included in every output row) shared: - name: RecordType @@ -27,6 +123,8 @@ shared: end: 8 type: integer required: true + field_validators: + - id: year_after_1998 - name: CASE_NUMBER friendly_name: Case Number--TANF @@ -46,6 +144,11 @@ segments: end: 20 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 1 + max: 5 - name: DATE_OF_BIRTH friendly_name: Date of Birth @@ -54,6 +157,14 @@ segments: end: 28 type: string required: true + field_validators: + - id: length + params: + n: 8 + - id: valid_year + - id: valid_month + - id: valid_day + - id: is_numeric - name: SSN friendly_name: Social Security Number @@ -62,6 +173,8 @@ segments: end: 37 type: string required: true + field_validators: + - id: is_numeric transform: name: ssn_decrypt @@ -72,6 +185,8 @@ segments: end: 38 type: integer required: false + field_validators: + - id: validate_race - name: RACE_AMER_INDIAN friendly_name: American Indian or Alaska Native @@ -80,6 +195,8 @@ segments: end: 39 type: integer required: false + field_validators: + - id: validate_race - name: RACE_ASIAN friendly_name: Asian @@ -88,6 +205,8 @@ segments: end: 40 type: integer required: false + field_validators: + - id: validate_race - name: RACE_BLACK friendly_name: Black or African American @@ -96,6 +215,8 @@ segments: end: 41 type: integer required: false + field_validators: + - id: validate_race - name: RACE_HAWAIIAN friendly_name: Hawaiian or Pacific Islander @@ -104,6 +225,8 @@ segments: end: 42 type: integer required: false + field_validators: + - id: validate_race - name: RACE_WHITE friendly_name: White @@ -112,6 +235,8 @@ segments: end: 43 type: integer required: false + field_validators: + - id: validate_race - name: SEX friendly_name: Sex @@ -120,6 +245,11 @@ segments: end: 44 type: integer required: false + field_validators: + - id: 
in_range_int + params: + min: 0 + max: 9 - name: REC_OASDI_INSURANCE friendly_name: Received OASDI Insurance @@ -128,6 +258,11 @@ segments: end: 45 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 2 - name: REC_FEDERAL_DISABILITY friendly_name: Receives Federal Disability @@ -136,6 +271,11 @@ segments: end: 46 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 2 - name: REC_AID_TOTALLY_DISABLED friendly_name: Received Aid Totally Disabled @@ -144,6 +284,11 @@ segments: end: 47 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 2 - name: REC_AID_AGED_BLIND friendly_name: Received Aid Aged Blind @@ -152,6 +297,11 @@ segments: end: 48 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 2 - name: REC_SSI friendly_name: Received SSI @@ -160,6 +310,11 @@ segments: end: 49 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 2 - name: MARITAL_STATUS friendly_name: Marital Status @@ -168,6 +323,11 @@ segments: end: 50 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 5 - name: RELATIONSHIP_HOH friendly_name: Relationship to Head-of-Household @@ -176,6 +336,11 @@ segments: end: 52 type: string required: true + field_validators: + - id: in_range_int + params: + min: 1 + max: 10 - name: PARENT_MINOR_CHILD friendly_name: Parent with Minor Child in the Family @@ -184,6 +349,11 @@ segments: end: 53 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 2 - name: NEEDS_OF_PREGNANT_WOMAN friendly_name: Needs of a Pregnant Woman @@ -192,6 +362,11 @@ segments: end: 54 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 2 - name: EDUCATION_LEVEL friendly_name: Educational Level @@ -200,6 +375,8 @@ segments: end: 56 type: string 
required: false + field_validators: + - id: education_level - name: CITIZENSHIP_STATUS friendly_name: Citizenship/Alienage @@ -208,6 +385,10 @@ segments: end: 57 type: integer required: false + field_validators: + - id: in_values + params: + values: [1, 2, 9] - name: COUNTABLE_MONTH_FED_TIME friendly_name: Number of Months Countable Toward Tribal Time Limit @@ -216,6 +397,11 @@ segments: end: 60 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 999 - name: COUNTABLE_MONTHS_STATE_TRIBE friendly_name: Number of Countable Months Remaining Under Tribe's Time Limit @@ -224,6 +410,11 @@ segments: end: 62 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: EMPLOYMENT_STATUS friendly_name: Employment Status @@ -232,6 +423,11 @@ segments: end: 63 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 3 - name: AMOUNT_EARNED_INCOME friendly_name: Amount of Earned Income @@ -240,6 +436,11 @@ segments: end: 67 type: string required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 - name: AMOUNT_UNEARNED_INCOME friendly_name: Amount of Unearned Income @@ -248,3 +449,8 @@ segments: end: 71 type: string required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999 diff --git a/tdrs-services/parser/config/schemas/tribal_tanf/t6.yaml b/tdrs-services/parser/config/schemas/tribal_tanf/t6.yaml index 2fc85b0d2d..1b8e04fc42 100644 --- a/tdrs-services/parser/config/schemas/tribal_tanf/t6.yaml +++ b/tdrs-services/parser/config/schemas/tribal_tanf/t6.yaml @@ -12,6 +12,28 @@ section: 3 document: "Tribal TANF Aggregate Data" format: positional +record_validators: + - id: record_length_min + params: + min: 379 + - id: rpt_month_year_matches_header_year_quarter + - id: calendar_quarter_is_valid + error_type: RECORD_PRE_CHECK + + # Sum validators + - id: sum_equals + params: + total_field: 
NUM_APPLICATIONS + component_fields: [NUM_APPROVED, NUM_DENIED] + - id: sum_equals + params: + total_field: NUM_FAMILIES + component_fields: [NUM_2_PARENTS, NUM_1_PARENTS, NUM_NO_PARENTS] + - id: sum_equals + params: + total_field: NUM_RECIPIENTS + component_fields: [NUM_ADULT_RECIPIENTS, NUM_CHILD_RECIPIENTS] + # Fields shared across all segments (included in every output row) shared: - name: RecordType @@ -29,6 +51,9 @@ shared: end: 7 type: integer required: true + field_validators: + - id: year_after_2019 + - id: calendar_quarter_is_valid # Each segment produces one output row (combined with shared fields) # T6 has 3 segments: Month A, Month B, Month C @@ -53,6 +78,11 @@ segments: end: 15 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_APPROVED friendly_name: Total Number of Approved Applications @@ -61,6 +91,11 @@ segments: end: 39 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_DENIED friendly_name: Total Number of Denied Applications @@ -69,6 +104,11 @@ segments: end: 63 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: ASSISTANCE friendly_name: Total Amount of Assistance @@ -77,6 +117,11 @@ segments: end: 91 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 999999999999 - name: NUM_FAMILIES friendly_name: Total Number of Families @@ -85,6 +130,11 @@ segments: end: 123 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_2_PARENTS friendly_name: Total Number of Two-parent Families @@ -93,6 +143,11 @@ segments: end: 147 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_1_PARENTS friendly_name: Total Number of One-Parent Families @@ -101,6 +156,11 @@ segments: end: 171 type: integer 
required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_NO_PARENTS friendly_name: Total Number of No-Parent Families @@ -109,6 +169,11 @@ segments: end: 195 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_RECIPIENTS friendly_name: Total Number of Recipients @@ -117,6 +182,11 @@ segments: end: 219 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_ADULT_RECIPIENTS friendly_name: Total Number of Adult Recipients @@ -125,6 +195,11 @@ segments: end: 243 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_CHILD_RECIPIENTS friendly_name: Total Number of Child Recipients @@ -133,6 +208,11 @@ segments: end: 267 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_NONCUSTODIALS friendly_name: Total Number of Noncustodial Parents @@ -141,6 +221,11 @@ segments: end: 291 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_BIRTHS friendly_name: Total Number of Births @@ -149,6 +234,11 @@ segments: end: 315 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_OUTWEDLOCK_BIRTHS friendly_name: Total Number of Out-of-Wedlock Births @@ -157,6 +247,11 @@ segments: end: 339 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_CLOSED_CASES friendly_name: Total Number of Closed Cases @@ -165,7 +260,11 @@ segments: end: 363 type: integer required: true - + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 # Month B (second month of quarter) - fields: - name: RPT_MONTH_YEAR @@ -186,6 +285,11 @@ segments: end: 23 type: integer required: true + field_validators: + - id: 
in_range_int + params: + min: 0 + max: 99999999 - name: NUM_APPROVED friendly_name: Total Number of Approved Applications @@ -194,6 +298,11 @@ segments: end: 47 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_DENIED friendly_name: Total Number of Denied Applications @@ -202,6 +311,11 @@ segments: end: 71 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: ASSISTANCE friendly_name: Total Amount of Assistance @@ -210,6 +324,11 @@ segments: end: 103 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 999999999999 - name: NUM_FAMILIES friendly_name: Total Number of Families @@ -218,6 +337,11 @@ segments: end: 131 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_2_PARENTS friendly_name: Total Number of Two-parent Families @@ -226,6 +350,11 @@ segments: end: 155 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_1_PARENTS friendly_name: Total Number of One-Parent Families @@ -234,6 +363,11 @@ segments: end: 179 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_NO_PARENTS friendly_name: Total Number of No-Parent Families @@ -242,6 +376,11 @@ segments: end: 203 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_RECIPIENTS friendly_name: Total Number of Recipients @@ -250,6 +389,11 @@ segments: end: 227 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_ADULT_RECIPIENTS friendly_name: Total Number of Adult Recipients @@ -258,6 +402,11 @@ segments: end: 251 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: 
NUM_CHILD_RECIPIENTS friendly_name: Total Number of Child Recipients @@ -266,6 +415,11 @@ segments: end: 275 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_NONCUSTODIALS friendly_name: Total Number of Noncustodial Parents @@ -274,6 +428,11 @@ segments: end: 299 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_BIRTHS friendly_name: Total Number of Births @@ -282,6 +441,11 @@ segments: end: 323 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_OUTWEDLOCK_BIRTHS friendly_name: Total Number of Out-of-Wedlock Births @@ -290,6 +454,11 @@ segments: end: 347 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_CLOSED_CASES friendly_name: Total Number of Closed Cases @@ -298,7 +467,11 @@ segments: end: 371 type: integer required: true - + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 # Month C (third month of quarter) - fields: - name: RPT_MONTH_YEAR @@ -319,6 +492,11 @@ segments: end: 31 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_APPROVED friendly_name: Total Number of Approved Applications @@ -327,6 +505,11 @@ segments: end: 55 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_DENIED friendly_name: Total Number of Denied Applications @@ -335,6 +518,11 @@ segments: end: 79 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: ASSISTANCE friendly_name: Total Amount of Assistance @@ -343,6 +531,11 @@ segments: end: 115 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 999999999999 - name: NUM_FAMILIES friendly_name: Total Number of Families 
@@ -351,6 +544,11 @@ segments: end: 139 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_2_PARENTS friendly_name: Total Number of Two-parent Families @@ -359,6 +557,11 @@ segments: end: 163 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_1_PARENTS friendly_name: Total Number of One-parent Families @@ -367,6 +570,11 @@ segments: end: 187 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_NO_PARENTS friendly_name: Total Number of No-parent Families @@ -375,6 +583,11 @@ segments: end: 211 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_RECIPIENTS friendly_name: Total Number of Recipients @@ -383,6 +596,11 @@ segments: end: 235 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_ADULT_RECIPIENTS friendly_name: Total Number of Adult Recipients @@ -391,6 +609,11 @@ segments: end: 259 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_CHILD_RECIPIENTS friendly_name: Total Number of Child Recipients @@ -399,6 +622,11 @@ segments: end: 283 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_NONCUSTODIALS friendly_name: Total Number of Noncustodial Parents @@ -407,6 +635,11 @@ segments: end: 307 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_BIRTHS friendly_name: Total Number of Births @@ -415,6 +648,11 @@ segments: end: 331 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_OUTWEDLOCK_BIRTHS friendly_name: Total Number of Out-of-Wedlock Births @@ -423,6 +661,11 @@ segments: end: 
355 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 - name: NUM_CLOSED_CASES friendly_name: Total Number of Closed Cases @@ -431,3 +674,8 @@ segments: end: 379 type: integer required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99999999 diff --git a/tdrs-services/parser/config/schemas/tribal_tanf/t7.yaml b/tdrs-services/parser/config/schemas/tribal_tanf/t7.yaml index 07f1b0217a..f964b940e1 100644 --- a/tdrs-services/parser/config/schemas/tribal_tanf/t7.yaml +++ b/tdrs-services/parser/config/schemas/tribal_tanf/t7.yaml @@ -11,6 +11,14 @@ section: 4 document: "Tribal TANF Stratum Data" format: positional +record_validators: + - id: record_length_min + params: + min: 247 + - id: rpt_month_year_matches_header_year_quarter + - id: calendar_quarter_is_valid + error_type: RECORD_PRE_CHECK + # Fields shared across all segments (included in every output row) shared: - name: RecordType @@ -28,6 +36,9 @@ shared: end: 7 type: integer required: true + field_validators: + - id: year_after_2019 + - id: calendar_quarter_is_valid # Each segment produces one output row (combined with shared fields) # T7 has 30 segments: 10 strata x 3 months (A, B, C) @@ -52,6 +63,10 @@ segments: end: 8 type: string required: true + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -60,6 +75,11 @@ segments: end: 10 type: string required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -68,7 +88,11 @@ segments: end: 17 type: integer required: true - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 1, Month B - fields: - name: RPT_MONTH_YEAR @@ -89,6 +113,10 @@ segments: end: 8 type: string required: true + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -97,6 +125,11 @@ segments: 
end: 10 type: string required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -105,7 +138,11 @@ segments: end: 24 type: integer required: true - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 1, Month C - fields: - name: RPT_MONTH_YEAR @@ -126,6 +163,10 @@ segments: end: 8 type: string required: true + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -134,6 +175,11 @@ segments: end: 10 type: string required: true + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -142,7 +188,11 @@ segments: end: 31 type: integer required: true - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 2, Month A - fields: - name: RPT_MONTH_YEAR @@ -163,6 +213,10 @@ segments: end: 32 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -171,6 +225,11 @@ segments: end: 34 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -179,7 +238,11 @@ segments: end: 41 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 2, Month B - fields: - name: RPT_MONTH_YEAR @@ -200,6 +263,10 @@ segments: end: 32 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -208,6 +275,11 @@ segments: end: 34 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -216,7 +288,11 @@ segments: end: 48 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 
0 + max: 9999999 # Stratum 2, Month C - fields: - name: RPT_MONTH_YEAR @@ -237,6 +313,10 @@ segments: end: 32 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -245,6 +325,11 @@ segments: end: 34 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -253,7 +338,11 @@ segments: end: 55 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 3, Month A - fields: - name: RPT_MONTH_YEAR @@ -274,6 +363,10 @@ segments: end: 56 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -282,6 +375,11 @@ segments: end: 58 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -290,7 +388,11 @@ segments: end: 65 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 3, Month B - fields: - name: RPT_MONTH_YEAR @@ -311,6 +413,10 @@ segments: end: 56 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -319,6 +425,11 @@ segments: end: 58 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -327,7 +438,11 @@ segments: end: 72 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 3, Month C - fields: - name: RPT_MONTH_YEAR @@ -348,6 +463,10 @@ segments: end: 56 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -356,6 +475,11 @@ segments: end: 58 
type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -364,7 +488,11 @@ segments: end: 79 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 4, Month A - fields: - name: RPT_MONTH_YEAR @@ -385,6 +513,10 @@ segments: end: 80 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -393,6 +525,11 @@ segments: end: 82 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -401,7 +538,11 @@ segments: end: 89 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 4, Month B - fields: - name: RPT_MONTH_YEAR @@ -422,6 +563,10 @@ segments: end: 80 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -430,6 +575,11 @@ segments: end: 82 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -438,7 +588,11 @@ segments: end: 96 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 4, Month C - fields: - name: RPT_MONTH_YEAR @@ -459,6 +613,10 @@ segments: end: 80 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -467,6 +625,11 @@ segments: end: 82 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -475,7 +638,11 @@ segments: end: 103 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 
0 + max: 9999999 # Stratum 5, Month A - fields: - name: RPT_MONTH_YEAR @@ -496,6 +663,10 @@ segments: end: 104 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -504,6 +675,11 @@ segments: end: 106 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -512,7 +688,11 @@ segments: end: 113 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 5, Month B - fields: - name: RPT_MONTH_YEAR @@ -533,6 +713,10 @@ segments: end: 104 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -541,6 +725,11 @@ segments: end: 106 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -549,7 +738,11 @@ segments: end: 120 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 5, Month C - fields: - name: RPT_MONTH_YEAR @@ -570,6 +763,10 @@ segments: end: 104 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -578,6 +775,11 @@ segments: end: 106 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -586,7 +788,11 @@ segments: end: 127 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 6, Month A - fields: - name: RPT_MONTH_YEAR @@ -607,6 +813,10 @@ segments: end: 128 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -615,6 +825,11 @@ 
segments: end: 130 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -623,7 +838,11 @@ segments: end: 137 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 6, Month B - fields: - name: RPT_MONTH_YEAR @@ -644,6 +863,10 @@ segments: end: 128 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -652,6 +875,11 @@ segments: end: 130 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -660,7 +888,11 @@ segments: end: 144 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 6, Month C - fields: - name: RPT_MONTH_YEAR @@ -681,6 +913,10 @@ segments: end: 128 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -689,6 +925,11 @@ segments: end: 130 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -697,7 +938,11 @@ segments: end: 151 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 7, Month A - fields: - name: RPT_MONTH_YEAR @@ -718,6 +963,10 @@ segments: end: 152 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -726,6 +975,11 @@ segments: end: 154 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -734,7 +988,11 @@ segments: end: 161 type: integer required: false - + field_validators: + - id: 
in_range_int + params: + min: 0 + max: 9999999 # Stratum 7, Month B - fields: - name: RPT_MONTH_YEAR @@ -755,6 +1013,10 @@ segments: end: 152 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -763,6 +1025,11 @@ segments: end: 154 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -771,7 +1038,11 @@ segments: end: 168 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 7, Month C - fields: - name: RPT_MONTH_YEAR @@ -792,6 +1063,10 @@ segments: end: 152 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -800,6 +1075,11 @@ segments: end: 154 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -808,7 +1088,11 @@ segments: end: 175 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 8, Month A - fields: - name: RPT_MONTH_YEAR @@ -829,6 +1113,10 @@ segments: end: 176 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -837,6 +1125,11 @@ segments: end: 178 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -845,7 +1138,11 @@ segments: end: 185 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 8, Month B - fields: - name: RPT_MONTH_YEAR @@ -866,6 +1163,10 @@ segments: end: 176 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM 
friendly_name: Stratum @@ -874,6 +1175,11 @@ segments: end: 178 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -882,7 +1188,11 @@ segments: end: 192 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 8, Month C - fields: - name: RPT_MONTH_YEAR @@ -903,6 +1213,10 @@ segments: end: 176 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -911,6 +1225,11 @@ segments: end: 178 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -919,7 +1238,11 @@ segments: end: 199 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 9, Month A - fields: - name: RPT_MONTH_YEAR @@ -940,6 +1263,10 @@ segments: end: 200 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -948,6 +1275,11 @@ segments: end: 202 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -956,7 +1288,11 @@ segments: end: 209 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 9, Month B - fields: - name: RPT_MONTH_YEAR @@ -977,6 +1313,10 @@ segments: end: 200 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -985,6 +1325,11 @@ segments: end: 202 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -993,7 +1338,11 @@ segments: end: 216 type: 
integer required: false - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 9, Month C - fields: - name: RPT_MONTH_YEAR @@ -1014,6 +1363,10 @@ segments: end: 200 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -1022,6 +1375,11 @@ segments: end: 202 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -1030,7 +1388,11 @@ segments: end: 223 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 10, Month A - fields: - name: RPT_MONTH_YEAR @@ -1051,6 +1413,10 @@ segments: end: 224 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -1059,6 +1425,11 @@ segments: end: 226 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -1067,7 +1438,11 @@ segments: end: 233 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 10, Month B - fields: - name: RPT_MONTH_YEAR @@ -1088,6 +1463,10 @@ segments: end: 224 type: string required: false + field_validators: + - id: in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -1096,6 +1475,11 @@ segments: end: 226 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -1104,7 +1488,11 @@ segments: end: 240 type: integer required: false - + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 # Stratum 10, Month C - fields: - name: RPT_MONTH_YEAR @@ -1125,6 +1513,10 @@ segments: end: 224 type: string required: false + field_validators: + - id: 
in_values + params: + values: ["1", "2"] - name: STRATUM friendly_name: Stratum @@ -1133,6 +1525,11 @@ segments: end: 226 type: string required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 99 - name: FAMILIES_MONTH friendly_name: Number of Families @@ -1141,3 +1538,8 @@ segments: end: 247 type: integer required: false + field_validators: + - id: in_range_int + params: + min: 0 + max: 9999999 diff --git a/tdrs-services/parser/config/validation/validators.yaml b/tdrs-services/parser/config/validation/validators.yaml index f5fc925559..88c578f1b8 100644 --- a/tdrs-services/parser/config/validation/validators.yaml +++ b/tdrs-services/parser/config/validation/validators.yaml @@ -105,7 +105,7 @@ field_validators: # Education level validator - 0-16 or 98-99 - id: education_level - expr: "(int(Value) >= 0 and int(Value) <= 16) or (int(Value) >= 98 and int(Value) <= 99)" + expr: "(int(trim(string(Value))) >= 0 and int(trim(string(Value))) <= 16) or (int(trim(string(Value))) >= 98 and int(trim(string(Value))) <= 99)" message: "{{.RecordType}} Item {{.Item}}: must be 0-16 or 98-99, got {{.Value}}" # Work eligible indicator validator - 0-9 or 11, 12 @@ -128,6 +128,11 @@ field_validators: expr: "quarter(Value) >= 1 and quarter(Value) <= 4" message: "{{.RecordType}} Item {{.Item}}: quarter must be 1-4" + # Calendar quarter validator - checks YYYYQ quarter digit is 1-4 + - id: calendar_quarter_is_valid + expr: "len(string(Value)) == 5 and int(string(Value)[4:5]) >= 1 and int(string(Value)[4:5]) <= 4" + message: "{{.RecordType}} Item {{.Item}}: quarter must be 1-4" + # Closure reason validator - 01-19 or 99 - id: closure_reason expr: "(int(Value) >= 1 and int(Value) <= 19) or int(Value) == 99" @@ -136,9 +141,20 @@ field_validators: # Education level validator (no zero) - 1-16 or 98-99 # Used by M2, M3 where 0 is not a valid education level - id: education_level_no_zero - expr: "(int(Value) >= 1 and int(Value) <= 16) or (int(Value) >= 98 and int(Value) 
<= 99)" + expr: "(int(trim(string(Value))) >= 1 and int(trim(string(Value))) <= 16) or (int(trim(string(Value))) >= 98 and int(trim(string(Value))) <= 99)" message: "{{.RecordType}} Item {{.Item}}: must be 1-16 or 98-99, got {{.Value}}" + # FRA SSN validator - matches the Python FRA validator set. + - id: fra_ssn + expr: "isNumeric(string(Value)) and len(string(Value)) == 9 and string(Value)[0:3] not in ['000', '666'] and string(Value)[3:5] != '00' and string(Value)[5:9] != '0000'" + error_type: CASE_CONSISTENCY + message: "{{.RecordType}} Item {{.Item}}: Social Security Number is not valid." + + # Tribal closure reason validator - 01-18 or 99. + - id: tribal_closure_reason + expr: "(int(trim(string(Value))) >= 1 and int(trim(string(Value))) <= 18) or int(trim(string(Value))) == 99" + message: "{{.RecordType}} Item {{.Item}}: must be 01-18 or 99, got {{.Value}}" + # ============================================================================= # Header field validators # ============================================================================= @@ -221,6 +237,18 @@ record_validators: error_type: RECORD_PRE_CHECK fields: [RPT_MONTH_YEAR] + - id: calendar_quarter_is_valid + expr: | + let calendar_quarter = GetString('CALENDAR_QUARTER'); + len(calendar_quarter) == 5 + and isNumeric(calendar_quarter) + and int(calendar_quarter[0:4]) >= 2020 + and int(calendar_quarter[4:5]) > 0 + and int(calendar_quarter[4:5]) < 5 + error_type: RECORD_PRE_CHECK + fields: [CALENDAR_QUARTER] + message: "{{.RecordType}}: {{.Values.CALENDAR_QUARTER}} is invalid. 
Calendar Quarter must be a numeric representing the Calendar Year and Quarter formatted as YYYYQ" + - id: rpt_month_year_matches_header_year_quarter expr: > year(GetString('RPT_MONTH_YEAR')) == fiscalToCalendarYear(DataFileContext.FiscalYear, DataFileContext.FiscalQuarter) @@ -229,6 +257,14 @@ record_validators: fields: [RPT_MONTH_YEAR] message: "{{.RecordType}}: Reporting month year {{.Values.RPT_MONTH_YEAR}} does not match file reporting year:{{.DataFileContext.FiscalYear}}, quarter:{{.DataFileContext.FiscalQuarter}}." + - id: exit_date_matches_fiscal_period + expr: > + year(GetString('EXIT_DATE')) == fiscalToCalendarYear(DataFileContext.FiscalYear, DataFileContext.FiscalQuarter) + and string(quarter(GetString('EXIT_DATE'))) == fiscalToCalendarQuarter(DataFileContext.FiscalYear, DataFileContext.FiscalQuarter) + error_type: CASE_CONSISTENCY + fields: [EXIT_DATE] + message: "Exit date ({{.Values.EXIT_DATE}}) is not valid. Date must be in the range of {{.DataFileContext.FiscalYear}}." + # --- VALUE_CONSISTENCY validators (allow record serialization) --- # Amount requires positive field validator @@ -311,6 +347,12 @@ record_validators: message: "If FAMILY_AFFILIATION is 1-2, WORK_PART_STATUS must be 01, 02, 05, 07, 09, 15, 17, 18, 19, or 99" fields: [FAMILY_AFFILIATION, WORK_PART_STATUS] + # Tribal TANF allows WORK_PART_STATUS 01-03, 05-09, 11-19, or 99. 
+ - id: tribal_t2_family_affil_1_2_work_part_status + expr: "if GetInt('FAMILY_AFFILIATION') in [1, 2] { (GetInt('WORK_PART_STATUS') in 1..3) or (GetInt('WORK_PART_STATUS') in 5..9) or (GetInt('WORK_PART_STATUS') in 11..19) or GetString('WORK_PART_STATUS') == '99' } else { true }" + message: "If FAMILY_AFFILIATION is 1-2, WORK_PART_STATUS must be 01-03, 05-09, 11-19, or 99" + fields: [FAMILY_AFFILIATION, WORK_PART_STATUS] + # When WORK_ELIGIBLE_INDICATOR is 1-5, WORK_PART_STATUS must not be 99 - id: t2_work_eligible_1_5_work_part_not_99 expr: "if GetInt('WORK_ELIGIBLE_INDICATOR') in 1..5 { GetString('WORK_PART_STATUS') != '99' } else { true }" @@ -516,17 +558,21 @@ group_validators: # Record type must have at least one related record in the same case. # Params: record_type (string), related_record_types (array of strings) - id: requires_related_record - expr: "RecordCounts[Params.record_type] == 0 or hasAnyRecordType(RecordCounts, Params.related_record_types)" + expr: | + filter(Group.Records, { + .GetRecordType() == Params.record_type and not any(Params.related_record_types, { RecordCounts[#] > 0 }) + }) message: "When {{.Params.record_type}} is present, at least one related record type must also be present" + result_mode: per_record # Record type must have at least one related record with a specific int field value. 
# Params: record_type (string), related_record_types (array of strings), field_name (string), expected_value (int) - id: requires_related_record_with_int_value - expr: "RecordCounts[Params.record_type] == 0 or anyRecordOfTypesHasInt(Group, Params.related_record_types, Params.field_name, Params.expected_value)" + expr: | + filter(Group.Records, { + .GetRecordType() == Params.record_type and not any(Group.Records, { + .GetRecordType() in Params.related_record_types and .GetInt(Params.field_name) == Params.expected_value + }) + }) message: "When {{.Params.record_type}} is present, at least one related record must have {{.Params.field_name}} == {{.Params.expected_value}}" - - # Record type requires a corresponding record type in the same case. - # Params: record_type (string), required_record_type (string) - - id: requires_corresponding_record - expr: "RecordCounts[Params.record_type] == 0 or RecordCounts[Params.required_record_type] >= 1" - message: "When {{.Params.record_type}} is present, {{.Params.required_record_type}} must also be present" + result_mode: per_record diff --git a/tdrs-services/parser/internal/config/db_metadata_test.go b/tdrs-services/parser/internal/config/db_metadata_test.go index 3b9dd4c861..a62d5cbf46 100644 --- a/tdrs-services/parser/internal/config/db_metadata_test.go +++ b/tdrs-services/parser/internal/config/db_metadata_test.go @@ -165,7 +165,7 @@ func TestRealConfig_MetadataColumnCounts(t *testing.T) { "tanf/t7": 2 + 4 + 3, // 9 "ssp/m1": 3 + 39 + 3, // 46 "ssp/m2": 4 + 63 + 3, // 70 - "ssp/m3": 4 + 18 + 3, // 25 + "ssp/m3": 3 + 18 + 3, // 24 "ssp/m4": 3 + 9 + 3, // 15 "ssp/m5": 3 + 24 + 3, // 30 "ssp/m6": 2 + 11 + 3, // 16 diff --git a/tdrs-services/parser/internal/config/filespec/types.go b/tdrs-services/parser/internal/config/filespec/types.go index 972723c53c..3da5a6556e 100644 --- a/tdrs-services/parser/internal/config/filespec/types.go +++ b/tdrs-services/parser/internal/config/filespec/types.go @@ -134,23 +134,32 @@ func (c 
*AccumulatorConfig) EffectiveBatchSize() int { // HasKeyFields returns true if key-based grouping is configured. func (c *AccumulatorConfig) HasKeyFields() bool { - return c.KeyFields != nil && (c.KeyFields.RptMonthYear.End > 0 || c.KeyFields.CaseNumber.End > 0) + return c.KeyFields != nil && len(c.KeyFields.Fields) > 0 } -// KeyFieldsConfig defines byte positions for extracting the grouping key. +// KeyFieldsConfig defines positions for extracting the grouping key. type KeyFieldsConfig struct { - // RptMonthYear is the position of the reporting month/year field - RptMonthYear PositionDef `yaml:"rpt_month_year"` + // Fields is the ordered list of grouping key fields. + Fields []KeyFieldDef `yaml:"fields"` +} + +func (c *KeyFieldsConfig) OrderedFields() []KeyFieldDef { + if c == nil { + return nil + } + return c.Fields +} - // CaseNumber is the position of the case number field - CaseNumber PositionDef `yaml:"case_number"` +type KeyFieldDef struct { + Name string `yaml:"name"` + PositionDef `yaml:",inline"` } -// PositionDef defines a byte range within a line. +// PositionDef defines a byte range for positional files or a column index for columnar files. type PositionDef struct { - // Start is the starting byte position (0-indexed, inclusive) + // Start is the starting byte position or column index (0-indexed, inclusive). Start int `yaml:"start"` - // End is the ending byte position (0-indexed, exclusive) + // End is the ending byte position or the next column index (0-indexed, exclusive). 
End int `yaml:"end"` } diff --git a/tdrs-services/parser/internal/config/registry_test.go b/tdrs-services/parser/internal/config/registry_test.go index 45a4ec7672..f5d252e14e 100644 --- a/tdrs-services/parser/internal/config/registry_test.go +++ b/tdrs-services/parser/internal/config/registry_test.go @@ -167,7 +167,7 @@ func TestConfig_SchemaFieldCounts(t *testing.T) { "tanf/t7": {2, 30, 4}, "ssp/m1": {3, 1, 39}, "ssp/m2": {4, 1, 63}, - "ssp/m3": {4, 2, 18}, + "ssp/m3": {3, 2, 18}, "ssp/m4": {3, 1, 9}, "ssp/m5": {3, 1, 24}, "ssp/m6": {2, 3, 11}, @@ -457,6 +457,83 @@ func TestConfig_FileSpecDetectionMethods(t *testing.T) { } } +func TestConfig_FRAAndTribalFileSpecValidationOrchestrators(t *testing.T) { + reg := loadRegistry(t) + + tests := []struct { + program string + section int + errorTypeByID map[int]string + }{ + { + program: "FRA", + section: 1, + errorTypeByID: map[int]string{ + 1: "CASE_CONSISTENCY", + 2: "CASE_CONSISTENCY", + 3: "CASE_CONSISTENCY", + 4: "CASE_CONSISTENCY", + }, + }, + { + program: "TRIBAL", + section: 1, + errorTypeByID: map[int]string{ + 1: "RECORD_PRE_CHECK", + 2: "FIELD_VALUE", + 3: "VALUE_CONSISTENCY", + 4: "CASE_CONSISTENCY", + }, + }, + { + program: "TRIBAL", + section: 2, + errorTypeByID: map[int]string{ + 1: "RECORD_PRE_CHECK", + 2: "FIELD_VALUE", + 3: "VALUE_CONSISTENCY", + 4: "CASE_CONSISTENCY", + }, + }, + { + program: "TRIBAL", + section: 3, + errorTypeByID: map[int]string{ + 1: "RECORD_PRE_CHECK", + 2: "FIELD_VALUE", + 3: "VALUE_CONSISTENCY", + 4: "CASE_CONSISTENCY", + }, + }, + { + program: "TRIBAL", + section: 4, + errorTypeByID: map[int]string{ + 1: "RECORD_PRE_CHECK", + 2: "FIELD_VALUE", + 3: "VALUE_CONSISTENCY", + 4: "CASE_CONSISTENCY", + }, + }, + } + + for _, tc := range tests { + spec := reg.GetFileSpec(tc.program, tc.section) + if spec == nil { + t.Fatalf("missing filespec %s:%d", tc.program, tc.section) + } + if len(spec.ValidationOrchestrator.Categories) != len(tc.errorTypeByID) { + t.Fatalf("%s:%d categories = %d, 
want %d", tc.program, tc.section, len(spec.ValidationOrchestrator.Categories), len(tc.errorTypeByID)) + } + for _, category := range spec.ValidationOrchestrator.Categories { + want := tc.errorTypeByID[category.ID] + if category.DefaultErrorType != want { + t.Errorf("%s:%d category %d default_error_type = %s, want %s", tc.program, tc.section, category.ID, category.DefaultErrorType, want) + } + } + } +} + func TestConfig_FileSpecFormats(t *testing.T) { reg := loadRegistry(t) @@ -477,12 +554,13 @@ func TestConfig_FileSpecFormats(t *testing.T) { func TestConfig_FileSpecAccumulatorKeyFields(t *testing.T) { reg := loadRegistry(t) - // Sections 1 and 2 have key_fields for case grouping; sections 3, 4, and FRA do not + // Sections 1 and 2 group case records; sections 3 and 4 group aggregate + // records by record type for duplicate checks. FRA groups by EXIT_DATE + SSN. expectKeyFields := map[string]bool{ - "TAN:1": true, "TAN:2": true, "TAN:3": false, "TAN:4": false, - "SSP:1": true, "SSP:2": true, "SSP:3": false, "SSP:4": false, - "TRIBAL:1": true, "TRIBAL:2": true, "TRIBAL:3": false, "TRIBAL:4": false, - "FRA:1": false, + "TAN:1": true, "TAN:2": true, "TAN:3": true, "TAN:4": true, + "SSP:1": true, "SSP:2": true, "SSP:3": true, "SSP:4": true, + "TRIBAL:1": true, "TRIBAL:2": true, "TRIBAL:3": true, "TRIBAL:4": true, + "FRA:1": true, } for key, wantKF := range expectKeyFields { @@ -501,15 +579,21 @@ func TestConfig_FileSpecAccumulatorKeyFields(t *testing.T) { func TestConfig_FileSpecGroupedSchemas(t *testing.T) { reg := loadRegistry(t) - // Only section 1 and 2 filespecs should have grouped schemas - // These must exclude header/trailer and include only the record schemas + // Grouped schemas must exclude header/trailer and include only the record schemas. 
expectedGrouped := map[string][]string{ "TAN:1": {"tanf/t1", "tanf/t2", "tanf/t3"}, "TAN:2": {"tanf/t4", "tanf/t5"}, + "TAN:3": {"tanf/t6"}, + "TAN:4": {"tanf/t7"}, "SSP:1": {"ssp/m1", "ssp/m2", "ssp/m3"}, "SSP:2": {"ssp/m4", "ssp/m5"}, + "SSP:3": {"ssp/m6"}, + "SSP:4": {"ssp/m7"}, "TRIBAL:1": {"tribal_tanf/t1", "tribal_tanf/t2", "tribal_tanf/t3"}, "TRIBAL:2": {"tribal_tanf/t4", "tribal_tanf/t5"}, + "TRIBAL:3": {"tribal_tanf/t6"}, + "TRIBAL:4": {"tribal_tanf/t7"}, + "FRA:1": {"fra/te1"}, } for key, want := range expectedGrouped { @@ -534,17 +618,6 @@ func TestConfig_FileSpecGroupedSchemas(t *testing.T) { } } - // Sections 3, 4, FRA should have no grouped schemas - noGrouped := []string{"TAN:3", "TAN:4", "SSP:3", "SSP:4", "TRIBAL:3", "TRIBAL:4", "FRA:1"} - for _, key := range noGrouped { - spec := reg.FileSpecs()[key] - if spec == nil { - continue - } - if len(spec.Accumulator.GroupedSchemas) != 0 { - t.Errorf("filespec %s: expected no grouped schemas, got %v", key, spec.Accumulator.GroupedSchemas) - } - } } // --------------------------------------------------------------------------- diff --git a/tdrs-services/parser/internal/decoder/csv.go b/tdrs-services/parser/internal/decoder/csv.go index d0a9fd77da..0e5e4b19d7 100644 --- a/tdrs-services/parser/internal/decoder/csv.go +++ b/tdrs-services/parser/internal/decoder/csv.go @@ -1,6 +1,7 @@ package decoder import ( + "bufio" "encoding/csv" "io" "iter" @@ -13,23 +14,29 @@ import ( // Each row becomes a ColumnarRow with string values. type CSVDecoder struct { Sortable - reader *csv.Reader - closer io.Closer - lineNum int + rawReader *bufio.Reader + reader *csv.Reader + closer io.Closer + lineNum int // recordType is the fixed record type for this file. // For FRA files, this is always "TE1". recordType string + + firstRead bool + firstRow Row } // NewCSVDecoder creates a decoder for CSV files. 
func NewCSVDecoder(r io.ReadCloser, recordType string) *CSVDecoder { - csvReader := csv.NewReader(r) + rawReader := bufio.NewReader(r) + csvReader := csv.NewReader(rawReader) // Configure CSV reader csvReader.FieldsPerRecord = -1 // Allow variable number of fields csvReader.TrimLeadingSpace = true return &CSVDecoder{ + rawReader: rawReader, reader: csvReader, closer: r, lineNum: 0, @@ -41,10 +48,24 @@ func (d *CSVDecoder) Format() filespec.Format { return filespec.FormatColumnar } -// ReadFirst returns nil for columnar files. -// CSV/columnar files don't have a header record in the data stream. +// ReadFirst returns the first physical row and buffers it for Rows. +// CSV/columnar files don't have a header record, but FRA uses this row for +// file-level sanity checks before normal record processing. func (d *CSVDecoder) ReadFirst() (Row, error) { - return nil, nil + if d.firstRead { + return d.firstRow, nil + } + d.firstRead = true + + row, err := d.readNextRow(false) + if err == io.EOF { + return nil, nil + } + if err != nil { + return nil, err + } + d.firstRow = row + return row, nil } func (d *CSVDecoder) Close() error { @@ -55,22 +76,39 @@ func (d *CSVDecoder) Close() error { } // Sort reads all rows, sorts them by key, and makes subsequent Rows() calls return sorted output. 
-func (d *CSVDecoder) Sort(detector *RecordTypeDetector, keyExtractor KeyExtractor, groupedSchemas []string) error { - return d.Sortable.DoSort(d.unsortedRows(), detector, keyExtractor, groupedSchemas) +func (d *CSVDecoder) Sort(detector *RecordTypeDetector, keyFields []filespec.KeyFieldDef, groupedSchemas []string) error { + return d.Sortable.DoSort(d.rowsWithBufferedFirst(), detector, keyFields, groupedSchemas) } func (d *CSVDecoder) Rows() iter.Seq2[Row, error] { if d.IsSorted() { return d.SortedRows() } - return d.unsortedRows() + return d.rowsWithBufferedFirst() +} + +func (d *CSVDecoder) rowsWithBufferedFirst() iter.Seq2[Row, error] { + return func(yield func(Row, error) bool) { + if d.firstRow != nil { + row := d.firstRow + d.firstRow = nil + if !yield(row, nil) { + return + } + } + + for row, err := range d.unsortedRows() { + if !yield(row, err) { + return + } + } + } } func (d *CSVDecoder) unsortedRows() iter.Seq2[Row, error] { return func(yield func(Row, error) bool) { for { - // Read a row from the CSV - record, err := d.reader.Read() + row, err := d.readNextRow(true) if err == io.EOF { return } @@ -79,25 +117,76 @@ func (d *CSVDecoder) unsortedRows() iter.Seq2[Row, error] { return } - d.lineNum++ - - // Skip empty rows or comment rows - if len(record) == 0 || (len(record) > 0 && strings.HasPrefix(record[0], "#")) { - continue + if !yield(row, nil) { + return } + } + } +} - // Convert []string to []any for consistent interface - columns := make([]any, len(record)) - for i, v := range record { - columns[i] = v +func (d *CSVDecoder) readNextRow(skipSkippable bool) (Row, error) { + for { + if !skipSkippable { + // csv.Reader skips blank physical lines; ReadFirst needs to expose one + // so FRA can reject files whose first row is empty. 
+ blank, err := d.consumeLeadingBlankLine() + if err != nil { + return nil, err } + if blank { + return NewColumnarRow(d.lineNum, d.recordType, 0, []any{}), nil + } + } - // Create the row - row := NewColumnarRow(d.lineNum, d.recordType, len(columns), columns) + record, err := d.reader.Read() + if err != nil { + return nil, err + } + d.lineNum++ - if !yield(row, nil) { - return + // Normal row iteration keeps the historical behavior of ignoring + // comments and empty rows. ReadFirst does not, because FRA validates + // the first physical row before streaming records. + if skipSkippable && (len(record) == 0 || strings.HasPrefix(record[0], "#")) { + continue + } + + columns := make([]any, len(record)) + for i, v := range record { + columns[i] = v + } + + return NewColumnarRow(d.lineNum, d.recordType, len(columns), columns), nil + } +} + +func (d *CSVDecoder) consumeLeadingBlankLine() (bool, error) { + b, err := d.rawReader.Peek(1) + if err == io.EOF { + return false, nil + } + if err != nil { + return false, err + } + + switch b[0] { + case '\n': + if _, err := d.rawReader.ReadByte(); err != nil { + return false, err + } + case '\r': + if _, err := d.rawReader.ReadByte(); err != nil { + return false, err + } + if next, err := d.rawReader.Peek(1); err == nil && next[0] == '\n' { + if _, err := d.rawReader.ReadByte(); err != nil { + return false, err } } + default: + return false, nil } + + d.lineNum++ + return true, nil } diff --git a/tdrs-services/parser/internal/decoder/csv_test.go b/tdrs-services/parser/internal/decoder/csv_test.go index 3f2f190d5d..81878ba799 100644 --- a/tdrs-services/parser/internal/decoder/csv_test.go +++ b/tdrs-services/parser/internal/decoder/csv_test.go @@ -18,16 +18,34 @@ func TestCSVDecoder_Format(t *testing.T) { } } -func TestCSVDecoder_ReadFirst_ReturnsNil(t *testing.T) { - dec := newTestCSVDecoder("a,b,c\n", "TE1") +func TestCSVDecoder_ReadFirst_BuffersFirstRow(t *testing.T) { + dec := newTestCSVDecoder("a,b,c\nd,e,f\n", "TE1") defer 
dec.Close() row, err := dec.ReadFirst() if err != nil { t.Fatalf("ReadFirst() error: %v", err) } - if row != nil { - t.Errorf("ReadFirst() = %v, want nil for CSV decoder", row) + cr := row.(*ColumnarRow) + if cr.ColumnCount() != 3 { + t.Fatalf("ReadFirst() ColumnCount() = %d, want 3", cr.ColumnCount()) + } + if got := cr.Column(0); got != "a" { + t.Errorf("ReadFirst() Column(0) = %v, want %q", got, "a") + } + + var rows []Row + for row, err := range dec.Rows() { + if err != nil { + t.Fatalf("Rows() error: %v", err) + } + rows = append(rows, row) + } + if len(rows) != 2 { + t.Fatalf("Rows() got %d rows, want 2", len(rows)) + } + if got := rows[0].(*ColumnarRow).Column(0); got != "a" { + t.Errorf("Rows() first row Column(0) = %v, want buffered first row", got) } } diff --git a/tdrs-services/parser/internal/decoder/decoder.go b/tdrs-services/parser/internal/decoder/decoder.go index 906fc276f7..7264146dcb 100644 --- a/tdrs-services/parser/internal/decoder/decoder.go +++ b/tdrs-services/parser/internal/decoder/decoder.go @@ -28,7 +28,7 @@ type Decoder interface { // After calling Sort, subsequent calls to Rows() return sorted rows followed // by unkeyed rows. Header/trailer rows are separated out. // Must be called after ReadFirst() and before Rows(). - Sort(detector *RecordTypeDetector, keyExtractor KeyExtractor, groupedSchemas []string) error + Sort(detector *RecordTypeDetector, keyFields []filespec.KeyFieldDef, groupedSchemas []string) error // Close releases any resources held by the decoder. 
Close() error diff --git a/tdrs-services/parser/internal/decoder/factory.go b/tdrs-services/parser/internal/decoder/factory.go index 2be4d9fb35..8a8055bc13 100644 --- a/tdrs-services/parser/internal/decoder/factory.go +++ b/tdrs-services/parser/internal/decoder/factory.go @@ -1,12 +1,15 @@ package decoder import ( + "bytes" "fmt" "io" "net/http" "os" + "unicode/utf8" "go-parser/internal/config/filespec" + "go-parser/internal/sentinel" ) // CreateDecoder creates the appropriate decoder based on file format and content type. @@ -19,17 +22,18 @@ func CreateDecoder(file *os.File, spec *filespec.FileSpec) (Decoder, error) { case filespec.FormatColumnar: return createColumnarDecoder(file, spec) default: - return nil, fmt.Errorf("unknown format: %s", spec.Format) + return nil, fmt.Errorf("%w: unknown format %q", sentinel.ErrDecoderUnknown, spec.Format) } } // createColumnarDecoder determines whether the file is CSV or XLSX based on MIME type. func createColumnarDecoder(file *os.File, spec *filespec.FileSpec) (Decoder, error) { buf := make([]byte, 512) - _, err := file.Read(buf) + n, err := file.Read(buf) if err != nil && err != io.EOF { return nil, err } + sample := buf[:n] // Rewind the file pointer for later reading if _, err := file.Seek(0, io.SeekStart); err != nil { @@ -45,7 +49,16 @@ func createColumnarDecoder(file *os.File, spec *filespec.FileSpec) (Decoder, err return NewXLSXDecoder(file.Name(), spec.RecordTypeDetection.Schema) case "text/plain; charset=utf-8", "text/csv; charset=utf-8": return NewCSVDecoder(file, spec.RecordTypeDetection.Schema), nil + case "application/octet-stream": + if isBinaryContent(sample) { + return nil, fmt.Errorf("%w: %s has binary content", sentinel.ErrDecoderUnknown, file.Name()) + } + return NewCSVDecoder(file, spec.RecordTypeDetection.Schema), nil default: - return nil, fmt.Errorf("%s has an unknown or unexpected content type: %s", file.Name(), contentType) + return nil, fmt.Errorf("%w: %s has an unknown or unexpected content type: 
%s", sentinel.ErrDecoderUnknown, file.Name(), contentType) } } + +func isBinaryContent(sample []byte) bool { + return bytes.Contains(sample, []byte{0}) || !utf8.Valid(sample) +} diff --git a/tdrs-services/parser/internal/decoder/factory_test.go b/tdrs-services/parser/internal/decoder/factory_test.go index b22e6cf766..9a43dd9da5 100644 --- a/tdrs-services/parser/internal/decoder/factory_test.go +++ b/tdrs-services/parser/internal/decoder/factory_test.go @@ -1,10 +1,12 @@ package decoder import ( + "errors" "os" "testing" "go-parser/internal/config/filespec" + "go-parser/internal/sentinel" ) func TestCreateDecoder_Positional(t *testing.T) { @@ -102,6 +104,39 @@ func TestCreateDecoder_UnknownFormat(t *testing.T) { if err == nil { t.Fatal("expected error for unknown format") } + if !errors.Is(err, sentinel.ErrDecoderUnknown) { + t.Fatalf("expected ErrDecoderUnknown, got %v", err) + } +} + +func TestCreateColumnarDecoder_UnknownContentTypeReturnsSentinel(t *testing.T) { + tmpFile, err := os.CreateTemp("", "goparser-pdf-*.xlsx") + if err != nil { + t.Fatalf("Failed to create temp file: %v", err) + } + defer os.Remove(tmpFile.Name()) + + if _, err := tmpFile.WriteString("%PDF-1.4\n"); err != nil { + t.Fatalf("Failed to write temp file: %v", err) + } + if _, err := tmpFile.Seek(0, 0); err != nil { + t.Fatalf("Failed to seek: %v", err) + } + + spec := &filespec.FileSpec{ + Format: filespec.FormatColumnar, + RecordTypeDetection: filespec.RecordTypeDetection{ + Schema: "test", + }, + } + + _, err = CreateDecoder(tmpFile, spec) + if err == nil { + t.Fatal("expected error for unknown content type") + } + if !errors.Is(err, sentinel.ErrDecoderUnknown) { + t.Fatalf("expected ErrDecoderUnknown, got %v", err) + } } func TestCreateColumnarDecoder_BinaryContent(t *testing.T) { diff --git a/tdrs-services/parser/internal/decoder/key_extractor.go b/tdrs-services/parser/internal/decoder/key_extractor.go deleted file mode 100644 index db49935109..0000000000 --- 
a/tdrs-services/parser/internal/decoder/key_extractor.go +++ /dev/null @@ -1,85 +0,0 @@ -package decoder - -import ( - "fmt" - - "go-parser/internal/config/filespec" -) - -// KeyExtractor abstracts key extraction across file formats. -type KeyExtractor interface { - // ExtractKey returns the composite sort key for the given row. - // Returns an error if the row is too short or otherwise cannot produce a key. - ExtractKey(row Row) (string, error) -} - -// PositionalKeyExtractor extracts keys from fixed-width positional rows -// using the same byte positions as accumulator key_fields. -type PositionalKeyExtractor struct { - RptMonthYear filespec.PositionDef - CaseNumber filespec.PositionDef -} - -// ExtractKey extracts the composite key from a positional row. -func (e *PositionalKeyExtractor) ExtractKey(row Row) (string, error) { - pr, ok := row.(*PositionalRow) - if !ok { - return "", fmt.Errorf("positional key extraction requires PositionalRow, got %T", row) - } - - data := pr.Data() - - minLen := e.CaseNumber.End - if e.RptMonthYear.End > minLen { - minLen = e.RptMonthYear.End - } - if len(data) < minLen { - return "", fmt.Errorf("line too short for key extraction: need %d bytes, got %d", minLen, len(data)) - } - - rptMonth := data[e.RptMonthYear.Start:e.RptMonthYear.End] - caseNum := data[e.CaseNumber.Start:e.CaseNumber.End] - - return rptMonth + "|" + caseNum, nil -} - -// ColumnarKeyExtractor extracts keys from CSV/XLSX rows by column index. -type ColumnarKeyExtractor struct { - KeyColumns []int -} - -// ExtractKey extracts the composite key from a columnar row. 
-func (e *ColumnarKeyExtractor) ExtractKey(row Row) (string, error) { - cr, ok := row.(*ColumnarRow) - if !ok { - return "", fmt.Errorf("columnar key extraction requires ColumnarRow, got %T", row) - } - - key := "" - for i, colIdx := range e.KeyColumns { - val := cr.Column(colIdx) - if val == nil { - return "", fmt.Errorf("column %d is empty or missing", colIdx) - } - if i > 0 { - key += "|" - } - key += fmt.Sprintf("%v", val) - } - - return key, nil -} - -// NewKeyExtractor creates the appropriate KeyExtractor based on file specification. -// Returns nil if the spec has no key fields configured. -func NewKeyExtractor(spec *filespec.FileSpec) KeyExtractor { - if !spec.Accumulator.HasKeyFields() { - return nil - } - - kf := spec.Accumulator.KeyFields - return &PositionalKeyExtractor{ - RptMonthYear: kf.RptMonthYear, - CaseNumber: kf.CaseNumber, - } -} diff --git a/tdrs-services/parser/internal/decoder/key_extractor_test.go b/tdrs-services/parser/internal/decoder/key_extractor_test.go index 4fa7a37a07..529cb2219f 100644 --- a/tdrs-services/parser/internal/decoder/key_extractor_test.go +++ b/tdrs-services/parser/internal/decoder/key_extractor_test.go @@ -6,14 +6,9 @@ import ( "go-parser/internal/config/filespec" ) -func TestPositionalKeyExtractor_ExtractKey(t *testing.T) { - extractor := &PositionalKeyExtractor{ - RptMonthYear: filespec.PositionDef{Start: 2, End: 8}, - CaseNumber: filespec.PositionDef{Start: 8, End: 19}, - } - +func TestPositionalRow_ExtractKey(t *testing.T) { row := NewPositionalRow(1, "T1", 30, "T1202401CASE001 rest-of-data") - key, err := extractor.ExtractKey(row) + key, err := row.ExtractKey(positionalKeyFields()) if err != nil { t.Fatalf("ExtractKey failed: %v", err) } @@ -22,39 +17,17 @@ func TestPositionalKeyExtractor_ExtractKey(t *testing.T) { } } -func TestPositionalKeyExtractor_TooShort(t *testing.T) { - extractor := &PositionalKeyExtractor{ - RptMonthYear: filespec.PositionDef{Start: 2, End: 8}, - CaseNumber: filespec.PositionDef{Start: 8, 
End: 19}, - } - +func TestPositionalRow_ExtractKeyTooShort(t *testing.T) { row := NewPositionalRow(1, "T1", 7, "T1short") - _, err := extractor.ExtractKey(row) + _, err := row.ExtractKey(positionalKeyFields()) if err == nil { t.Fatal("expected error for short row") } } -func TestPositionalKeyExtractor_WrongRowType(t *testing.T) { - extractor := &PositionalKeyExtractor{ - RptMonthYear: filespec.PositionDef{Start: 2, End: 8}, - CaseNumber: filespec.PositionDef{Start: 8, End: 19}, - } - - row := NewColumnarRow(1, "T1", 3, []any{"a", "b", "c"}) - _, err := extractor.ExtractKey(row) - if err == nil { - t.Fatal("expected error for columnar row") - } -} - -func TestColumnarKeyExtractor_ExtractKey(t *testing.T) { - extractor := &ColumnarKeyExtractor{ - KeyColumns: []int{0, 1}, - } - +func TestColumnarRow_ExtractKey(t *testing.T) { row := NewColumnarRow(1, "TE1", 3, []any{"202401", "CASE001", "other"}) - key, err := extractor.ExtractKey(row) + key, err := row.ExtractKey(columnarKeyFields()) if err != nil { t.Fatalf("ExtractKey failed: %v", err) } @@ -63,49 +36,27 @@ func TestColumnarKeyExtractor_ExtractKey(t *testing.T) { } } -func TestColumnarKeyExtractor_MissingColumn(t *testing.T) { - extractor := &ColumnarKeyExtractor{ - KeyColumns: []int{0, 5}, // column 5 doesn't exist - } - +func TestColumnarRow_ExtractKeyMissingColumn(t *testing.T) { row := NewColumnarRow(1, "TE1", 3, []any{"202401", "CASE001", "other"}) - _, err := extractor.ExtractKey(row) + _, err := row.ExtractKey([]filespec.KeyFieldDef{ + {Name: "exit_date", PositionDef: filespec.PositionDef{Start: 0, End: 1}}, + {Name: "ssn", PositionDef: filespec.PositionDef{Start: 5, End: 6}}, + }) if err == nil { t.Fatal("expected error for missing column") } } -func TestNewKeyExtractor_NoKeyFields(t *testing.T) { - spec := &filespec.FileSpec{ - Accumulator: filespec.AccumulatorConfig{}, - } - - ke := NewKeyExtractor(spec) - if ke != nil { - t.Error("expected nil KeyExtractor when no key fields configured") +func 
positionalKeyFields() []filespec.KeyFieldDef { + return []filespec.KeyFieldDef{ + {Name: "rpt_month_year", PositionDef: filespec.PositionDef{Start: 2, End: 8}}, + {Name: "case_number", PositionDef: filespec.PositionDef{Start: 8, End: 19}}, } } -func TestNewKeyExtractor_WithKeyFields(t *testing.T) { - spec := &filespec.FileSpec{ - Accumulator: filespec.AccumulatorConfig{ - KeyFields: &filespec.KeyFieldsConfig{ - RptMonthYear: filespec.PositionDef{Start: 2, End: 8}, - CaseNumber: filespec.PositionDef{Start: 8, End: 19}, - }, - }, - } - - ke := NewKeyExtractor(spec) - if ke == nil { - t.Fatal("expected non-nil KeyExtractor") - } - - pke, ok := ke.(*PositionalKeyExtractor) - if !ok { - t.Fatalf("expected *PositionalKeyExtractor, got %T", ke) - } - if pke.RptMonthYear.Start != 2 || pke.RptMonthYear.End != 8 { - t.Errorf("unexpected RptMonthYear positions: %+v", pke.RptMonthYear) +func columnarKeyFields() []filespec.KeyFieldDef { + return []filespec.KeyFieldDef{ + {Name: "exit_date", PositionDef: filespec.PositionDef{Start: 0, End: 1}}, + {Name: "ssn", PositionDef: filespec.PositionDef{Start: 1, End: 2}}, } } diff --git a/tdrs-services/parser/internal/decoder/positional.go b/tdrs-services/parser/internal/decoder/positional.go index d850f0c13d..03a11ae18a 100644 --- a/tdrs-services/parser/internal/decoder/positional.go +++ b/tdrs-services/parser/internal/decoder/positional.go @@ -83,8 +83,8 @@ func (d *PostitionalDecoder) Close() error { } // Sort reads all rows, sorts them by key, and makes subsequent Rows() calls return sorted output. 
-func (d *PostitionalDecoder) Sort(detector *RecordTypeDetector, keyExtractor KeyExtractor, groupedSchemas []string) error { - return d.Sortable.DoSort(d.unsortedRows(), detector, keyExtractor, groupedSchemas) +func (d *PostitionalDecoder) Sort(detector *RecordTypeDetector, keyFields []filespec.KeyFieldDef, groupedSchemas []string) error { + return d.Sortable.DoSort(d.unsortedRows(), detector, keyFields, groupedSchemas) } func (d *PostitionalDecoder) Rows() iter.Seq2[Row, error] { diff --git a/tdrs-services/parser/internal/decoder/row.go b/tdrs-services/parser/internal/decoder/row.go index e7782ddeea..c47f49687b 100644 --- a/tdrs-services/parser/internal/decoder/row.go +++ b/tdrs-services/parser/internal/decoder/row.go @@ -1,5 +1,12 @@ package decoder +import ( + "fmt" + "strings" + + "go-parser/internal/config/filespec" +) + // Row is the interface that all row types implement. // This allows the parser to work with any row format. type Row interface { @@ -14,6 +21,9 @@ type Row interface { // DecodedLength returns the length of the decoded row DecodedLength() int + + // ExtractKey returns the composite key for the configured key fields. + ExtractKey(fields []filespec.KeyFieldDef) (string, error) } // PositionalRow represents a row from a positional (fixed-width) file. @@ -55,6 +65,20 @@ func (r *PositionalRow) Data() string { return r.data } +func (r *PositionalRow) ExtractKey(fields []filespec.KeyFieldDef) (string, error) { + // Fields are assumed to be ordered by position + minLen := fields[len(fields)-1].End + if len(r.data) < minLen { + return "", fmt.Errorf("line too short for key extraction: need %d bytes, got %d", minLen, len(r.data)) + } + + parts := make([]string, 0, len(fields)) + for _, field := range fields { + parts = append(parts, r.data[field.Start:field.End]) + } + return strings.Join(parts, "|"), nil +} + // ColumnarRow represents a row from a columnar (CSV/XLSX) file. // The data is a slice of values, and fields are accessed by column index. 
type ColumnarRow struct { @@ -92,3 +116,15 @@ func (r *ColumnarRow) Column(index int) any { func (r *ColumnarRow) ColumnCount() int { return len(r.columns) } + +func (r *ColumnarRow) ExtractKey(fields []filespec.KeyFieldDef) (string, error) { + parts := make([]string, 0, len(fields)) + for _, field := range fields { + value := r.Column(field.Start) + if value == nil { + return "", fmt.Errorf("column %d is empty or missing", field.Start) + } + parts = append(parts, strings.TrimSpace(fmt.Sprintf("%v", value))) + } + return strings.Join(parts, "|"), nil +} diff --git a/tdrs-services/parser/internal/decoder/sortable.go b/tdrs-services/parser/internal/decoder/sortable.go index a3a9a60e04..13e9318e94 100644 --- a/tdrs-services/parser/internal/decoder/sortable.go +++ b/tdrs-services/parser/internal/decoder/sortable.go @@ -5,6 +5,8 @@ import ( "iter" "log" "slices" + + "go-parser/internal/config/filespec" ) // Sortable provides shared sorting logic for embedding in decoder implementations. @@ -24,7 +26,7 @@ func (s *Sortable) IsSorted() bool { return s.sorted } func (s *Sortable) DoSort( rows iter.Seq2[Row, error], detector *RecordTypeDetector, - keyExtractor KeyExtractor, + keyFields []filespec.KeyFieldDef, groupedSchemas []string, ) error { groupedSet := make(map[string]bool, len(groupedSchemas)) @@ -68,7 +70,7 @@ func (s *Sortable) DoSort( } // Extract sort key - key, err := keyExtractor.ExtractKey(row) + key, err := row.ExtractKey(keyFields) if err != nil { // Key extraction failed — collect for error reporting s.unkeyedRows = append(s.unkeyedRows, row) diff --git a/tdrs-services/parser/internal/decoder/sortable_test.go b/tdrs-services/parser/internal/decoder/sortable_test.go index 5d7e91ef61..3fd0273796 100644 --- a/tdrs-services/parser/internal/decoder/sortable_test.go +++ b/tdrs-services/parser/internal/decoder/sortable_test.go @@ -15,11 +15,11 @@ type testDecoder struct { rows []Row } -func (d *testDecoder) Format() filespec.Format { return 
filespec.FormatPositional } -func (d *testDecoder) ReadFirst() (Row, error) { return nil, nil } -func (d *testDecoder) Close() error { return nil } -func (d *testDecoder) Sort(det *RecordTypeDetector, ke KeyExtractor, gs []string) error { - return d.Sortable.DoSort(d.unsortedRows(), det, ke, gs) +func (d *testDecoder) Format() filespec.Format { return filespec.FormatPositional } +func (d *testDecoder) ReadFirst() (Row, error) { return nil, nil } +func (d *testDecoder) Close() error { return nil } +func (d *testDecoder) Sort(det *RecordTypeDetector, keyFields []filespec.KeyFieldDef, gs []string) error { + return d.Sortable.DoSort(d.unsortedRows(), det, keyFields, gs) } func (d *testDecoder) Rows() iter.Seq2[Row, error] { if d.IsSorted() { @@ -49,6 +49,13 @@ func makeTestRow(lineNum int, data string) *PositionalRow { return NewPositionalRow(lineNum, rt, len(data), data) } +func testPositionalKeyFields() []filespec.KeyFieldDef { + return []filespec.KeyFieldDef{ + {Name: "rpt_month_year", PositionDef: filespec.PositionDef{Start: 2, End: 8}}, + {Name: "case_number", PositionDef: filespec.PositionDef{Start: 8, End: 19}}, + } +} + func buildTestDetector() *RecordTypeDetector { schemas := map[string]*schema.CompiledSchema{ "common/header": {SchemaDef: &schema.SchemaDef{RecordType: "HEADER"}, Path: "common/header"}, @@ -80,10 +87,7 @@ func buildTestDetector() *RecordTypeDetector { func TestSortable_SortsByKey(t *testing.T) { detector := buildTestDetector() - keyExtractor := &PositionalKeyExtractor{ - RptMonthYear: filespec.PositionDef{Start: 2, End: 8}, - CaseNumber: filespec.PositionDef{Start: 8, End: 19}, - } + keyFields := testPositionalKeyFields() groupedSchemas := []string{"tanf/t1", "tanf/t2", "tanf/t3"} dec := &testDecoder{ @@ -95,7 +99,7 @@ func TestSortable_SortsByKey(t *testing.T) { }, } - err := dec.Sort(detector, keyExtractor, groupedSchemas) + err := dec.Sort(detector, keyFields, groupedSchemas) if err != nil { t.Fatalf("Sort failed: %v", err) } @@ -123,10 
+127,7 @@ func TestSortable_SortsByKey(t *testing.T) { func TestSortable_SeparatesTrailerAndUnkeyed(t *testing.T) { detector := buildTestDetector() - keyExtractor := &PositionalKeyExtractor{ - RptMonthYear: filespec.PositionDef{Start: 2, End: 8}, - CaseNumber: filespec.PositionDef{Start: 8, End: 19}, - } + keyFields := testPositionalKeyFields() groupedSchemas := []string{"tanf/t1", "tanf/t2", "tanf/t3"} dec := &testDecoder{ @@ -138,7 +139,7 @@ func TestSortable_SeparatesTrailerAndUnkeyed(t *testing.T) { }, } - err := dec.Sort(detector, keyExtractor, groupedSchemas) + err := dec.Sort(detector, keyFields, groupedSchemas) if err != nil { t.Fatalf("Sort failed: %v", err) } @@ -200,10 +201,7 @@ func TestSortable_UnsortedWhenNotSorted(t *testing.T) { func TestSortable_StableSort(t *testing.T) { detector := buildTestDetector() - keyExtractor := &PositionalKeyExtractor{ - RptMonthYear: filespec.PositionDef{Start: 2, End: 8}, - CaseNumber: filespec.PositionDef{Start: 8, End: 19}, - } + keyFields := testPositionalKeyFields() groupedSchemas := []string{"tanf/t1", "tanf/t2", "tanf/t3"} // Multiple records for same case — must preserve T1, T2, T3 order @@ -215,7 +213,7 @@ func TestSortable_StableSort(t *testing.T) { }, } - err := dec.Sort(detector, keyExtractor, groupedSchemas) + err := dec.Sort(detector, keyFields, groupedSchemas) if err != nil { t.Fatalf("Sort failed: %v", err) } diff --git a/tdrs-services/parser/internal/decoder/xlsx_test.go b/tdrs-services/parser/internal/decoder/xlsx_test.go index 4de8285775..1c0a36f144 100644 --- a/tdrs-services/parser/internal/decoder/xlsx_test.go +++ b/tdrs-services/parser/internal/decoder/xlsx_test.go @@ -55,8 +55,8 @@ func TestXLSXDecoder_Format(t *testing.T) { } } -func TestXLSXDecoder_ReadFirst_ReturnsNil(t *testing.T) { - path := createTestXLSX(t, [][]string{{"a", "b"}}) +func TestXLSXDecoder_ReadFirst_BuffersFirstRow(t *testing.T) { + path := createTestXLSX(t, [][]string{{"a", "b"}, {"c", "d"}}) dec, err := NewXLSXDecoder(path, 
"TE1") if err != nil { @@ -68,8 +68,26 @@ func TestXLSXDecoder_ReadFirst_ReturnsNil(t *testing.T) { if err != nil { t.Fatalf("ReadFirst() error: %v", err) } - if row != nil { - t.Errorf("ReadFirst() = %v, want nil for XLSX decoder", row) + cr := row.(*ColumnarRow) + if cr.ColumnCount() != 2 { + t.Fatalf("ReadFirst() ColumnCount() = %d, want 2", cr.ColumnCount()) + } + if got := cr.Column(0); got != "a" { + t.Errorf("ReadFirst() Column(0) = %v, want %q", got, "a") + } + + var rows []Row + for row, err := range dec.Rows() { + if err != nil { + t.Fatalf("Rows() error: %v", err) + } + rows = append(rows, row) + } + if len(rows) != 2 { + t.Fatalf("Rows() got %d rows, want 2", len(rows)) + } + if got := rows[0].(*ColumnarRow).Column(0); got != "a" { + t.Errorf("Rows() first row Column(0) = %v, want buffered first row", got) } } diff --git a/tdrs-services/parser/internal/decoder/xslx.go b/tdrs-services/parser/internal/decoder/xslx.go index 371c178f2f..7a8266f585 100644 --- a/tdrs-services/parser/internal/decoder/xslx.go +++ b/tdrs-services/parser/internal/decoder/xslx.go @@ -20,6 +20,9 @@ type XLSXDecoder struct { lineNum int recordType string + + firstRead bool + firstRow Row } // NewXLSXDecoder creates a decoder for XLSX files. @@ -64,48 +67,64 @@ func (d *XLSXDecoder) Close() error { return nil } -// ReadFirst returns nil for columnar files. -// CSV/columnar files don't have a header record in the data stream. +// ReadFirst returns the first physical row and buffers it for Rows. +// XLSX/columnar files don't have a header record, but FRA uses this row for +// file-level sanity checks before normal record processing. func (d *XLSXDecoder) ReadFirst() (Row, error) { - return nil, nil + if d.firstRead { + return d.firstRow, nil + } + d.firstRead = true + + row, err := d.readNextRow(false) + if err != nil { + return nil, err + } + d.firstRow = row + return row, nil } // Sort reads all rows, sorts them by key, and makes subsequent Rows() calls return sorted output. 
-func (d *XLSXDecoder) Sort(detector *RecordTypeDetector, keyExtractor KeyExtractor, groupedSchemas []string) error { - return d.Sortable.DoSort(d.unsortedRows(), detector, keyExtractor, groupedSchemas) +func (d *XLSXDecoder) Sort(detector *RecordTypeDetector, keyFields []filespec.KeyFieldDef, groupedSchemas []string) error { + return d.Sortable.DoSort(d.rowsWithBufferedFirst(), detector, keyFields, groupedSchemas) } func (d *XLSXDecoder) Rows() iter.Seq2[Row, error] { if d.IsSorted() { return d.SortedRows() } - return d.unsortedRows() + return d.rowsWithBufferedFirst() } -func (d *XLSXDecoder) unsortedRows() iter.Seq2[Row, error] { +func (d *XLSXDecoder) rowsWithBufferedFirst() iter.Seq2[Row, error] { return func(yield func(Row, error) bool) { - for d.rows.Next() { - d.lineNum++ - - cols, err := d.rows.Columns() - if err != nil { - yield(nil, err) + if d.firstRow != nil { + row := d.firstRow + d.firstRow = nil + if !yield(row, nil) { return } + } - // Skip empty rows - if len(cols) == 0 || allEmpty(cols) || strings.HasPrefix(cols[0], "#") { - continue + for row, err := range d.unsortedRows() { + if !yield(row, err) { + return } + } + } +} - // Convert to []any - // TODO: This could probably be []string since the package always converts cells to strings. Need to test further. 
- columns := make([]any, len(cols)) - for i, v := range cols { - columns[i] = v +func (d *XLSXDecoder) unsortedRows() iter.Seq2[Row, error] { + return func(yield func(Row, error) bool) { + for { + row, err := d.readNextRow(true) + if err != nil { + yield(nil, err) + return + } + if row == nil { + return } - - row := NewColumnarRow(d.lineNum, d.recordType, len(columns), columns) if !yield(row, nil) { return @@ -114,6 +133,32 @@ func (d *XLSXDecoder) unsortedRows() iter.Seq2[Row, error] { } } +func (d *XLSXDecoder) readNextRow(skipSkippable bool) (Row, error) { + for d.rows.Next() { + d.lineNum++ + + cols, err := d.rows.Columns() + if err != nil { + return nil, err + } + + if skipSkippable && (len(cols) == 0 || allEmpty(cols) || strings.HasPrefix(cols[0], "#")) { + continue + } + + // Convert to []any + // TODO: This could probably be []string since the package always converts cells to strings. Need to test further. + columns := make([]any, len(cols)) + for i, v := range cols { + columns[i] = v + } + + return NewColumnarRow(d.lineNum, d.recordType, len(columns), columns), nil + } + + return nil, nil +} + func allEmpty(cols []string) bool { for _, c := range cols { if strings.TrimSpace(c) != "" { diff --git a/tdrs-services/parser/internal/parser/accumulator.go b/tdrs-services/parser/internal/parser/accumulator.go index 0d8f862a6f..439c850847 100644 --- a/tdrs-services/parser/internal/parser/accumulator.go +++ b/tdrs-services/parser/internal/parser/accumulator.go @@ -87,50 +87,40 @@ func (a *Accumulator) Add(row decoder.Row) (batch *DecodedBatch, sch *schema.Com line := DecodedRecord{Row: row, Schema: sch} // Generate the grouping key - key, rptMonth, caseNum, err := a.generateKey(row) + groupingKey, err := a.generateKey(row) if err != nil { return nil, nil, false, fmt.Errorf("line %d: failed to generate key: %w", row.LineNum(), err) } - return a.addRecord(line, sch, key, rptMonth, caseNum) + return a.addRecord(line, sch, groupingKey) } // generateKey creates a 
grouping key for the row. // When key_fields configured: extracts composite key from row data. // When no key_fields: uses line number as unique key (every record is its own group). -func (a *Accumulator) generateKey(row decoder.Row) (key, rptMonth, caseNum string, err error) { +func (a *Accumulator) generateKey(row decoder.Row) (groupingKey, error) { if !a.hasKeyFields { // Each record is its own group - unique key guarantees immediate flush - return "line:" + strconv.Itoa(row.LineNum()), "", "", nil + return groupingKey{Value: "line:" + strconv.Itoa(row.LineNum())}, nil } return a.extractKey(row) } // addRecord handles all records using key-change detection. // If the key changes from the current group, the current group is flushed. -func (a *Accumulator) addRecord(line DecodedRecord, sch *schema.CompiledSchema, key, rptMonth, caseNum string) (*DecodedBatch, *schema.CompiledSchema, bool, error) { +func (a *Accumulator) addRecord(line DecodedRecord, sch *schema.CompiledSchema, key groupingKey) (*DecodedBatch, *schema.CompiledSchema, bool, error) { var completedBatch *DecodedBatch // Check if this is a new group if a.currentGroup == nil { // First record - start a new group - a.currentGroup = &DecodedGroup{ - Key: key, - RptMonthYear: rptMonth, - CaseNumber: caseNum, - DecodedRecords: make([]DecodedRecord, 0, 8), - } - } else if a.currentGroup.Key != key { + a.currentGroup = newDecodedGroup(key) + } else if a.currentGroup.Key != key.Value { // Key changed - current group is complete completedBatch = a.flushCurrentGroup() // Start new group - a.currentGroup = &DecodedGroup{ - Key: key, - RptMonthYear: rptMonth, - CaseNumber: caseNum, - DecodedRecords: make([]DecodedRecord, 0, 8), - } + a.currentGroup = newDecodedGroup(key) } // Add line to current group @@ -139,6 +129,17 @@ func (a *Accumulator) addRecord(line DecodedRecord, sch *schema.CompiledSchema, return completedBatch, sch, true, nil } +type groupingKey struct { + Value string +} + +func newDecodedGroup(key 
groupingKey) *DecodedGroup { + return &DecodedGroup{ + Key: key.Value, + DecodedRecords: make([]DecodedRecord, 0, 8), + } +} + // flushCurrentGroup handles the completed group based on batch_size configuration. // Returns a Batch if one is ready, nil otherwise. func (a *Accumulator) flushCurrentGroup() *DecodedBatch { @@ -169,29 +170,12 @@ func (a *Accumulator) flushCurrentGroup() *DecodedBatch { } // extractKey extracts the grouping key from a row. -func (a *Accumulator) extractKey(row decoder.Row) (key, rptMonth, caseNum string, err error) { - pr, ok := row.(*decoder.PositionalRow) - if !ok { - return "", "", "", fmt.Errorf("key-based grouping requires PositionalRow, got %T", row) - } - - data := pr.Data() - keyConfig := a.spec.Accumulator.KeyFields - - // Validate line length - minLen := keyConfig.CaseNumber.End - if len(data) < minLen { - return "", "", "", fmt.Errorf("line too short: need %d bytes, got %d", minLen, len(data)) +func (a *Accumulator) extractKey(row decoder.Row) (groupingKey, error) { + key, err := row.ExtractKey(a.spec.Accumulator.KeyFields.OrderedFields()) + if err != nil { + return groupingKey{}, err } - - // Extract key components - rptMonth = data[keyConfig.RptMonthYear.Start:keyConfig.RptMonthYear.End] - caseNum = data[keyConfig.CaseNumber.Start:keyConfig.CaseNumber.End] - - // Composite key with separator - key = rptMonth + "|" + caseNum - - return key, rptMonth, caseNum, nil + return groupingKey{Value: key}, nil } // Drain returns all accumulated groups as Batches and resets the accumulator. 
diff --git a/tdrs-services/parser/internal/parser/accumulator_test.go b/tdrs-services/parser/internal/parser/accumulator_test.go index e8a77f3bc5..acfd87be60 100644 --- a/tdrs-services/parser/internal/parser/accumulator_test.go +++ b/tdrs-services/parser/internal/parser/accumulator_test.go @@ -28,6 +28,30 @@ func buildNonKeyedSpec() *filespec.FileSpec { } } +func buildKeyedColumnarSpec() *filespec.FileSpec { + batchSize := 1 + return &filespec.FileSpec{ + Program: "FRA", + Section: 1, + Format: filespec.FormatColumnar, + Schemas: []string{"fra/te1"}, + RecordTypeDetection: filespec.RecordTypeDetection{ + Method: "fixed", + Schema: "fra/te1", + }, + Accumulator: filespec.AccumulatorConfig{ + KeyFields: &filespec.KeyFieldsConfig{ + Fields: []filespec.KeyFieldDef{ + {Name: "exit_date", PositionDef: filespec.PositionDef{Start: 0, End: 1}}, + {Name: "ssn", PositionDef: filespec.PositionDef{Start: 1, End: 2}}, + }, + }, + BatchSize: &batchSize, + GroupedSchemas: []string{"fra/te1"}, + }, + } +} + // buildNonKeyedSchemas creates schemas for the non-keyed spec. 
func buildNonKeyedSchemas() map[string]*schema.CompiledSchema { return map[string]*schema.CompiledSchema{ @@ -58,8 +82,10 @@ func buildBatchSize0Spec() *filespec.FileSpec { }, Accumulator: filespec.AccumulatorConfig{ KeyFields: &filespec.KeyFieldsConfig{ - RptMonthYear: filespec.PositionDef{Start: 2, End: 8}, - CaseNumber: filespec.PositionDef{Start: 8, End: 19}, + Fields: []filespec.KeyFieldDef{ + {Name: "rpt_month_year", PositionDef: filespec.PositionDef{Start: 2, End: 8}}, + {Name: "case_number", PositionDef: filespec.PositionDef{Start: 8, End: 19}}, + }, }, BatchSize: &batchSize, GroupedSchemas: []string{"tanf/t1", "tanf/t2", "tanf/t3"}, @@ -134,6 +160,41 @@ func TestAccumulator_AddGroupedSchema_ReturnsBatchOnKeyChange(t *testing.T) { } } +func TestAccumulator_AddColumnarGroupedSchema_ReturnsBatchOnKeyChange(t *testing.T) { + spec := buildKeyedColumnarSpec() + schemas := buildNonKeyedSchemas() + registry := config.NewTestRegistry(schemas) + detector := decoder.NewRecordTypeDetector(spec, registry) + acc := NewAccumulator(spec, detector) + + row1 := decoder.NewColumnarRow(1, "TE1", 2, []any{"202401", "111111111"}) + batch1, _, isAcc1, err := acc.Add(row1) + if err != nil { + t.Fatalf("Add row1: %v", err) + } + if !isAcc1 { + t.Error("expected isAccumulated=true for TE1") + } + if batch1 != nil { + t.Error("expected no batch after first row") + } + + row2 := decoder.NewColumnarRow(2, "TE1", 2, []any{"202401", "222222222"}) + batch2, _, isAcc2, err := acc.Add(row2) + if err != nil { + t.Fatalf("Add row2: %v", err) + } + if !isAcc2 { + t.Error("expected isAccumulated=true for second TE1") + } + if batch2 == nil { + t.Fatal("expected batch when columnar key changes with batch_size=1") + } + if got := batch2.DecodedGroups[0].Key; got != "202401|111111111" { + t.Errorf("completed group key = %q, want %q", got, "202401|111111111") + } +} + func TestAccumulator_AddNonGroupedSchema(t *testing.T) { spec := buildTANFS1Spec() schemas := buildTestSchemas() @@ -228,11 +289,8 
@@ func TestAccumulator_DifferentKeysFlushPreviousGroup(t *testing.T) { // Verify the flushed group has the right key group := batch.DecodedGroups[0] - if group.RptMonthYear != "202401" { - t.Errorf("expected RptMonthYear=%q, got %q", "202401", group.RptMonthYear) - } - if group.CaseNumber != "CASE0000001" { - t.Errorf("expected CaseNumber=%q, got %q", "CASE0000001", group.CaseNumber) + if group.Key != "202401|CASE0000001" { + t.Errorf("expected Key=%q, got %q", "202401|CASE0000001", group.Key) } } @@ -390,7 +448,7 @@ func TestAccumulator_ExtractKeyTooShort(t *testing.T) { detector := decoder.NewRecordTypeDetector(spec, registry) acc := NewAccumulator(spec, detector) - // Row data is too short for key extraction (needs at least 19 bytes for CaseNumber end) + // Row data is too short for key extraction (needs at least 19 bytes for configured key fields) row := makeRow(1, "T1short") _, _, _, err := acc.Add(row) if err == nil { diff --git a/tdrs-services/parser/internal/parser/extractor_test.go b/tdrs-services/parser/internal/parser/extractor_test.go index d0102120b1..b45216234e 100644 --- a/tdrs-services/parser/internal/parser/extractor_test.go +++ b/tdrs-services/parser/internal/parser/extractor_test.go @@ -444,6 +444,26 @@ func TestColumnarExtractor_Extract(t *testing.T) { } }) + t.Run("with FRA exit date transform converts xlsx date to integer", func(t *testing.T) { + row := decoder.NewColumnarRow(1, "TE1", 2, []any{"10/1/2023", "123456789"}) + field := &schema.FieldDef{ + Name: "EXIT_DATE", + Type: "integer", + Column: 0, + Transform: &schema.TransformDef{ + Name: "fra_exit_date", + }, + } + + got, err := ext.Extract(row, field, nil, nil) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != 202310 { + t.Errorf("got %v, want 202310", got) + } + }) + t.Run("with unknown transform returns error", func(t *testing.T) { row := decoder.NewColumnarRow(1, "T1", 2, []any{"value", "other"}) field := &schema.FieldDef{ diff --git 
a/tdrs-services/parser/internal/parser/orchestrator.go b/tdrs-services/parser/internal/parser/orchestrator.go index 4ef8c8185c..917bac6367 100644 --- a/tdrs-services/parser/internal/parser/orchestrator.go +++ b/tdrs-services/parser/internal/parser/orchestrator.go @@ -39,10 +39,8 @@ func (o *ParsingOrchestrator) ParseBatch(batch *DecodedBatch) *ParsedBatch { // processGroup parses all records in a single group. func (o *ParsingOrchestrator) processGroup(decodedGroup *DecodedGroup) *ParsedGroup { result := &ParsedGroup{ - Key: decodedGroup.Key, - RptMonthYear: decodedGroup.RptMonthYear, - CaseNumber: decodedGroup.CaseNumber, - Records: make([]*ParsedRecord, 0, len(decodedGroup.DecodedRecords)), + Key: decodedGroup.Key, + Records: make([]*ParsedRecord, 0, len(decodedGroup.DecodedRecords)), } for _, line := range decodedGroup.DecodedRecords { diff --git a/tdrs-services/parser/internal/parser/orchestrator_test.go b/tdrs-services/parser/internal/parser/orchestrator_test.go index 8b9f4bd48d..9971c79b5b 100644 --- a/tdrs-services/parser/internal/parser/orchestrator_test.go +++ b/tdrs-services/parser/internal/parser/orchestrator_test.go @@ -324,9 +324,7 @@ func TestParseBatch_SingleGroupSingleSegment(t *testing.T) { BatchID: 42, DecodedGroups: []*DecodedGroup{ { - Key: "202401|CASE001", - RptMonthYear: "202401", - CaseNumber: "CASE001", + Key: "202401|CASE001", DecodedRecords: []DecodedRecord{ {Row: row, Schema: cs}, }, @@ -381,14 +379,10 @@ func TestParseBatch_MultipleGroups(t *testing.T) { DecodedGroups: []*DecodedGroup{ { Key: "key1", - RptMonthYear: "202401", - CaseNumber: "CASE1", DecodedRecords: []DecodedRecord{{Row: row1, Schema: cs}}, }, { Key: "key2", - RptMonthYear: "202402", - CaseNumber: "CASE2", DecodedRecords: []DecodedRecord{{Row: row2, Schema: cs}, {Row: row3, Schema: cs}}, }, }, @@ -429,8 +423,6 @@ func TestParseBatch_PreservesGroupMetadata(t *testing.T) { DecodedGroups: []*DecodedGroup{ { Key: "202403|MYCASE", - RptMonthYear: "202403", - CaseNumber: 
"MYCASE", DecodedRecords: []DecodedRecord{{Row: row, Schema: cs}}, }, }, @@ -442,12 +434,6 @@ func TestParseBatch_PreservesGroupMetadata(t *testing.T) { if group.Key != "202403|MYCASE" { t.Errorf("Key = %q, want %q", group.Key, "202403|MYCASE") } - if group.RptMonthYear != "202403" { - t.Errorf("RptMonthYear = %q, want %q", group.RptMonthYear, "202403") - } - if group.CaseNumber != "MYCASE" { - t.Errorf("CaseNumber = %q, want %q", group.CaseNumber, "MYCASE") - } } func TestParseBatch_EmptyBatch(t *testing.T) { diff --git a/tdrs-services/parser/internal/parser/sorter.go b/tdrs-services/parser/internal/parser/sorter.go index b3f46472f7..a1fab42b3d 100644 --- a/tdrs-services/parser/internal/parser/sorter.go +++ b/tdrs-services/parser/internal/parser/sorter.go @@ -33,9 +33,8 @@ type sortableRow struct { // Sorter reads all rows from a decoder, separates non-grouped records, // and stable-sorts data records by key fields. type Sorter struct { - spec *filespec.FileSpec detector *decoder.RecordTypeDetector - keyExtractor decoder.KeyExtractor + keyFields []filespec.KeyFieldDef groupedSchemas map[string]bool } @@ -48,9 +47,8 @@ func NewSorter(spec *filespec.FileSpec, detector *decoder.RecordTypeDetector) *S } return &Sorter{ - spec: spec, detector: detector, - keyExtractor: decoder.NewKeyExtractor(spec), + keyFields: spec.Accumulator.KeyFields.OrderedFields(), groupedSchemas: groupedSchemas, } } @@ -95,7 +93,7 @@ func (s *Sorter) Sort(dec decoder.Decoder) (*SortResult, error) { } // Extract sort key - key, err := s.keyExtractor.ExtractKey(row) + key, err := row.ExtractKey(s.keyFields) if err != nil { // Key extraction failed — collect for error reporting result.UnkeyedRows = append(result.UnkeyedRows, row) diff --git a/tdrs-services/parser/internal/parser/sorter_test.go b/tdrs-services/parser/internal/parser/sorter_test.go index 47b66bd569..2e801aecfd 100644 --- a/tdrs-services/parser/internal/parser/sorter_test.go +++ b/tdrs-services/parser/internal/parser/sorter_test.go 
@@ -19,7 +19,7 @@ type mockDecoder struct { func (d *mockDecoder) Format() filespec.Format { return filespec.FormatPositional } func (d *mockDecoder) ReadFirst() (decoder.Row, error) { return d.header, nil } func (d *mockDecoder) Close() error { return nil } -func (d *mockDecoder) Sort(_ *decoder.RecordTypeDetector, _ decoder.KeyExtractor, _ []string) error { +func (d *mockDecoder) Sort(_ *decoder.RecordTypeDetector, _ []filespec.KeyFieldDef, _ []string) error { return nil } @@ -58,8 +58,10 @@ func buildTANFS1Spec() *filespec.FileSpec { }, Accumulator: filespec.AccumulatorConfig{ KeyFields: &filespec.KeyFieldsConfig{ - RptMonthYear: filespec.PositionDef{Start: 2, End: 8}, - CaseNumber: filespec.PositionDef{Start: 8, End: 19}, + Fields: []filespec.KeyFieldDef{ + {Name: "rpt_month_year", PositionDef: filespec.PositionDef{Start: 2, End: 8}}, + {Name: "case_number", PositionDef: filespec.PositionDef{Start: 8, End: 19}}, + }, }, BatchSize: &batchSize, GroupedSchemas: []string{"tanf/t1", "tanf/t2", "tanf/t3"}, @@ -108,6 +110,13 @@ func makeRow(lineNum int, data string) *decoder.PositionalRow { return decoder.NewPositionalRow(lineNum, rt, len(data), data) } +func testSorterPositionalKeyFields() []filespec.KeyFieldDef { + return []filespec.KeyFieldDef{ + {Name: "rpt_month_year", PositionDef: filespec.PositionDef{Start: 2, End: 8}}, + {Name: "case_number", PositionDef: filespec.PositionDef{Start: 8, End: 19}}, + } +} + func TestSorter_SortsByKey(t *testing.T) { spec := buildTANFS1Spec() schemas := buildTestSchemas() @@ -355,14 +364,9 @@ func TestSorter_UnrecognizedRecordType(t *testing.T) { } } -func TestPositionalKeyExtractor_ExtractKey(t *testing.T) { - extractor := &decoder.PositionalKeyExtractor{ - RptMonthYear: filespec.PositionDef{Start: 2, End: 8}, - CaseNumber: filespec.PositionDef{Start: 8, End: 19}, - } - +func TestRowKeyExtraction_ExtractKey(t *testing.T) { row := makeRow(1, "T1202401CASE001 rest-of-data") - key, err := extractor.ExtractKey(row) + key, err := 
row.ExtractKey(testSorterPositionalKeyFields()) if err != nil { t.Fatalf("ExtractKey failed: %v", err) } @@ -371,14 +375,9 @@ func TestPositionalKeyExtractor_ExtractKey(t *testing.T) { } } -func TestPositionalKeyExtractor_TooShort(t *testing.T) { - extractor := &decoder.PositionalKeyExtractor{ - RptMonthYear: filespec.PositionDef{Start: 2, End: 8}, - CaseNumber: filespec.PositionDef{Start: 8, End: 19}, - } - +func TestRowKeyExtraction_TooShort(t *testing.T) { row := makeRow(1, "T1short") - _, err := extractor.ExtractKey(row) + _, err := row.ExtractKey(testSorterPositionalKeyFields()) if err == nil { t.Fatal("expected error for short row") } diff --git a/tdrs-services/parser/internal/parser/transform.go b/tdrs-services/parser/internal/parser/transform.go index 806beab4eb..b597e57dfc 100644 --- a/tdrs-services/parser/internal/parser/transform.go +++ b/tdrs-services/parser/internal/parser/transform.go @@ -2,7 +2,12 @@ package parser import ( "fmt" + "math" + "strconv" "strings" + "time" + + "github.com/xuri/excelize/v2" ) // TransformFunc defines the signature for all transform functions. @@ -20,6 +25,7 @@ var Registry = map[string]TransformFunc{ "zero_pad": ZeroPad, "ssn_decrypt": SSNDecrypt, "calendar_quarter_to_month": CalendarQuarterToMonth, + "fra_exit_date": FRAExitDate, } // Apply looks up and executes a transform by name. @@ -141,3 +147,87 @@ func CalendarQuarterToMonth(value string, params map[string]any, _ *ParseContext return year + months[monthIndex], nil } + +// FRAExitDate normalizes FRA EXIT_DATE values to YYYYMM. +// FRA XLSX files may contain a real Excel date cell, which excelize exposes as +// either a formatted date string or an Excel serial number depending on styling. 
+func FRAExitDate(value string, _ map[string]any, _ *ParseContext) (string, error) { + trimmed := strings.TrimSpace(value) + if trimmed == "" { + return "", nil + } + + if isYYYYMM(trimmed) { + return trimmed, nil + } + + if len(trimmed) == 6 && isDigits(trimmed) { + return value, nil + } + + if f, err := strconv.ParseFloat(trimmed, 64); err == nil { + if math.Trunc(f) == f { + if serialDate, err := excelize.ExcelDateToTime(f, false); err == nil { + return serialDate.Format("200601"), nil + } + } + } + + for _, layout := range fraExitDateLayouts { + if parsed, err := time.Parse(layout, trimmed); err == nil { + return parsed.Format("200601"), nil + } + } + + return value, nil +} + +var fraExitDateLayouts = []string{ + "2006-01-02", + "2006-1-2", + "2006/01/02", + "2006/1/2", + "1/2/2006", + "01/02/2006", + "1/2/06", + "01/02/06", + "1-2-2006", + "01-02-2006", + "1-2-06", + "01-02-06", + "2006-01-02 15:04:05", + "2006/01/02 15:04:05", + "1/2/2006 15:04:05", + "01/02/2006 15:04:05", + "Jan-06", + "January-06", + "Jan 2006", + "January 2006", +} + +func isYYYYMM(value string) bool { + if len(value) != 6 { + return false + } + if !isDigits(value) { + return false + } + year, err := strconv.Atoi(value[:4]) + if err != nil || year < 1900 { + return false + } + month, err := strconv.Atoi(value[4:]) + if err != nil { + return false + } + return month >= 1 && month <= 12 +} + +func isDigits(value string) bool { + for _, r := range value { + if r < '0' || r > '9' { + return false + } + } + return value != "" +} diff --git a/tdrs-services/parser/internal/parser/transform_test.go b/tdrs-services/parser/internal/parser/transform_test.go index f463046a81..f475664f9b 100644 --- a/tdrs-services/parser/internal/parser/transform_test.go +++ b/tdrs-services/parser/internal/parser/transform_test.go @@ -439,8 +439,80 @@ func TestCalendarQuarterToMonth(t *testing.T) { } } +func TestFRAExitDate(t *testing.T) { + tests := []struct { + name string + value string + want string + wantErr bool 
+ }{ + { + name: "already YYYYMM", + value: "202310", + want: "202310", + }, + { + name: "xlsx formatted date with four digit year", + value: "10/1/2023", + want: "202310", + }, + { + name: "xlsx formatted date with two digit year", + value: "10/1/23", + want: "202310", + }, + { + name: "iso date", + value: "2023-10-01", + want: "202310", + }, + { + name: "month year", + value: "Oct-23", + want: "202310", + }, + { + name: "excel serial date", + value: "45200", + want: "202310", + }, + { + name: "empty date", + value: "", + want: "", + }, + { + name: "invalid date", + value: "not-a-date", + want: "not-a-date", + }, + { + name: "invalid YYYYMM month", + value: "202313", + want: "202313", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := FRAExitDate(tt.value, nil, nil) + if tt.wantErr { + if err == nil { + t.Fatal("expected error, got nil") + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != tt.want { + t.Errorf("FRAExitDate(%q) = %q, want %q", tt.value, got, tt.want) + } + }) + } +} + func TestRegistryContainsAllTransforms(t *testing.T) { - expected := []string{"trim", "zero_pad", "ssn_decrypt", "calendar_quarter_to_month"} + expected := []string{"trim", "zero_pad", "ssn_decrypt", "calendar_quarter_to_month", "fra_exit_date"} for _, name := range expected { if _, ok := Registry[name]; !ok { t.Errorf("Registry missing transform: %s", name) diff --git a/tdrs-services/parser/internal/parser/types.go b/tdrs-services/parser/internal/parser/types.go index fa2308c3e6..ce3421de7a 100644 --- a/tdrs-services/parser/internal/parser/types.go +++ b/tdrs-services/parser/internal/parser/types.go @@ -4,6 +4,7 @@ import ( "fmt" "slices" "strconv" + "strings" "go-parser/internal/config/schema" "go-parser/internal/decoder" @@ -16,19 +17,12 @@ type DecodedRecord struct { } // DecodedGroup holds all decoded records belonging to a logical group. 
-// For key-based grouping: all records with the same (RPT_MONTH_YEAR, CASE_NUMBER). -// For non-keyed: each record is its own group (Key is empty). +// For key-based grouping, all records share the same configured composite key. +// For non-keyed files, each record is its own group. type DecodedGroup struct { - // Key is the composite grouping key: "YYYYMM|CASE_NUMBER" - // Empty string if no key_fields are configured (each record is its own group). + // Key is the composite grouping key built from configured key_fields. Key string - // RptMonthYear is extracted from the key for convenience (empty if no key_fields) - RptMonthYear string - - // CaseNumber is extracted from the key for convenience (empty if no key_fields) - CaseNumber string - // DecodedRecords contains all rows for this group DecodedRecords []DecodedRecord } @@ -172,11 +166,12 @@ func (pr *ParsedRecord) GetInt(fieldName string) int { case int: return val case string: - if val == "" { + trimmed := strings.TrimSpace(val) + if trimmed == "" { return 0 } // Parse string as int, return 0 on failure - i, err := strconv.Atoi(val) + i, err := strconv.Atoi(trimmed) if err != nil { return 0 } @@ -260,9 +255,7 @@ type ParseContext struct { // ParsedGroup contains parsing results for a single RecordGroup. type ParsedGroup struct { // Key is the grouping key (empty for non-keyed records) - Key string - RptMonthYear string - CaseNumber string + Key string // Records contains all successfully parsed records in this group Records []*ParsedRecord @@ -273,16 +266,6 @@ func (pg *ParsedGroup) GetKey() string { return pg.Key } -// GetRptMonthYear returns the reporting month/year. -func (pg *ParsedGroup) GetRptMonthYear() string { - return pg.RptMonthYear -} - -// GetCaseNumber returns the case number. -func (pg *ParsedGroup) GetCaseNumber() string { - return pg.CaseNumber -} - // ParsedBatch contains parsing results for a Batch (one or more groups). 
type ParsedBatch struct { BatchID int diff --git a/tdrs-services/parser/internal/parser/types_test.go b/tdrs-services/parser/internal/parser/types_test.go index f51cd86d67..07b81acb86 100644 --- a/tdrs-services/parser/internal/parser/types_test.go +++ b/tdrs-services/parser/internal/parser/types_test.go @@ -38,10 +38,8 @@ func newRecord(s *schema.CompiledSchema, lineNum int, values map[string]any) *Pa // newGroup creates a ParsedGroup with default key fields for testing. func newGroup(records ...*ParsedRecord) *ParsedGroup { return &ParsedGroup{ - Key: "202401|12345", - RptMonthYear: "202401", - CaseNumber: "12345", - Records: records, + Key: "202401|12345", + Records: records, } } @@ -550,25 +548,13 @@ func TestParsedGroup_Getters(t *testing.T) { if got := g.GetKey(); got != "202401|12345" { t.Errorf("GetKey() = %s, want 202401|12345", got) } - if got := g.GetRptMonthYear(); got != "202401" { - t.Errorf("GetRptMonthYear() = %s, want 202401", got) - } - if got := g.GetCaseNumber(); got != "12345" { - t.Errorf("GetCaseNumber() = %s, want 12345", got) - } } func TestParsedGroup_EmptyKey(t *testing.T) { - g := &ParsedGroup{Key: "", RptMonthYear: "", CaseNumber: ""} + g := &ParsedGroup{Key: ""} if got := g.GetKey(); got != "" { t.Errorf("GetKey() = %q, want empty", got) } - if got := g.GetRptMonthYear(); got != "" { - t.Errorf("GetRptMonthYear() = %q, want empty", got) - } - if got := g.GetCaseNumber(); got != "" { - t.Errorf("GetCaseNumber() = %q, want empty", got) - } } // --- ParsedBatch tests --- diff --git a/tdrs-services/parser/internal/pipeline/pipeline.go b/tdrs-services/parser/internal/pipeline/pipeline.go index ca3681ef60..7bec166dfd 100644 --- a/tdrs-services/parser/internal/pipeline/pipeline.go +++ b/tdrs-services/parser/internal/pipeline/pipeline.go @@ -5,9 +5,11 @@ import ( "errors" "fmt" "log" + "strings" "time" "go-parser/internal/config" + "go-parser/internal/config/filespec" "go-parser/internal/decoder" "go-parser/internal/parser" 
"go-parser/internal/sentinel" @@ -101,20 +103,30 @@ func (p *Pipeline) Process(ctx context.Context, dec decoder.Decoder, dfCtx DataF var headerStats ErrorStats var result *ParsingResult - // Step 3: Read and parse header (for positional files) - headerRow, err := dec.ReadFirst() + // Step 3: Read first row. Positional files use it as HEADER; FRA uses it + // for a first-data-row sanity check and then processes it normally. + firstRow, err := dec.ReadFirst() if err != nil { - err = fmt.Errorf("failed to read header: %w", err) + err = fmt.Errorf("failed to read first row: %w", err) if rollbackErr := p.abortAndRollback(ctx, cancelRun, dfCtx, router); rollbackErr != nil { return nil, errors.Join(err, rollbackErr) } return nil, err } - headerSchema := p.registry.GetSchema(parser.HeaderSchemaPath) - parseCtx, err := parser.ParseHeader(headerRow, headerSchema) - if err != nil { - return p.handleHeaderParseInvalid(err, ctx, dfCtx, router, validationOrchestrator, startTime) + // TODO: We could probably abstract this branch into an interface. Helpful if we get more file types with different + // header semantics. 
+ var parseCtx *parser.ParseContext + if spec.Format == filespec.FormatColumnar { + if dfCtx.Program == "FRA" && !isValidFRAFirstRow(firstRow) { + return p.handleFRAFirstRowInvalid(ctx, dfCtx, router, startTime), nil + } + } else { + headerSchema := p.registry.GetSchema(parser.HeaderSchemaPath) + parseCtx, err = parser.ParseHeader(firstRow, headerSchema) + if err != nil { + return p.handleHeaderParseInvalid(err, ctx, dfCtx, router, validationOrchestrator, startTime) + } } // Step 3b: Validate header (skip for FRA/columnar files where parseCtx is nil) @@ -145,7 +157,7 @@ func (p *Pipeline) Process(ctx context.Context, dec decoder.Decoder, dfCtx DataF // Step 4b: Sort the file if presort is enabled if spec.Accumulator.Presort && spec.Accumulator.HasKeyFields() { - if err := dec.Sort(detector, decoder.NewKeyExtractor(spec), spec.Accumulator.GroupedSchemas); err != nil { + if err := dec.Sort(detector, spec.Accumulator.KeyFields.OrderedFields(), spec.Accumulator.GroupedSchemas); err != nil { err = fmt.Errorf("presort failed: %w", err) if rollbackErr := p.abortAndRollback(ctx, cancelRun, dfCtx, router); rollbackErr != nil { return nil, errors.Join(err, rollbackErr) @@ -171,22 +183,24 @@ func (p *Pipeline) Process(ctx context.Context, dec decoder.Decoder, dfCtx DataF err = accumulateBatches(runCtx, dec, acc, workers, router, dfCtx.DatafileID) // Step 8: Wait for everything to complete - if err != nil { - cancelRun() - } - workers.CloseInputs() - workers.Wait() - if err != nil { var multipleHeaders *sentinel.MultipleHeadersError if errors.As(err, &multipleHeaders) { + workers.CloseInputs() + workers.Wait() return p.handleMultipleHeaders(ctx, cancelRun, dfCtx, router, multipleHeaders.RowNumber(), startTime) } + + cancelRun() + workers.CloseInputs() + workers.Wait() if rollbackErr := p.abortAndRollback(ctx, cancelRun, dfCtx, router); rollbackErr != nil { return nil, errors.Join(err, rollbackErr) } return nil, err } + workers.CloseInputs() + workers.Wait() if err := 
workers.Err(); err != nil { if rollbackErr := p.abortAndRollback(ctx, cancelRun, dfCtx, router); rollbackErr != nil { @@ -268,6 +282,21 @@ func addErrorStats(dst *ErrorStats, src ErrorStats) { dst.CaseConsistency += src.CaseConsistency } +func isValidFRAFirstRow(row decoder.Row) bool { + cr, ok := row.(*decoder.ColumnarRow) + if !ok || cr.ColumnCount() != 2 { + return false + } + + for i := 0; i < cr.ColumnCount(); i++ { + if strings.TrimSpace(fmt.Sprintf("%v", cr.Column(i))) == "" { + return false + } + } + + return true +} + // renderHeaderErrorMessage renders a validation result's message template // with context from the header record. This is similar to writer.renderErrorMessage // but adds DataFileContext and Values for header cross-validation messages. @@ -335,18 +364,12 @@ func (p *Pipeline) writeNoRecordsCreatedError( } noRecordsCreated := validationOrchestrator.CreateNoRecordsCreatedError() - var row []any - if headerRecord == nil { - row = writer.SerializeHeaderError(noRecordsCreated.Message(nil), noRecordsCreated.ErrorType, datafileID) - } else { - row = writer.SerializeError( - noRecordsCreated, - headerRecord, - nil, - datafileID, - nil, - ) - } + row := writer.SerializeParserError( + noRecordsCreated.LineNumber, + noRecordsCreated.Message(nil), + noRecordsCreated.ErrorType, + datafileID, + ) if err := writeRow(row); err != nil { return 0, err } @@ -356,12 +379,13 @@ func (p *Pipeline) writeNoRecordsCreatedError( func (p *Pipeline) abortAndRollback(ctx context.Context, cancelRun context.CancelFunc, dfCtx DataFileContext, router *writer.Router) error { cancelRun() + cleanupCtx := context.WithoutCancel(ctx) var errs []error if abortErr := router.Abort(); abortErr != nil { errs = append(errs, fmt.Errorf("abort writers: %w", abortErr)) } - if rollbackErr := p.rollbackDatafile(ctx, dfCtx, router); rollbackErr != nil { + if rollbackErr := p.rollbackDatafile(cleanupCtx, dfCtx, router); rollbackErr != nil { errs = append(errs, rollbackErr) } return 
errors.Join(errs...) @@ -379,9 +403,18 @@ func (p *Pipeline) rollbackDatafile(ctx context.Context, dfCtx DataFileContext, func (p *Pipeline) handleMultipleHeaders(ctx context.Context, cancelRun context.CancelFunc, dfCtx DataFileContext, router *writer.Router, rowNumber int, startTime time.Time) (*ParsingResult, error) { // Multiple headers detected: stop writers, rollback all records/errors // already written, then write a single PRE_CHECK error directly via sink. - if rollbackErr := p.abortAndRollback(ctx, cancelRun, dfCtx, router); rollbackErr != nil { - log.Printf("failed to rollback datafile records: %v", rollbackErr) - return nil, rollbackErr + cleanupCtx := context.WithoutCancel(ctx) + var errs []error + if abortErr := router.Abort(); abortErr != nil { + errs = append(errs, fmt.Errorf("abort writers: %w", abortErr)) + } + cancelRun() + if rollbackErr := p.rollbackDatafile(cleanupCtx, dfCtx, router); rollbackErr != nil { + errs = append(errs, rollbackErr) + } + if err := errors.Join(errs...); err != nil { + log.Printf("failed to rollback datafile records: %v", err) + return nil, err } log.Printf("Header validation failed: Multiple headers found.") @@ -391,7 +424,7 @@ func (p *Pipeline) handleMultipleHeaders(ctx context.Context, cancelRun context. validation.ErrorTypePreCheck, dfCtx.DatafileID, ) - if _, flushErr := p.sink.Flush(ctx, "parser_error", writer.ParserErrorColumns(), [][]any{headerErr}); flushErr != nil { + if _, flushErr := p.sink.Flush(cleanupCtx, "parser_error", writer.ParserErrorColumns(), [][]any{headerErr}); flushErr != nil { log.Printf("failed to write multiple headers error: %v", flushErr) } return &ParsingResult{ @@ -401,6 +434,29 @@ func (p *Pipeline) handleMultipleHeaders(ctx context.Context, cancelRun context. 
}, nil } +func (p *Pipeline) handleFRAFirstRowInvalid(ctx context.Context, dfCtx DataFileContext, router *writer.Router, startTime time.Time) *ParsingResult { + log.Printf("FRA first-row validation failed: File does not begin with FRA data.") + parserErr := writer.SerializeParserError( + 1, + "File does not begin with FRA data.", + validation.ErrorTypePreCheck, + dfCtx.DatafileID, + ) + if routeErr := router.RouteErrorRow(ctx, parserErr); routeErr != nil { + log.Printf("failed to write FRA first-row error: %v", routeErr) + } + if stopErr := router.Stop(); stopErr != nil { + log.Printf("failed to stop router: %v", stopErr) + } + + return &ParsingResult{ + RecordCounts: map[string]int64{"parser_error": 1}, + ErrorCount: 1, + ErrorStats: &ErrorStats{RecordPreCheck: 1}, + Duration: time.Since(startTime), + } +} + func (p *Pipeline) handleHeaderValidationResult( ctx context.Context, headerResult *validation.RecordValidationResult, @@ -422,7 +478,7 @@ func (p *Pipeline) handleHeaderValidationResult( for _, vr := range allErrors { msg := renderHeaderErrorMessage(vr, parseCtx.Header, valDfCtx) log.Printf(" [%s] %s", vr.ErrorType, msg) - row := writer.SerializeHeaderError(msg, vr.ErrorType, dfCtx.DatafileID) + row := writer.SerializeParserError(parseCtx.Header.LineNumber, msg, vr.ErrorType, dfCtx.DatafileID) if routeErr := router.RouteErrorRow(ctx, row); routeErr != nil { log.Printf("failed to write header error: %v", routeErr) } @@ -454,7 +510,8 @@ func (p *Pipeline) handleHeaderValidationResult( func (p *Pipeline) handleHeaderParseInvalid(err error, ctx context.Context, dfCtx DataFileContext, router *writer.Router, validationOrchestrator *validation.ValidationOrchestrator, startTime time.Time) (*ParsingResult, error) { // First line is not a HEADER record or other error — generate a PRE_CHECK error and stop log.Printf("Header validation failed: %s.", err.Error()) - headerErr := writer.SerializeHeaderError( + headerErr := writer.SerializeParserError( + 1, err.Error(), 
validation.ErrorTypePreCheck, dfCtx.DatafileID, diff --git a/tdrs-services/parser/internal/pipeline/pipeline_test.go b/tdrs-services/parser/internal/pipeline/pipeline_test.go index 1e24a5526b..1eebed353f 100644 --- a/tdrs-services/parser/internal/pipeline/pipeline_test.go +++ b/tdrs-services/parser/internal/pipeline/pipeline_test.go @@ -19,7 +19,7 @@ func (s *stubSink) Flush(_ context.Context, _ string, _ []string, _ [][]any) (in return 0, nil } func (s *stubSink) RollbackDatafile(_ context.Context, _ int32, _ []string) error { return nil } -func (s *stubSink) Close() error { return nil } +func (s *stubSink) Close() error { return nil } // Verify writer.Sink interface is satisfied by stubSink at compile time. var _ writer.Sink = (*stubSink)(nil) @@ -27,12 +27,12 @@ var _ writer.Sink = (*stubSink)(nil) // stubDecoder implements decoder.Decoder minimally for testing. type stubDecoder struct{} -func (d *stubDecoder) Format() filespec.Format { return filespec.FormatPositional } -func (d *stubDecoder) ReadFirst() (decoder.Row, error) { return nil, nil } +func (d *stubDecoder) Format() filespec.Format { return filespec.FormatPositional } +func (d *stubDecoder) ReadFirst() (decoder.Row, error) { return nil, nil } func (d *stubDecoder) Rows() iter.Seq2[decoder.Row, error] { return func(yield func(decoder.Row, error) bool) {} } -func (d *stubDecoder) Sort(_ *decoder.RecordTypeDetector, _ decoder.KeyExtractor, _ []string) error { +func (d *stubDecoder) Sort(_ *decoder.RecordTypeDetector, _ []filespec.KeyFieldDef, _ []string) error { return nil } func (d *stubDecoder) Close() error { return nil } diff --git a/tdrs-services/parser/internal/pipeline/process_test.go b/tdrs-services/parser/internal/pipeline/process_test.go index b5cd5cf564..aa7a13b25f 100644 --- a/tdrs-services/parser/internal/pipeline/process_test.go +++ b/tdrs-services/parser/internal/pipeline/process_test.go @@ -7,6 +7,7 @@ import ( "path/filepath" "slices" "strings" + "sync" "testing" 
"go-parser/internal/config" @@ -80,8 +81,20 @@ func testSSPContext() DataFileContext { } } +func testFRAContext() DataFileContext { + return DataFileContext{ + Program: "FRA", + Section: 1, + DatafileID: 1, + FiscalYear: 2024, + FiscalQuarter: "Q2", + SectionName: "FRA Work Outcome TANF Exiters", + } +} + // capturingSink captures all flushed data for assertions. type capturingSink struct { + mu sync.Mutex tables map[string][][]any // tableName -> rows rollbackCalls int rollbackErr error @@ -96,11 +109,15 @@ func newCapturingSink() *capturingSink { func (s *capturingSink) Flush(_ context.Context, tableName string, _ []string, rows [][]any) (int64, error) { copied := make([][]any, len(rows)) copy(copied, rows) + s.mu.Lock() + defer s.mu.Unlock() s.tables[tableName] = append(s.tables[tableName], copied...) return int64(len(rows)), nil } func (s *capturingSink) RollbackDatafile(_ context.Context, datafileID int32, tables []string) error { + s.mu.Lock() + defer s.mu.Unlock() s.rollbackCalls++ s.rollbackID = datafileID s.rollbackTables = slices.Clone(tables) @@ -116,10 +133,14 @@ func (s *capturingSink) RollbackDatafile(_ context.Context, datafileID int32, ta func (s *capturingSink) Close() error { return nil } func (s *capturingSink) rowCount(tableName string) int { + s.mu.Lock() + defer s.mu.Unlock() return len(s.tables[tableName]) } func (s *capturingSink) totalRecords() int { + s.mu.Lock() + defer s.mu.Unlock() total := 0 for name, rows := range s.tables { if name != "parser_error" { @@ -130,6 +151,8 @@ func (s *capturingSink) totalRecords() int { } func (s *capturingSink) errorCount() int { + s.mu.Lock() + defer s.mu.Unlock() return len(s.tables["parser_error"]) } @@ -154,6 +177,83 @@ func writeTempFile(t *testing.T, content string) string { // --- End-to-end Process tests --- +func TestProcess_FRAInvalidFirstRowWritesPreCheckError(t *testing.T) { + reg := loadRegistry(t) + validators := loadValidators(t, reg) + + tests := []struct { + name string + content string 
+ }{ + { + name: "comment row", + content: "# This line represents a header which is not allowed in fra files.\n202401,946412419\n", + }, + { + name: "empty first row", + content: "\n202401,946412419\n", + }, + { + name: "missing column value", + content: "202401,\n", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + filePath := writeTempFile(t, tt.content) + f, err := os.Open(filePath) + if err != nil { + t.Fatalf("failed to open file: %v", err) + } + defer f.Close() + + spec := reg.GetFileSpec("FRA", 1) + if spec == nil { + t.Fatal("GetFileSpec(FRA, 1) returned nil") + } + + dec, err := decoder.CreateDecoder(f, spec) + if err != nil { + t.Fatalf("CreateDecoder failed: %v", err) + } + defer dec.Close() + + sink := newCapturingSink() + pipelineCfg := TestConfig() + pipelineCfg.IncludeRecords = true + pipelineCfg.IncludeErrors = true + p := NewPipeline(sink, reg, validators, pipelineCfg) + + result, err := p.Process(context.Background(), dec, testFRAContext()) + if err != nil { + t.Fatalf("Process failed: %v", err) + } + + if result.ErrorCount != 1 { + t.Errorf("ErrorCount = %d, want 1", result.ErrorCount) + } + if sink.totalRecords() != 0 { + t.Errorf("totalRecords = %d, want 0", sink.totalRecords()) + } + if sink.errorCount() != 1 { + t.Fatalf("sink error count = %d, want 1", sink.errorCount()) + } + + row := sink.tables["parser_error"][0] + if got := row[0]; got != int32(1) { + t.Errorf("row_number = %v, want %d", got, 1) + } + if got := row[6]; got != "File does not begin with FRA data." { + t.Errorf("error_message = %v, want %q", got, "File does not begin with FRA data.") + } + if got := row[7]; got != "1" { + t.Errorf("error_type = %v, want %q", got, "1") + } + }) + } +} + func TestProcess_TANF_S1_ValidData(t *testing.T) { reg := loadRegistry(t) validators := loadValidators(t, reg) @@ -270,6 +370,9 @@ func TestProcess_TANF_S1_MissingHeader(t *testing.T) { if got := sink.tables["parser_error"][1][6]; got != "No records created." 
{ t.Errorf("second error_message = %v, want %q", got, "No records created.") } + if got := sink.tables["parser_error"][1][0]; got != int32(0) { + t.Errorf("second row_number = %v, want %d", got, 0) + } } func TestProcess_TANF_MultipleHeaders_RollbacksAndWritesOffendingRow(t *testing.T) { @@ -566,6 +669,9 @@ func TestProcess_HeaderOnlyWritesNoRecordsCreatedError(t *testing.T) { } row := sink.tables["parser_error"][0] + if got := row[0]; got != int32(0) { + t.Errorf("row_number = %v, want %d", got, 0) + } if got := row[6]; got != "No records created." { t.Errorf("error_message = %v, want %q", got, "No records created.") } diff --git a/tdrs-services/parser/internal/pipeline/worker_pool_test.go b/tdrs-services/parser/internal/pipeline/worker_pool_test.go index dafc5a0ffe..530865b5b7 100644 --- a/tdrs-services/parser/internal/pipeline/worker_pool_test.go +++ b/tdrs-services/parser/internal/pipeline/worker_pool_test.go @@ -169,9 +169,7 @@ func TestWorkerPool_ProcessesBatches(t *testing.T) { BatchID: 1, DecodedGroups: []*parser.DecodedGroup{ { - Key: "202401|12345", - RptMonthYear: "202401", - CaseNumber: "12345", + Key: "202401|12345", DecodedRecords: []parser.DecodedRecord{ {Row: row, Schema: t1Schema}, }, @@ -245,9 +243,7 @@ func TestWorkerPool_MultipleBatches(t *testing.T) { BatchID: i + 1, DecodedGroups: []*parser.DecodedGroup{ { - Key: "202401|12345", - RptMonthYear: "202401", - CaseNumber: "12345", + Key: "202401|12345", DecodedRecords: []parser.DecodedRecord{ {Row: row, Schema: t1Schema}, }, @@ -421,14 +417,10 @@ func TestWorkerPool_ProcessBatch_WithMultipleGroups(t *testing.T) { DecodedGroups: []*parser.DecodedGroup{ { Key: "202401|11111", - RptMonthYear: "202401", - CaseNumber: "11111", DecodedRecords: []parser.DecodedRecord{{Row: row1, Schema: t1Schema}}, }, { Key: "202401|22222", - RptMonthYear: "202401", - CaseNumber: "22222", DecodedRecords: []parser.DecodedRecord{{Row: row2, Schema: t1Schema}}, }, }, diff --git 
a/tdrs-services/parser/internal/sentinel/errors.go b/tdrs-services/parser/internal/sentinel/errors.go index 16d91bc711..a747f6625b 100644 --- a/tdrs-services/parser/internal/sentinel/errors.go +++ b/tdrs-services/parser/internal/sentinel/errors.go @@ -10,10 +10,16 @@ var ( // to any schema in the active file specification. ErrUnknownRecordType = errors.New("unknown record type") + // ErrDecoderUnknown identifies files whose bytes cannot be matched to a + // supported decoder before normal parser validation can begin. + ErrDecoderUnknown = errors.New("decoder unknown") + // ErrWriterAborted identifies writes attempted after a per-run rollback abort. ErrWriterAborted = errors.New("writer aborted") ) +const DecoderUnknownMessage = "Could not determine encoding of FRA file. If the file is an XLSX file, ensure it can be opened in Excel. If the file is a CSV, ensure it can be opened in a text editor and is UTF-8 encoded." + // MultipleHeadersError identifies a second HEADER record and carries the // offending row number for parser error reporting. type MultipleHeadersError struct { diff --git a/tdrs-services/parser/internal/server/celery/celery.go b/tdrs-services/parser/internal/server/celery/celery.go index a427f1e00e..b46226b91b 100644 --- a/tdrs-services/parser/internal/server/celery/celery.go +++ b/tdrs-services/parser/internal/server/celery/celery.go @@ -235,10 +235,17 @@ func sectionNumber(section string) int { switch section { case "Active Case Data": return 1 + // TODO: We should probably move off of the number system for sections. 
+ case "Work Outcomes of TANF Exiters": + return 1 case "Closed Case Data": return 2 + case "Secondary School Attainment": + return 2 case "Aggregate Data": return 3 + case "Supplemental Work Outcomes": + return 3 case "Stratum Data": return 4 default: diff --git a/tdrs-services/parser/internal/server/server.go b/tdrs-services/parser/internal/server/server.go index 12602b9aa5..2cf87b3148 100644 --- a/tdrs-services/parser/internal/server/server.go +++ b/tdrs-services/parser/internal/server/server.go @@ -2,8 +2,10 @@ package server import ( "context" + "errors" "fmt" "log" + "time" "github.com/jackc/pgx/v5/pgxpool" @@ -11,6 +13,7 @@ import ( "go-parser/internal/db" "go-parser/internal/decoder" "go-parser/internal/pipeline" + "go-parser/internal/sentinel" "go-parser/internal/storage/reader" "go-parser/internal/storage/writer" "go-parser/internal/validation" @@ -59,6 +62,7 @@ func (b *Base) ConnectDB(ctx context.Context) (*pgxpool.Pool, error) { // and runs the parsing pipeline. It centralizes the shared orchestration logic // used by all server modes. 
func (b *Base) RunPipeline(ctx context.Context, source reader.FileSource, sink writer.Sink, dfCtx pipeline.DataFileContext) (*pipeline.ParsingResult, error) { + startTime := time.Now() file, err := source.Open(ctx) if err != nil { return nil, fmt.Errorf("failed to open file: %w", err) @@ -73,6 +77,9 @@ func (b *Base) RunPipeline(ctx context.Context, source reader.FileSource, sink w dec, err := decoder.CreateDecoder(file, spec) if err != nil { + if errors.Is(err, sentinel.ErrDecoderUnknown) { + return b.handleDecoderUnknown(ctx, sink, dfCtx, startTime) + } return nil, fmt.Errorf("failed to create decoder: %w", err) } defer dec.Close() @@ -80,3 +87,21 @@ func (b *Base) RunPipeline(ctx context.Context, source reader.FileSource, sink w pipeln := pipeline.NewPipeline(sink, b.Registry, b.Validators, pipeline.NewConfig(b.Config)) return pipeln.Process(ctx, dec, dfCtx) } + +func (b *Base) handleDecoderUnknown(ctx context.Context, sink writer.Sink, dfCtx pipeline.DataFileContext, startTime time.Time) (*pipeline.ParsingResult, error) { + parserErr := writer.SerializeParserError( + 1, + sentinel.DecoderUnknownMessage, + validation.ErrorTypePreCheck, + dfCtx.DatafileID, + ) + if _, err := sink.Flush(ctx, "parser_error", writer.ParserErrorColumns(), [][]any{parserErr}); err != nil { + return nil, fmt.Errorf("write decoder unknown parser error: %w", err) + } + return &pipeline.ParsingResult{ + RecordCounts: map[string]int64{"parser_error": 1}, + ErrorCount: 1, + ErrorStats: &pipeline.ErrorStats{RecordPreCheck: 1}, + Duration: time.Since(startTime), + }, nil +} diff --git a/tdrs-services/parser/internal/server/server_test.go b/tdrs-services/parser/internal/server/server_test.go new file mode 100644 index 0000000000..8143121671 --- /dev/null +++ b/tdrs-services/parser/internal/server/server_test.go @@ -0,0 +1,100 @@ +package server + +import ( + "context" + "os" + "testing" + + "go-parser/internal/config" + "go-parser/internal/config/filespec" + "go-parser/internal/pipeline" + 
"go-parser/internal/sentinel" + "go-parser/internal/storage/reader" + "go-parser/internal/storage/writer" +) + +type captureSink struct { + table string + columns []string + rows [][]any +} + +func (s *captureSink) Flush(_ context.Context, tableName string, columns []string, rows [][]any) (int64, error) { + s.table = tableName + s.columns = columns + s.rows = append(s.rows, rows...) + return int64(len(rows)), nil +} + +func (s *captureSink) RollbackDatafile(_ context.Context, _ int32, _ []string) error { + return nil +} + +func (s *captureSink) Close() error { + return nil +} + +var _ writer.Sink = (*captureSink)(nil) + +func TestRunPipeline_DecoderUnknownWritesPreCheckError(t *testing.T) { + tmpFile, err := os.CreateTemp("", "fra-decoder-unknown-*.xlsx") + if err != nil { + t.Fatalf("CreateTemp failed: %v", err) + } + defer os.Remove(tmpFile.Name()) + + if _, err := tmpFile.WriteString("%PDF-1.4\n"); err != nil { + t.Fatalf("WriteString failed: %v", err) + } + if err := tmpFile.Close(); err != nil { + t.Fatalf("Close failed: %v", err) + } + + cfg := config.DefaultConfig() + reg := config.NewTestRegistry(nil) + reg.FileSpecs()["FRA:1"] = &filespec.FileSpec{ + Program: "FRA", + Section: 1, + Format: filespec.FormatColumnar, + RecordTypeDetection: filespec.RecordTypeDetection{ + Schema: "TE1", + }, + } + + sink := &captureSink{} + base := NewBase(cfg, reg, nil) + result, err := base.RunPipeline( + context.Background(), + reader.NewLocalSource(tmpFile.Name()), + sink, + pipeline.DataFileContext{ + Program: "FRA", + Section: 1, + DatafileID: 42, + }, + ) + if err != nil { + t.Fatalf("RunPipeline failed: %v", err) + } + + if result.ErrorCount != 1 { + t.Fatalf("ErrorCount = %d, want 1", result.ErrorCount) + } + if sink.table != "parser_error" { + t.Fatalf("table = %q, want parser_error", sink.table) + } + if len(sink.rows) != 1 { + t.Fatalf("rows = %d, want 1", len(sink.rows)) + } + + row := sink.rows[0] + if row[6] != sentinel.DecoderUnknownMessage { + t.Fatalf("error 
message = %q, want %q", row[6], sentinel.DecoderUnknownMessage) + } + if row[7] != "1" { + t.Fatalf("error type = %v, want PRE_CHECK db value 1", row[7]) + } + if row[11] != int32(42) { + t.Fatalf("file id = %v, want 42", row[11]) + } +} diff --git a/tdrs-services/parser/internal/storage/writer/error.go b/tdrs-services/parser/internal/storage/writer/error.go index e25dfab4c4..7f24f38eea 100644 --- a/tdrs-services/parser/internal/storage/writer/error.go +++ b/tdrs-services/parser/internal/storage/writer/error.go @@ -282,28 +282,6 @@ func toErrorObjectID(recordUUID *pgtype.UUID) pgtype.UUID { return *recordUUID } -// SerializeHeaderError creates a database error row for a header validation error. -// Header errors have row_number=1, no case_number, no object_id, no content_type_id. -func SerializeHeaderError(message string, errorType string, datafileID int32) []any { - return []any{ - int32(1), // row_number (header is always line 1) - nil, // column_number - nil, // item_number - nil, // field_name - nil, // case_number - nil, // rpt_month_year - message, // error_message - mapErrorType(errorType), // error_type - time.Now(), // created_at - nil, // fields_json - nil, // content_type_id - datafileID, // file_id - pgtype.UUID{Valid: false}, // object_id - false, // deprecated - nil, // values_json - } -} - // SerializeParserError creates a database error row for parser-level line errors. 
func SerializeParserError(rowNumber int, message string, errorType string, datafileID int32) []any { return []any{ diff --git a/tdrs-services/parser/internal/storage/writer/ssp.go b/tdrs-services/parser/internal/storage/writer/ssp.go index 09dacab24a..664c2caacf 100644 --- a/tdrs-services/parser/internal/storage/writer/ssp.go +++ b/tdrs-services/parser/internal/storage/writer/ssp.go @@ -140,7 +140,6 @@ func serializeSspM3(record *parser.ParsedRecord, datafileID int32) [][]any { record.Get("RecordType"), record.Get("RPT_MONTH_YEAR"), record.Get("CASE_NUMBER"), - record.Get("FIPS_CODE"), record.Get("FAMILY_AFFILIATION"), record.Get("DATE_OF_BIRTH"), record.Get("SSN"), diff --git a/tdrs-services/parser/internal/testutil/records.go b/tdrs-services/parser/internal/testutil/records.go index 1d05ad4336..229ef386ed 100644 --- a/tdrs-services/parser/internal/testutil/records.go +++ b/tdrs-services/parser/internal/testutil/records.go @@ -42,9 +42,7 @@ func NewTestRecord(s *schema.CompiledSchema, lineNum int, values map[string]any) // NewTestGroup creates a ParsedGroup with default key fields for testing. 
func NewTestGroup(records ...*parser.ParsedRecord) *parser.ParsedGroup { return &parser.ParsedGroup{ - Key: "202401|12345", - RptMonthYear: "202401", - CaseNumber: "12345", - Records: records, + Key: "202401|12345", + Records: records, } } diff --git a/tdrs-services/parser/internal/validation/functions.go b/tdrs-services/parser/internal/validation/functions.go index 1a0650d001..686b3773c8 100644 --- a/tdrs-services/parser/internal/validation/functions.go +++ b/tdrs-services/parser/internal/validation/functions.go @@ -44,10 +44,6 @@ func RegisterFunctions() []expr.Option { // Group validators (take group explicitly) expr.Function("getRecordsOfType", wrap2(getRecordsOfType), new(func(*parser.ParsedGroup, string) []*parser.ParsedRecord)), - expr.Function("hasAnyRecordType", wrap2(hasAnyRecordType), - new(func(map[string]int, []any) bool)), - expr.Function("anyRecordOfTypesHasInt", wrap4(anyRecordOfTypesHasInt), - new(func(*parser.ParsedGroup, []any, string, int) bool)), expr.Function("hasAnyRecordOfTypeWithInt", wrap4(hasAnyRecordOfTypeWithInt), new(func(*parser.ParsedGroup, string, string, int) bool)), @@ -323,27 +319,6 @@ func getRecordsOfType(group *parser.ParsedGroup, recordType string) []*parser.Pa return result } -// hasAnyRecordType returns true when any requested record type is present in the group. -func hasAnyRecordType(recordCounts map[string]int, recordTypes []any) bool { - for _, recordType := range toStringSlice(recordTypes) { - if recordCounts[recordType] > 0 { - return true - } - } - return false -} - -// anyRecordOfTypesHasInt returns true when any record of the given types has -// the requested integer field value. 
-func anyRecordOfTypesHasInt(group *parser.ParsedGroup, recordTypes []any, fieldName string, expectedValue int) bool { - for _, recordType := range toStringSlice(recordTypes) { - if hasAnyRecordOfTypeWithInt(group, recordType, fieldName, expectedValue) { - return true - } - } - return false -} - // hasAnyRecordOfTypeWithInt returns true when any record of the given type has // the requested integer field value. func hasAnyRecordOfTypeWithInt(group *parser.ParsedGroup, recordType string, fieldName string, expectedValue int) bool { diff --git a/tdrs-services/parser/internal/validation/orchestrator.go b/tdrs-services/parser/internal/validation/orchestrator.go index 4b677c9b91..d6d569e56a 100644 --- a/tdrs-services/parser/internal/validation/orchestrator.go +++ b/tdrs-services/parser/internal/validation/orchestrator.go @@ -1,6 +1,7 @@ package validation import ( + "strings" "text/template" "go-parser/internal/parser" @@ -81,6 +82,7 @@ func (o *ValidationOrchestrator) CreateNoRecordsCreatedError() *ValidationResult Valid: false, ErrorType: ErrorTypePreCheck, ValidatorID: "no_records_created", + LineNumber: 0, Validator: &CompiledValidator{ ID: "no_records_created", Scope: ScopeGroup, @@ -97,13 +99,13 @@ func (o *ValidationOrchestrator) CreateNoRecordsCreatedError() *ValidationResult // to expressions for cross-validation against submission metadata. 
func (o *ValidationOrchestrator) ValidateHeader(headerRec *parser.ParsedRecord, dfCtx *DataFileContext) *RecordValidationResult { result := &RecordValidationResult{Record: headerRec} - recType := headerRec.GetRecordType() + schemaKey := validationSchemaKey(headerRec) recordEnv := NewRecordEnv(headerRec) recordEnv.DataFileContext = dfCtx // Phase 1: Run PRE_CHECK and RECORD_PRE_CHECK validators recordBlocked := false - for _, validator := range o.registry.GetRecordValidators(recType) { + for _, validator := range o.registry.GetRecordValidators(schemaKey) { if validator.ErrorType != ErrorTypeRecordPreCheck && validator.ErrorType != ErrorTypePreCheck { continue } @@ -123,31 +125,28 @@ func (o *ValidationOrchestrator) ValidateHeader(headerRec *parser.ParsedRecord, // Phase 2: Field validation fieldEnv := &FieldEnv{DataFileContext: dfCtx} - for fieldName, validators := range o.registry.GetFieldValidatorsForRecord(recType) { + for fieldName, validators := range o.registry.GetFieldValidatorsForRecord(schemaKey) { value := headerRec.Get(fieldName) required := headerRec.IsFieldRequired(fieldName) - if value == nil { - if required { - result.FieldErrors = append(result.FieldErrors, &ValidationResult{ - Valid: false, - ValidatorID: "field_required", - ErrorType: ErrorTypeFieldValue, - FieldName: fieldName, - Validator: &CompiledValidator{ - ID: "field_required", - Scope: ScopeField, - ErrorType: ErrorTypeFieldValue, - ResultMode: "single", - Message: fieldRequiredMessage, - }, - }) - } + if !required { continue } - // Preserve Python parser parity: field validators only run for required fields. 
- if !required { + if fieldValueIsEmpty(value) { + result.FieldErrors = append(result.FieldErrors, &ValidationResult{ + Valid: false, + ValidatorID: "field_required", + ErrorType: ErrorTypeFieldValue, + FieldName: fieldName, + Validator: &CompiledValidator{ + ID: "field_required", + Scope: ScopeField, + ErrorType: ErrorTypeFieldValue, + ResultMode: "single", + Message: fieldRequiredMessage, + }, + }) continue } @@ -164,7 +163,7 @@ func (o *ValidationOrchestrator) ValidateHeader(headerRec *parser.ParsedRecord, } // Phase 3: Non-precheck record validators (consistency checks) - for _, cv := range o.registry.GetRecordValidators(recType) { + for _, cv := range o.registry.GetRecordValidators(schemaKey) { if cv.ErrorType == ErrorTypeRecordPreCheck || cv.ErrorType == ErrorTypePreCheck { continue } @@ -182,12 +181,12 @@ func (o *ValidationOrchestrator) ValidateHeader(headerRec *parser.ParsedRecord, // validateRecord validates a single record, updating the provided result. // Called internally by ValidateGroup. 
func (o *ValidationOrchestrator) validateRecord(result *RecordValidationResult, rec *parser.ParsedRecord, groupBlocked bool, dfCtx *DataFileContext) { - recType := rec.GetRecordType() + schemaKey := validationSchemaKey(rec) recordEnv := NewRecordEnv(rec) recordEnv.DataFileContext = dfCtx // Phase 1: Run RECORD_PRE_CHECK and PRE_CHECK validators (always runs, can block) - for _, cv := range o.registry.GetRecordValidators(recType) { + for _, cv := range o.registry.GetRecordValidators(schemaKey) { // Skip non-precheck validators in this phase if cv.ErrorType == ErrorTypeRecordPreCheck || cv.ErrorType == ErrorTypePreCheck { recordEnv.Params = cv.Params @@ -210,34 +209,28 @@ func (o *ValidationOrchestrator) validateRecord(result *RecordValidationResult, // Phase 2: Field validation fieldEnv := &FieldEnv{DataFileContext: dfCtx} // Reuse env for efficiency - for fieldName, validators := range o.registry.GetFieldValidatorsForRecord(recType) { + for fieldName, validators := range o.registry.GetFieldValidatorsForRecord(schemaKey) { value := rec.Get(fieldName) required := rec.IsFieldRequired(fieldName) - // Handle nil values - if value == nil { - if required { - // Required field is nil - generate error - result.FieldErrors = append(result.FieldErrors, &ValidationResult{ - Valid: false, - ValidatorID: "field_required", - ErrorType: ErrorTypeFieldValue, - FieldName: fieldName, - Validator: &CompiledValidator{ - ID: "field_required", - Scope: ScopeField, - ErrorType: ErrorTypeFieldValue, - ResultMode: "single", - Message: fieldRequiredMessage, - }, - }) - } - // Skip validators for nil fields (both required and optional) + if !required { continue } - // Preserve Python parser parity: field validators only run for required fields. 
- if !required { + if fieldValueIsEmpty(value) { + result.FieldErrors = append(result.FieldErrors, &ValidationResult{ + Valid: false, + ValidatorID: "field_required", + ErrorType: ErrorTypeFieldValue, + FieldName: fieldName, + Validator: &CompiledValidator{ + ID: "field_required", + Scope: ScopeField, + ErrorType: ErrorTypeFieldValue, + ResultMode: "single", + Message: fieldRequiredMessage, + }, + }) continue } @@ -254,7 +247,7 @@ func (o *ValidationOrchestrator) validateRecord(result *RecordValidationResult, } // Phase 3: Non-precheck record validators (consistency checks) - for _, cv := range o.registry.GetRecordValidators(recType) { + for _, cv := range o.registry.GetRecordValidators(schemaKey) { if cv.ErrorType == ErrorTypeRecordPreCheck || cv.ErrorType == ErrorTypePreCheck { continue // Already ran in phase 1 } @@ -266,3 +259,20 @@ func (o *ValidationOrchestrator) validateRecord(result *RecordValidationResult, } } } + +func validationSchemaKey(rec *parser.ParsedRecord) string { + if rec.Schema != nil && rec.Schema.Path != "" { + return rec.Schema.Path + } + return rec.GetRecordType() +} + +func fieldValueIsEmpty(value any) bool { + if value == nil { + return true + } + if s, ok := value.(string); ok { + return strings.TrimSpace(s) == "" + } + return false +} diff --git a/tdrs-services/parser/internal/validation/orchestrator_test.go b/tdrs-services/parser/internal/validation/orchestrator_test.go index 87299c0ad1..7019cf6dbd 100644 --- a/tdrs-services/parser/internal/validation/orchestrator_test.go +++ b/tdrs-services/parser/internal/validation/orchestrator_test.go @@ -155,6 +155,28 @@ func TestOrchestratorOptionalFieldWithValueSkipsValidators(t *testing.T) { } } +func TestOrchestratorBlankOptionalFieldSkipsValidators(t *testing.T) { + registry := newValidatorRegistry() + registry.exprOpts = RegisterFunctions() + + fieldExpr, _ := registry.getOrCompileExpr(ScopeField, "isNotEmpty(Value)", "single") + registry.field["T1"] = map[string][]*CompiledValidator{ + 
"AMOUNT": {{ID: "not_empty", Scope: ScopeField, ErrorType: ErrorTypeFieldValue, Expr: fieldExpr}}, + } + + orchestrator := NewValidationOrchestrator(registry, true) + + optionalSchema := testutil.NewTestSchema("T1", "AMOUNT") + rec := testutil.NewTestRecord(optionalSchema, 1, map[string]any{"AMOUNT": " "}) + group := testutil.NewTestGroup(rec) + + result := orchestrator.ValidateGroup(group, "TEST:1", defaultTestDataFileContext) + + if len(result.RecordResults[0].FieldErrors) != 0 { + t.Errorf("expected 0 field errors for blank optional field, got %d", len(result.RecordResults[0].FieldErrors)) + } +} + // TestOrchestratorShortCircuitSkipsFieldValidation tests that with shortCircuit=true, // field validators are skipped when a precheck validator fails. func TestOrchestratorShortCircuitSkipsFieldValidation(t *testing.T) { @@ -536,6 +558,195 @@ func TestOrchestratorPerRecordGroupValidation(t *testing.T) { }) } +func TestOrchestratorRequiresRelatedRecordReportsEachMissingRecord(t *testing.T) { + registry := newValidatorRegistry() + registry.exprOpts = RegisterFunctions() + + relatedExpr, err := registry.getOrCompileExpr( + ScopeGroup, + "filter(Group.Records, { .GetRecordType() == Params.record_type and not any(Params.related_record_types, { RecordCounts[#] > 0 }) })", + "per_record", + ) + if err != nil { + t.Fatalf("compiling related record expression: %v", err) + } + registry.group["TEST:1"] = []*CompiledValidator{ + { + ID: "requires_related_record", + Scope: ScopeGroup, + ErrorType: ErrorTypeCaseConsistency, + ResultMode: "per_record", + Expr: relatedExpr, + Params: map[string]any{ + "record_type": "T1", + "related_record_types": []any{"T2", "T3"}, + }, + }, + } + + orchestrator := NewValidationOrchestrator(registry, true) + group := testutil.NewTestGroup( + testutil.NewTestRecord(t1Schema, 2, map[string]any{"CASE_NUMBER": "1", "AMOUNT": 10}), + testutil.NewTestRecord(t1Schema, 3, map[string]any{"CASE_NUMBER": "1", "AMOUNT": 10}), + ) + + result := 
orchestrator.ValidateGroup(group, "TEST:1", defaultTestDataFileContext) + + if len(result.GroupErrors) != 0 { + t.Fatalf("expected no group-level errors, got %d", len(result.GroupErrors)) + } + for i, recResult := range result.RecordResults { + if len(recResult.RecordErrors) != 1 { + t.Fatalf("expected 1 record error for record %d, got %d", i, len(recResult.RecordErrors)) + } + err := recResult.RecordErrors[0] + if err.ValidatorID != "requires_related_record" { + t.Errorf("expected requires_related_record, got %s", err.ValidatorID) + } + if err.LineNumber != group.Records[i].GetLineNumber() { + t.Errorf("expected LineNumber=%d, got %d", group.Records[i].GetLineNumber(), err.LineNumber) + } + } +} + +func TestOrchestratorRequiresRelatedRecordPassesWhenAnyRelatedTypeExists(t *testing.T) { + registry := newValidatorRegistry() + registry.exprOpts = RegisterFunctions() + + relatedExpr, err := registry.getOrCompileExpr( + ScopeGroup, + "filter(Group.Records, { .GetRecordType() == Params.record_type and not any(Params.related_record_types, { RecordCounts[#] > 0 }) })", + "per_record", + ) + if err != nil { + t.Fatalf("compiling related record expression: %v", err) + } + registry.group["TEST:1"] = []*CompiledValidator{ + { + ID: "requires_related_record", + Scope: ScopeGroup, + ErrorType: ErrorTypeCaseConsistency, + ResultMode: "per_record", + Expr: relatedExpr, + Params: map[string]any{ + "record_type": "T1", + "related_record_types": []any{"T2", "T3"}, + }, + }, + } + + orchestrator := NewValidationOrchestrator(registry, true) + group := testutil.NewTestGroup( + testutil.NewTestRecord(t1Schema, 2, map[string]any{"CASE_NUMBER": "1", "AMOUNT": 10}), + testutil.NewTestRecord(t3Schema, 3, map[string]any{"FAMILY_AFFILIATION": 1}), + ) + + result := orchestrator.ValidateGroup(group, "TEST:1", defaultTestDataFileContext) + + if result.HasErrors() { + t.Fatalf("expected no errors when any related record type exists") + } +} + +func 
TestOrchestratorRequiresRelatedRecordWithIntValueReportsEachMissingRecord(t *testing.T) { + registry := newValidatorRegistry() + registry.exprOpts = RegisterFunctions() + + relatedExpr, err := registry.getOrCompileExpr( + ScopeGroup, + "filter(Group.Records, { .GetRecordType() == Params.record_type and not any(Group.Records, { .GetRecordType() in Params.related_record_types and .GetInt(Params.field_name) == Params.expected_value }) })", + "per_record", + ) + if err != nil { + t.Fatalf("compiling related record with int expression: %v", err) + } + registry.group["TEST:1"] = []*CompiledValidator{ + { + ID: "requires_related_record_with_int_value", + Scope: ScopeGroup, + ErrorType: ErrorTypeCaseConsistency, + ResultMode: "per_record", + Expr: relatedExpr, + Params: map[string]any{ + "record_type": "T1", + "related_record_types": []any{"T2", "T3"}, + "field_name": "FAMILY_AFFILIATION", + "expected_value": 1, + }, + }, + } + + orchestrator := NewValidationOrchestrator(registry, true) + group := testutil.NewTestGroup( + testutil.NewTestRecord(t1Schema, 2, map[string]any{"CASE_NUMBER": "1", "AMOUNT": 10}), + testutil.NewTestRecord(t1Schema, 3, map[string]any{"CASE_NUMBER": "1", "AMOUNT": 10}), + testutil.NewTestRecord(t2Schema, 4, map[string]any{"SSN": "111111111", "FAMILY_AFFILIATION": 2}), + ) + + result := orchestrator.ValidateGroup(group, "TEST:1", defaultTestDataFileContext) + + if len(result.GroupErrors) != 0 { + t.Fatalf("expected no group-level errors, got %d", len(result.GroupErrors)) + } + for i := 0; i < 2; i++ { + recResult := result.RecordResults[i] + if len(recResult.RecordErrors) != 1 { + t.Fatalf("expected 1 record error for record %d, got %d", i, len(recResult.RecordErrors)) + } + err := recResult.RecordErrors[0] + if err.ValidatorID != "requires_related_record_with_int_value" { + t.Errorf("expected requires_related_record_with_int_value, got %s", err.ValidatorID) + } + if err.LineNumber != group.Records[i].GetLineNumber() { + t.Errorf("expected 
LineNumber=%d, got %d", group.Records[i].GetLineNumber(), err.LineNumber) + } + } + if len(result.RecordResults[2].RecordErrors) != 0 { + t.Fatalf("expected no errors on related record, got %d", len(result.RecordResults[2].RecordErrors)) + } +} + +func TestOrchestratorRequiresRelatedRecordWithIntValuePassesWhenAnyRelatedTypeHasExpectedValue(t *testing.T) { + registry := newValidatorRegistry() + registry.exprOpts = RegisterFunctions() + + relatedExpr, err := registry.getOrCompileExpr( + ScopeGroup, + "filter(Group.Records, { .GetRecordType() == Params.record_type and not any(Group.Records, { .GetRecordType() in Params.related_record_types and .GetInt(Params.field_name) == Params.expected_value }) })", + "per_record", + ) + if err != nil { + t.Fatalf("compiling related record with int expression: %v", err) + } + registry.group["TEST:1"] = []*CompiledValidator{ + { + ID: "requires_related_record_with_int_value", + Scope: ScopeGroup, + ErrorType: ErrorTypeCaseConsistency, + ResultMode: "per_record", + Expr: relatedExpr, + Params: map[string]any{ + "record_type": "T1", + "related_record_types": []any{"T2", "T3"}, + "field_name": "FAMILY_AFFILIATION", + "expected_value": 1, + }, + }, + } + + orchestrator := NewValidationOrchestrator(registry, true) + group := testutil.NewTestGroup( + testutil.NewTestRecord(t1Schema, 2, map[string]any{"CASE_NUMBER": "1", "AMOUNT": 10}), + testutil.NewTestRecord(t3Schema, 3, map[string]any{"FAMILY_AFFILIATION": 1}), + ) + + result := orchestrator.ValidateGroup(group, "TEST:1", defaultTestDataFileContext) + + if result.HasErrors() { + t.Fatalf("expected no errors when any related record type has the expected value") + } +} + // TestOrchestratorMultipleFieldValidators tests that multiple validators on the same field all run. 
func TestOrchestratorMultipleFieldValidators(t *testing.T) { registry := newValidatorRegistry() diff --git a/tdrs-services/parser/internal/validation/registry.go b/tdrs-services/parser/internal/validation/registry.go index 26c9da6de1..4e4cd80ddc 100644 --- a/tdrs-services/parser/internal/validation/registry.go +++ b/tdrs-services/parser/internal/validation/registry.go @@ -1,6 +1,7 @@ package validation import ( + "encoding/json" "fmt" "os" "text/template" @@ -32,8 +33,8 @@ type ValidatorRegistry struct { predefined map[string]map[string]*validation.ValidatorDef // Compiled validators by scope - field map[string]map[string][]*CompiledValidator // recordType -> fieldName -> validators - record map[string][]*CompiledValidator // recordType -> validators + field map[string]map[string][]*CompiledValidator // schema path -> fieldName -> validators + record map[string][]*CompiledValidator // schema path -> validators group map[string][]*CompiledValidator // filespec key -> validators exprOpts []expr.Option @@ -141,11 +142,10 @@ func applyDefaultErrorType(vdef *validation.ValidatorDef, scope string) { // loadSchemaValidators compiles field and record validators from a schema. 
func (r *ValidatorRegistry) loadSchemaValidators(path string, cs *schema.CompiledSchema) error { - recordType := cs.RecordType + schemaKey := path - // Initialize field map for this record type - if r.field[recordType] == nil { - r.field[recordType] = make(map[string][]*CompiledValidator) + if r.field[schemaKey] == nil { + r.field[schemaKey] = make(map[string][]*CompiledValidator) } // Load record-scope validators @@ -154,7 +154,7 @@ func (r *ValidatorRegistry) loadSchemaValidators(path string, cs *schema.Compile if err != nil { return fmt.Errorf("schema %s record validator %s: %w", path, vdef.ID, err) } - r.record[recordType] = append(r.record[recordType], cv) + r.record[schemaKey] = append(r.record[schemaKey], cv) } // Load field-scope validators from shared fields @@ -164,7 +164,10 @@ func (r *ValidatorRegistry) loadSchemaValidators(path string, cs *schema.Compile if err != nil { return fmt.Errorf("schema %s field %s validator %s: %w", path, field.Name, vdef.ID, err) } - r.field[recordType][field.Name] = append(r.field[recordType][field.Name], cv) + r.field[schemaKey][field.Name] = appendUniqueValidator( + r.field[schemaKey][field.Name], + cv, + ) } } @@ -176,7 +179,10 @@ func (r *ValidatorRegistry) loadSchemaValidators(path string, cs *schema.Compile if err != nil { return fmt.Errorf("schema %s field %s validator %s: %w", path, field.Name, vdef.ID, err) } - r.field[recordType][field.Name] = append(r.field[recordType][field.Name], cv) + r.field[schemaKey][field.Name] = appendUniqueValidator( + r.field[schemaKey][field.Name], + cv, + ) } } } @@ -386,21 +392,21 @@ func mergeParams(predefined, useSite map[string]any) map[string]any { } // GetFieldValidators returns field-scope validators for a specific field. 
-func (r *ValidatorRegistry) GetFieldValidators(recordType, fieldName string) []*CompiledValidator { - if fields, ok := r.field[recordType]; ok { +func (r *ValidatorRegistry) GetFieldValidators(schemaKey, fieldName string) []*CompiledValidator { + if fields, ok := r.field[schemaKey]; ok { return fields[fieldName] } return nil } -// GetFieldValidatorsForRecord returns all fields with field-scope validators for a record type. -func (r *ValidatorRegistry) GetFieldValidatorsForRecord(recordType string) map[string][]*CompiledValidator { - return r.field[recordType] +// GetFieldValidatorsForRecord returns all fields with field-scope validators for a schema. +func (r *ValidatorRegistry) GetFieldValidatorsForRecord(schemaKey string) map[string][]*CompiledValidator { + return r.field[schemaKey] } -// GetRecordValidators returns record-scope validators for a record type. -func (r *ValidatorRegistry) GetRecordValidators(recordType string) []*CompiledValidator { - return r.record[recordType] +// GetRecordValidators returns record-scope validators for a schema. +func (r *ValidatorRegistry) GetRecordValidators(schemaKey string) []*CompiledValidator { + return r.record[schemaKey] } // GetGroupValidators returns group-scope validators for a filespec. @@ -466,3 +472,65 @@ func (r *ValidatorRegistry) ClearCompileTimeData() { r.expressions = nil r.predefined = nil } + +func appendUniqueValidator(validators []*CompiledValidator, candidate *CompiledValidator) []*CompiledValidator { + // Ensure that segment fields don't duplicate the same validator. This also queues us up for unique validators + // per segment if we ever need to do so. + candidateKey := validatorSemanticKey(candidate) + for _, existing := range validators { + if validatorSemanticKey(existing) == candidateKey { + return validators + } + } + return append(validators, candidate) +} + +func validatorSemanticKey(cv *CompiledValidator) string { + if cv == nil { + return "" + } + fields := append([]string(nil), cv.Fields...) 
+ + key := struct { + ID string `json:"id"` + Scope string `json:"scope"` + ErrorType string `json:"error_type"` + ResultMode string `json:"result_mode"` + Expr string `json:"expr"` + Message string `json:"message"` + Fields []string `json:"fields"` + Params map[string]any `json:"params"` + Description string `json:"description"` + }{ + ID: cv.ID, + Scope: cv.Scope, + ErrorType: cv.ErrorType, + ResultMode: cv.ResultMode, + Expr: compiledExprString(cv.Expr), + Message: templateString(cv.Message), + Fields: fields, + Params: cv.Params, + Description: cv.Description, + } + + encoded, err := json.Marshal(key) + if err != nil { + return fmt.Sprintf("%s|%s|%s|%s|%s|%s|%v|%v|%s", + key.ID, key.Scope, key.ErrorType, key.ResultMode, key.Expr, key.Message, key.Fields, key.Params, key.Description) + } + return string(encoded) +} + +func compiledExprString(expr *CompiledExpr) string { + if expr == nil { + return "" + } + return expr.Expr +} + +func templateString(tmpl *template.Template) string { + if tmpl == nil || tmpl.Tree == nil || tmpl.Tree.Root == nil { + return "" + } + return tmpl.Tree.Root.String() +} diff --git a/tdrs-services/parser/internal/validation/result.go b/tdrs-services/parser/internal/validation/result.go index 28ccd0cf9e..246c806414 100644 --- a/tdrs-services/parser/internal/validation/result.go +++ b/tdrs-services/parser/internal/validation/result.go @@ -155,6 +155,11 @@ func (gvr *GroupValidationResult) HasBlockingGroupErrors() bool { return true } } + for _, err := range rr.FieldErrors { + if err.BlocksGroup() { + return true + } + } } return false } diff --git a/tdrs-services/parser/internal/validation/validation_test.go b/tdrs-services/parser/internal/validation/validation_test.go index 91de14a728..838a7d7e6c 100644 --- a/tdrs-services/parser/internal/validation/validation_test.go +++ b/tdrs-services/parser/internal/validation/validation_test.go @@ -265,35 +265,6 @@ func TestGetRecordsOfType(t *testing.T) { } } -func TestHasAnyRecordType(t *testing.T) 
{ - recordCounts := map[string]int{ - "T1": 1, - "T2": 2, - } - - if !hasAnyRecordType(recordCounts, []any{"T3", "T2"}) { - t.Error("expected true when one requested record type is present") - } - if hasAnyRecordType(recordCounts, []any{"T3", "T4"}) { - t.Error("expected false when none of the requested record types are present") - } -} - -func TestAnyRecordOfTypesHasInt(t *testing.T) { - group := testutil.NewTestGroup( - testutil.NewTestRecord(t1Schema, 1, nil), - testutil.NewTestRecord(t2Schema, 2, map[string]any{"FAMILY_AFFILIATION": 2}), - testutil.NewTestRecord(t3Schema, 3, map[string]any{"FAMILY_AFFILIATION": 1}), - ) - - if !anyRecordOfTypesHasInt(group, []any{"T2", "T3"}, "FAMILY_AFFILIATION", 1) { - t.Error("expected true when a related record has the requested value") - } - if anyRecordOfTypesHasInt(group, []any{"T2", "T3"}, "FAMILY_AFFILIATION", 9) { - t.Error("expected false when no related record has the requested value") - } -} - func TestHasAnyRecordOfTypeWithInt(t *testing.T) { group := testutil.NewTestGroup( testutil.NewTestRecord(t2Schema, 1, map[string]any{"FAMILY_AFFILIATION": 1}), @@ -616,26 +587,26 @@ func TestRealConfig_GroupValidatorBindingsAcrossPrograms(t *testing.T) { }, { filespecKey: "TAN:2", - validatorID: "requires_corresponding_record", + validatorID: "requires_related_record", params: map[string]any{ "record_type": "T4", - "required_record_type": "T5", + "related_record_types": []any{"T5"}, }, }, { filespecKey: "SSP:2", - validatorID: "requires_corresponding_record", + validatorID: "requires_related_record", params: map[string]any{ "record_type": "M4", - "required_record_type": "M5", + "related_record_types": []any{"M5"}, }, }, { filespecKey: "TRIBAL:2", - validatorID: "requires_corresponding_record", + validatorID: "requires_related_record", params: map[string]any{ "record_type": "T4", - "required_record_type": "T5", + "related_record_types": []any{"T5"}, }, }, } @@ -654,6 +625,112 @@ func 
TestRealConfig_GroupValidatorBindingsAcrossPrograms(t *testing.T) { } } +func TestRealConfig_ProgramSpecificSchemaValidatorBindings(t *testing.T) { + cfg := configpkg.TestConfig() + cfg.Global.ConfigDir = realConfigDir(t) + + reg, err := configpkg.NewRegistry(cfg) + if err != nil { + t.Fatalf("loading config registry: %v", err) + } + + validators, err := NewRegistry(cfg, reg) + if err != nil { + t.Fatalf("loading validator registry: %v", err) + } + + requireRecordValidator := func(schemaKey string, id string) { + t.Helper() + for _, validator := range validators.GetRecordValidators(schemaKey) { + if validator.ID == id { + return + } + } + t.Fatalf("expected %s to have record validator %s", schemaKey, id) + } + + requireNoRecordValidator := func(schemaKey string, id string) { + t.Helper() + for _, validator := range validators.GetRecordValidators(schemaKey) { + if validator.ID == id { + t.Fatalf("expected %s not to have record validator %s", schemaKey, id) + } + } + } + + requireFieldValidator := func(schemaKey string, fieldName string, id string) *CompiledValidator { + t.Helper() + for _, validator := range validators.GetFieldValidators(schemaKey, fieldName) { + if validator.ID == id { + return validator + } + } + t.Fatalf("expected %s.%s to have field validator %s", schemaKey, fieldName, id) + return nil + } + + requireRecordValidator("tanf/t2", "t2_family_affil_1_2_work_part_status") + requireNoRecordValidator("tribal_tanf/t2", "t2_family_affil_1_2_work_part_status") + requireRecordValidator("tribal_tanf/t2", "tribal_t2_family_affil_1_2_work_part_status") + + requireFieldValidator("tanf/t4", "CLOSURE_REASON", "closure_reason") + requireFieldValidator("tribal_tanf/t4", "CLOSURE_REASON", "tribal_closure_reason") + + requireRecordValidator("fra/te1", "exit_date_matches_fiscal_period") + if got := requireFieldValidator("fra/te1", "SSN", "fra_ssn").ErrorType; got != ErrorTypeCaseConsistency { + t.Fatalf("expected fra_ssn error type %s, got %s", 
ErrorTypeCaseConsistency, got) + } +} + +func TestRealConfig_FRAValidationErrorsAreCaseConsistency(t *testing.T) { + cfg := configpkg.TestConfig() + cfg.Global.ConfigDir = realConfigDir(t) + + reg, err := configpkg.NewRegistry(cfg) + if err != nil { + t.Fatalf("loading config registry: %v", err) + } + + validators, err := NewRegistry(cfg, reg) + if err != nil { + t.Fatalf("loading validator registry: %v", err) + } + + fraSchema := reg.GetSchema("fra/te1") + rec := testutil.NewTestRecord(fraSchema, 1, map[string]any{ + "RecordType": "TE1", + "EXIT_DATE": "202310", + "SSN": "000000000", + }) + for i := range fraSchema.Segments[0].Fields { + field := &fraSchema.Segments[0].Fields[i] + idx := fraSchema.FieldIndex[field.Name] + rec.Fields[idx].Def = field + } + + orchestrator := NewValidationOrchestrator(validators, true) + result := orchestrator.ValidateGroup( + testutil.NewTestGroup(rec), + "FRA:1", + &DataFileContext{FiscalYear: 2024, FiscalQuarter: "Q1", Program: "FRA"}, + ) + + if len(result.RecordResults) != 1 { + t.Fatalf("expected one record result, got %d", len(result.RecordResults)) + } + + fieldErrors := result.RecordResults[0].FieldErrors + if len(fieldErrors) != 1 { + t.Fatalf("expected one FRA field error, got %d", len(fieldErrors)) + } + if got := fieldErrors[0].ErrorType; got != ErrorTypeCaseConsistency { + t.Fatalf("expected FRA field error type %s, got %s", ErrorTypeCaseConsistency, got) + } + if result.ShouldSerialize() { + t.Fatal("expected FRA CASE_CONSISTENCY field error to block serialization") + } +} + func validatorParamsEqual(actual map[string]any, expected map[string]any) bool { if len(actual) != len(expected) { return false @@ -692,11 +769,10 @@ func validatorParamsEqual(actual map[string]any, expected map[string]any) bool { func TestGroupValidatorParameterizedExpression(t *testing.T) { opts := RegisterFunctions() - exprStr := `RecordCounts[Params.record_type] == 0 or anyRecordOfTypesHasInt(Group, Params.related_record_types, 
Params.field_name, Params.expected_value)` + exprStr := `filter(Group.Records, { .GetRecordType() == Params.record_type and not any(Group.Records, { .GetRecordType() in Params.related_record_types and .GetInt(Params.field_name) == Params.expected_value }) })` compileOpts := append([]expr.Option{ expr.Env(&GroupEnv{}), - expr.AsBool(), expr.AllowUndefinedVariables(), }, opts...) @@ -721,8 +797,8 @@ func TestGroupValidatorParameterizedExpression(t *testing.T) { if err != nil { t.Fatalf("failed to run: %v", err) } - if result != true { - t.Errorf("expected true when no T1, got %v", result) + if records := toRecordSlice(result); len(records) != 0 { + t.Errorf("expected no records when no T1, got %d", len(records)) } }) @@ -743,8 +819,8 @@ func TestGroupValidatorParameterizedExpression(t *testing.T) { if err != nil { t.Fatalf("failed to run: %v", err) } - if result != true { - t.Errorf("expected true when T2 has FA=1, got %v", result) + if records := toRecordSlice(result); len(records) != 0 { + t.Errorf("expected no records when T2 has FA=1, got %d", len(records)) } }) @@ -765,8 +841,8 @@ func TestGroupValidatorParameterizedExpression(t *testing.T) { if err != nil { t.Fatalf("failed to run: %v", err) } - if result != true { - t.Errorf("expected true when T3 has FA=1, got %v", result) + if records := toRecordSlice(result); len(records) != 0 { + t.Errorf("expected no records when T3 has FA=1, got %d", len(records)) } }) @@ -787,8 +863,8 @@ func TestGroupValidatorParameterizedExpression(t *testing.T) { if err != nil { t.Fatalf("failed to run: %v", err) } - if result != false { - t.Errorf("expected false when no T2/T3 has FA=1, got %v", result) + if records := toRecordSlice(result); len(records) != 1 { + t.Errorf("expected 1 record when no T2/T3 has FA=1, got %d", len(records)) } }) @@ -808,8 +884,8 @@ func TestGroupValidatorParameterizedExpression(t *testing.T) { if err != nil { t.Fatalf("failed to run: %v", err) } - if result != false { - t.Errorf("expected false when T1 
has no T2/T3, got %v", result) + if records := toRecordSlice(result); len(records) != 1 { + t.Errorf("expected 1 record when T1 has no T2/T3, got %d", len(records)) } }) @@ -832,8 +908,8 @@ func TestGroupValidatorParameterizedExpression(t *testing.T) { if err != nil { t.Fatalf("failed to run: %v", err) } - if result != true { - t.Errorf("expected true when at least one T2 has FA=1, got %v", result) + if records := toRecordSlice(result); len(records) != 0 { + t.Errorf("expected no records when at least one T2 has FA=1, got %d", len(records)) } }) } @@ -1702,6 +1778,95 @@ func TestRegistryGetters(t *testing.T) { }) } +func TestRegistryFieldValidatorDedupe(t *testing.T) { + registry := newValidatorRegistry() + registry.exprOpts = RegisterFunctions() + + cs := (&schema.SchemaDef{ + RecordType: "T9", + Program: "TAN", + Shared: []schema.FieldDef{ + { + Name: "SHARED_CODE", + Type: "integer", + Field: []configValidation.ValidatorDef{ + { + ID: "shared_only", + Expr: "Value > 0", + Message: "shared failed", + }, + }, + }, + }, + Segments: []schema.SegmentDef{ + { + Fields: []schema.FieldDef{ + { + Name: "CODE", + Type: "integer", + Field: []configValidation.ValidatorDef{ + { + ID: "range", + Expr: "Value >= Params.min and Value <= Params.max", + Params: map[string]any{"min": 0, "max": 10}, + Message: "range failed", + }, + { + ID: "range", + Expr: "Value >= Params.min and Value <= Params.max", + Params: map[string]any{"min": 0, "max": 10}, + Message: "range failed", + }, + { + ID: "range", + Expr: "Value >= Params.min and Value <= Params.max", + Params: map[string]any{"min": 11, "max": 20}, + Message: "range failed", + }, + }, + }, + }, + }, + { + Fields: []schema.FieldDef{ + { + Name: "CODE", + Type: "integer", + Field: []configValidation.ValidatorDef{ + { + ID: "range", + Expr: "Value >= Params.min and Value <= Params.max", + Params: map[string]any{"min": 0, "max": 10}, + Message: "range failed", + }, + }, + }, + }, + }, + }, + }).Compile() + + if err := 
registry.loadSchemaValidators("test/t9", cs); err != nil { + t.Fatalf("loadSchemaValidators failed: %v", err) + } + + codeValidators := registry.GetFieldValidators("test/t9", "CODE") + if len(codeValidators) != 2 { + t.Fatalf("CODE validators = %d, want %d", len(codeValidators), 2) + } + if codeValidators[0].ID != "range" || codeValidators[1].ID != "range" { + t.Fatalf("CODE validator IDs = %s, %s; want range, range", codeValidators[0].ID, codeValidators[1].ID) + } + if codeValidators[0].Params["min"] == codeValidators[1].Params["min"] { + t.Fatal("same validator ID with different params should remain distinct") + } + + sharedValidators := registry.GetFieldValidators("test/t9", "SHARED_CODE") + if len(sharedValidators) != 1 || sharedValidators[0].ID != "shared_only" { + t.Fatalf("SHARED_CODE validators = %v, want one shared_only validator", sharedValidators) + } +} + func TestRegistryStats(t *testing.T) { registry := newValidatorRegistry() registry.exprOpts = RegisterFunctions()