|
35 | 35 | # Countries with a single NUTS3 region: country_code -> nuts3 code |
36 | 36 | _single_nuts3: dict[str, str] = {} |
37 | 37 |
|
 | 38 | +# Country-level majority-vote fallback (country_code -> NUTS codes + confidences) for |
 | 39 | +# countries where NUTS1/NUTS2 are unanimous and NUTS3 is the most common region (e.g. MT → MT0/MT00/MT001) |
| 40 | +_country_fallback: dict[str, dict] = {} |
| 41 | + |
38 | 42 | # NUTS region names: nuts_id -> name_latn |
39 | 43 | _nuts_names: dict[str, str] = {} |
40 | 44 |
|
@@ -619,6 +623,41 @@ def _build_prefix_index() -> None: |
619 | 623 | if _single_nuts3: |
620 | 624 | logger.info("Single-NUTS3 countries: %s", ", ".join(sorted(_single_nuts3))) |
621 | 625 |
|
 | 626 | + # Country-level majority-vote fallback for countries NOT in _single_nuts3 |
 | 627 | + # where NUTS1 and NUTS2 are unanimous; NUTS3 is the region with the most postal codes (no minimum share; confidence = capped share) |
| 628 | + _country_fallback.clear() |
| 629 | + caps = settings.approximate_confidence_caps |
| 630 | + for cc, nuts3_set in country_nuts3.items(): |
| 631 | + if cc in _single_nuts3: |
| 632 | + continue |
| 633 | + nuts1_set = {n[:3] for n in nuts3_set} |
| 634 | + nuts2_set = {n[:4] for n in nuts3_set} |
| 635 | + if len(nuts1_set) != 1 or len(nuts2_set) != 1: |
| 636 | + continue |
| 637 | + # Count postal codes per NUTS3 to find dominant region |
| 638 | + nuts3_counts: Counter[str] = Counter() |
| 639 | + for (c, _), n3 in _lookup.items(): |
| 640 | + if c == cc: |
| 641 | + nuts3_counts[n3] += 1 |
| 642 | + total = sum(nuts3_counts.values()) |
| 643 | + if total == 0: |
| 644 | + continue |
| 645 | + winner, winner_count = nuts3_counts.most_common(1)[0] |
| 646 | + ratio = winner_count / total |
| 647 | + _country_fallback[cc] = { |
| 648 | + "nuts1": next(iter(nuts1_set)), |
| 649 | + "nuts1_confidence": 1.0, |
| 650 | + "nuts2": next(iter(nuts2_set)), |
| 651 | + "nuts2_confidence": 1.0, |
| 652 | + "nuts3": winner, |
| 653 | + "nuts3_confidence": round(min(ratio, caps["nuts3"]), 2), |
| 654 | + } |
| 655 | + if _country_fallback: |
| 656 | + logger.info( |
| 657 | + "Country-level fallback: %s", |
| 658 | + ", ".join(f"{cc}→{v['nuts3']}" for cc, v in sorted(_country_fallback.items())), |
| 659 | + ) |
| 660 | + |
622 | 661 |
|
623 | 662 | def _estimate_by_prefix(cc: str, postal_code: str) -> dict | None: |
624 | 663 | """Runtime estimation via longest prefix match + majority vote. |
@@ -910,11 +949,12 @@ def load_data() -> None: |
910 | 949 | def lookup(country_code: str, postal_code: str) -> dict | None: |
911 | 950 | """Look up NUTS codes for a given country + postal code. |
912 | 951 |
|
913 | | - Four-tier fall-through: |
| 952 | + Five-tier fall-through: |
914 | 953 | 1. Exact TERCET match → confidence 1.0 |
915 | 954 | 2. Pre-computed estimate → stored confidence per level |
916 | 955 | 3. Runtime prefix-based estimation → calculated confidence |
917 | | - 4. Single-NUTS3 country fallback → confidence 1.0 (e.g. LI, CY, LU) |
 | 956 | + 4. Country-level majority vote → confidence 1.0 for unanimous NUTS1/2, capped vote share for NUTS3 (e.g. MT) |
| 957 | + 5. Single-NUTS3 country fallback → confidence 1.0 (e.g. LI, CY, LU) |
918 | 958 |
|
919 | 959 | Returns a dict with nuts1/2/3, match_type, and per-level confidence, or None. |
920 | 960 | """ |
@@ -962,7 +1002,21 @@ def lookup(country_code: str, postal_code: str) -> dict | None: |
962 | 1002 | approx.update(_resolve_names(approx["nuts1"], approx["nuts2"], approx["nuts3"])) |
963 | 1003 | return approx |
964 | 1004 |
|
965 | | - # Tier 4: Single-NUTS3 country fallback (e.g. LI → LI000) |
| 1005 | + # Tier 4: Country-level majority vote (unanimous NUTS1/2, dominant NUTS3) |
| 1006 | + fallback = _country_fallback.get(cc) |
| 1007 | + if fallback is not None: |
| 1008 | + return { |
| 1009 | + "match_type": "approximate", |
| 1010 | + "nuts1": fallback["nuts1"], |
| 1011 | + "nuts1_confidence": fallback["nuts1_confidence"], |
| 1012 | + "nuts2": fallback["nuts2"], |
| 1013 | + "nuts2_confidence": fallback["nuts2_confidence"], |
| 1014 | + "nuts3": fallback["nuts3"], |
| 1015 | + "nuts3_confidence": fallback["nuts3_confidence"], |
| 1016 | + **_resolve_names(fallback["nuts1"], fallback["nuts2"], fallback["nuts3"]), |
| 1017 | + } |
| 1018 | + |
| 1019 | + # Tier 5: Single-NUTS3 country fallback (e.g. LI → LI000) |
966 | 1020 | nuts3 = _single_nuts3.get(cc) |
967 | 1021 | if nuts3 is not None: |
968 | 1022 | return { |
|
0 commit comments