Skip to content

Commit 54d0a1d

Browse files
committed
Relax postal code regex patterns to accept flexible prefix separators
Replace the rigid '-?' prefix separator with a flexible character class that accepts any run of spaces, dashes, en-dashes, em-dashes, and dots. This applies to every entry that takes a country prefix — 32 of the 34 countries; IE and MT take no prefix and are unchanged. Also add missing prefix alternatives: HU (was H only), NO (was N only), FI (add FIN), ES (add ES), PT (add P/PT), FR (add FR), LU (add LU), LI (add LI), RS (add RS). NL gains space support after its prefix, like the other entries.

Tested against 161K Erasmus programme postal codes:
- No-match rate: 1.3% → 0.8% (644 fewer failures)
- Hit rate: 98.7% → 99.2%
- Zero regressions — all previously matching codes still match

Closes #3
1 parent c7244c4 commit 54d0a1d

1 file changed

Lines changed: 63 additions & 63 deletions

File tree

app/postal_patterns.json

Lines changed: 63 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,141 +1,141 @@
11
{
22
"AT": {
3-
"regex": "^(?:A-?|AT-?)?([0-9]{4})$",
4-
"example": "1010, A-1010, AT-1010"
3+
"regex": "^(?:A[\\s\\-\u2013\u2014.]*|AT[\\s\\-\u2013\u2014.]*)?([0-9]{4})$",
4+
"example": "1010, A-1010, AT-1010, A 1010, AT 1010"
55
},
66
"BE": {
7-
"regex": "^(?:B-?|BE-?)?([0-9]{4})$",
8-
"example": "1000, B-1000, BE-1000"
7+
"regex": "^(?:B[\\s\\-\u2013\u2014.]*|BE[\\s\\-\u2013\u2014.]*)?([0-9]{4})$",
8+
"example": "1000, B-1000, BE-1000, B 1000"
99
},
1010
"BG": {
11-
"regex": "^(?:BG-?)?([0-9]{4})$",
12-
"example": "1000, BG-1000"
11+
"regex": "^(?:BG[\\s\\-\u2013\u2014.]*)?([0-9]{4})$",
12+
"example": "1000, BG-1000, BG 1000"
1313
},
1414
"CH": {
15-
"regex": "^(?:CH-?)?([0-9]{4})$",
16-
"example": "8000, CH-8000"
15+
"regex": "^(?:CH[\\s\\-\u2013\u2014.]*)?([0-9]{4})$",
16+
"example": "8000, CH-8000, CH 8000"
1717
},
1818
"CY": {
19-
"regex": "^(?:CY-?)?([0-9]{4})$",
20-
"example": "1010, CY-1010"
19+
"regex": "^(?:CY[\\s\\-\u2013\u2014.]*)?([0-9]{4})$",
20+
"example": "1010, CY-1010, CY 1010"
2121
},
2222
"CZ": {
23-
"regex": "^(?:CZ-?)?(\\d{3}\\s?\\d{2})$",
24-
"example": "11000, CZ-11000, 110 00"
23+
"regex": "^(?:CZ[\\s\\-\u2013\u2014.]*)?(\\d{3}\\s?\\d{2})$",
24+
"example": "11000, CZ-11000, CZ 11000, 110 00"
2525
},
2626
"DE": {
27-
"regex": "^(?:D-?|DE-?)?([0-9]{5})$",
28-
"example": "10115, D-10115, DE-10115"
27+
"regex": "^(?:D[\\s\\-\u2013\u2014.]*|DE[\\s\\-\u2013\u2014.]*)?([0-9]{5})$",
28+
"example": "10115, D-10115, DE-10115, D 10115"
2929
},
3030
"DK": {
31-
"regex": "^(?:DK-?)?([0-9]{4})$",
32-
"example": "1050, DK-1050"
31+
"regex": "^(?:DK[\\s\\-\u2013\u2014.]*)?([0-9]{4})$",
32+
"example": "1050, DK-1050, DK 1050"
3333
},
3434
"EE": {
35-
"regex": "^(?:EE-?)?([0-9]{5})$",
36-
"example": "10111, EE-10111"
35+
"regex": "^(?:EE[\\s\\-\u2013\u2014.]*)?([0-9]{5})$",
36+
"example": "10111, EE-10111, EE 10111"
3737
},
3838
"EL": {
39-
"regex": "^(?:GR-?|EL-?)?(\\d{5}|\\d{2}\\s\\d{3}|\\d{3}\\s\\d{2})$",
40-
"example": "10431, GR-10431, EL-10431, 105 57"
39+
"regex": "^(?:GR[\\s\\-\u2013\u2014.]*|EL[\\s\\-\u2013\u2014.]*)?(\\d{5}|\\d{2}\\s\\d{3}|\\d{3}\\s\\d{2})$",
40+
"example": "10431, GR-10431, GR 10431, EL-10431, 105 57"
4141
},
4242
"ES": {
43-
"regex": "^(?:E-?)?([0-9]{5})$",
44-
"example": "28001, E-28001"
43+
"regex": "^(?:E[\\s\\-\u2013\u2014.]*|ES[\\s\\-\u2013\u2014.]*)?([0-9]{5})$",
44+
"example": "28001, E-28001, ES-28001, E 28001"
4545
},
4646
"FI": {
47-
"regex": "^(?:FI-?)?([0-9]{5})$",
48-
"example": "00100, FI-00100"
47+
"regex": "^(?:FI(?:N)?[\\s\\-\u2013\u2014.]*)?([0-9]{5})$",
48+
"example": "00100, FI-00100, FIN-00100, FI 00100"
4949
},
5050
"FR": {
51-
"regex": "^(?:F-?)?([0-9]{5})$",
52-
"example": "75001, F-75001"
51+
"regex": "^(?:F[\\s\\-\u2013\u2014.]*|FR[\\s\\-\u2013\u2014.]*)?([0-9]{5})$",
52+
"example": "75001, F-75001, FR-75001"
5353
},
5454
"HR": {
55-
"regex": "^(?:HR-?)?([0-9]{5})$",
56-
"example": "10000, HR-10000"
55+
"regex": "^(?:HR[\\s\\-\u2013\u2014.]*)?([0-9]{5})$",
56+
"example": "10000, HR-10000, HR 10000"
5757
},
5858
"HU": {
59-
"regex": "^(?:H-?)?([0-9]{4})$",
60-
"example": "1011, H-1011"
59+
"regex": "^(?:H[\\s\\-\u2013\u2014.]*|HU[\\s\\-\u2013\u2014.]*)?([0-9]{4})$",
60+
"example": "1011, H-1011, HU-1011, HU 1011"
6161
},
6262
"IE": {
6363
"regex": "^[A-Z](?:\\d{2}|6W)\\s[A-Z0-9]{4}$",
6464
"example": "D02 X285, A65 F4E2",
6565
"tercet_map": "truncate:3"
6666
},
6767
"IS": {
68-
"regex": "^(?:IS-?)?([0-9]{3})$",
69-
"example": "101, IS-101"
68+
"regex": "^(?:IS[\\s\\-\u2013\u2014.]*)?([0-9]{3})$",
69+
"example": "101, IS-101, IS 101"
7070
},
7171
"IT": {
72-
"regex": "^(?:I-?|IT-?)?([0-9]{5})$",
73-
"example": "00118, I-00118, IT-00118"
72+
"regex": "^(?:I[\\s\\-\u2013\u2014.]*|IT[\\s\\-\u2013\u2014.]*)?([0-9]{5})$",
73+
"example": "00118, I-00118, IT-00118, I 00118"
7474
},
7575
"LI": {
76-
"regex": "^(?:FL-?)?([0-9]{4})$",
77-
"example": "9490, FL-9490"
76+
"regex": "^(?:FL[\\s\\-\u2013\u2014.]*|LI[\\s\\-\u2013\u2014.]*)?([0-9]{4})$",
77+
"example": "9490, FL-9490, LI-9490"
7878
},
7979
"LT": {
80-
"regex": "^(?:LT-?)?([0-9]{5})$",
81-
"example": "01100, LT-01100"
80+
"regex": "^(?:LT[\\s\\-\u2013\u2014.]*)?([0-9]{5})$",
81+
"example": "01100, LT-01100, LT 01100"
8282
},
8383
"LU": {
84-
"regex": "^(?:L-?)?([0-9]{4})$",
85-
"example": "1009, L-1009"
84+
"regex": "^(?:L[\\s\\-\u2013\u2014.]*|LU[\\s\\-\u2013\u2014.]*)?([0-9]{4})$",
85+
"example": "1009, L-1009, LU-1009, L 1009"
8686
},
8787
"LV": {
88-
"regex": "^(?:LV-?\\s?)?(\\d{4})$",
88+
"regex": "^(?:LV[\\s\\-\u2013\u2014.]*)?(\\d{4})$",
8989
"example": "1010, LV-1010, LV 1010",
9090
"tercet_map": "prepend:LV"
9191
},
9292
"MK": {
93-
"regex": "^(?:MK-?)?([0-9]{4})$",
94-
"example": "1000, MK-1000"
93+
"regex": "^(?:MK[\\s\\-\u2013\u2014.]*)?([0-9]{4})$",
94+
"example": "1000, MK-1000, MK 1000"
9595
},
9696
"MT": {
9797
"regex": "^([A-Z]{2,3}\\s\\d{2,4})$",
9898
"example": "VLT 1010, FNT 1010, MSK 1234",
9999
"tercet_map": "keep_alpha"
100100
},
101101
"NL": {
102-
"regex": "^(?:NL-?)?(\\d{4}\\s?[A-Z]{2})$",
103-
"example": "1012 AB, NL-1012AB"
102+
"regex": "^(?:NL[\\s\\-\u2013\u2014.]*)?(\\d{4}\\s?[A-Z]{2})$",
103+
"example": "1012 AB, NL-1012AB, NL 1012AB"
104104
},
105105
"NO": {
106-
"regex": "^(?:N-?)?([0-9]{4})$",
107-
"example": "0150, N-0150"
106+
"regex": "^(?:N[\\s\\-\u2013\u2014.]*|NO[\\s\\-\u2013\u2014.]*)?([0-9]{4})$",
107+
"example": "0150, N-0150, NO-0150, NO 0150"
108108
},
109109
"PL": {
110-
"regex": "^(?:PL-?)?([0-9]{2})-?([0-9]{3})$",
111-
"example": "00-950, 00950, PL-00-950"
110+
"regex": "^(?:PL[\\s\\-\u2013\u2014.]*)?([0-9]{2})-?([0-9]{3})$",
111+
"example": "00-950, 00950, PL-00-950, PL 00-950"
112112
},
113113
"PT": {
114-
"regex": "^([0-9]{4})-?([0-9]{3})$",
115-
"example": "1000-001, 1000001"
114+
"regex": "^(?:P[\\s\\-\u2013\u2014.]*|PT[\\s\\-\u2013\u2014.]*)?([0-9]{4})-?([0-9]{3})$",
115+
"example": "1000-001, 1000001, P-1000-001, PT-1000001"
116116
},
117117
"RO": {
118-
"regex": "^(?:RO-?)?([0-9]{6})$",
119-
"example": "010001, RO-010001"
118+
"regex": "^(?:RO[\\s\\-\u2013\u2014.]*)?([0-9]{6})$",
119+
"example": "010001, RO-010001, RO 010001"
120120
},
121121
"RS": {
122-
"regex": "^([0-9]{5})$",
123-
"example": "11000"
122+
"regex": "^(?:RS[\\s\\-\u2013\u2014.]*)?([0-9]{5})$",
123+
"example": "11000, RS-11000"
124124
},
125125
"SE": {
126-
"regex": "^(?:S-?|SE-?)?(\\d{3}\\s?\\d{2})$",
127-
"example": "10005, 100 05, S-10005, SE-10005"
126+
"regex": "^(?:S[\\s\\-\u2013\u2014.]*|SE[\\s\\-\u2013\u2014.]*)?(\\d{3}\\s?\\d{2})$",
127+
"example": "10005, 100 05, S-10005, SE-10005, SE 10005"
128128
},
129129
"SI": {
130-
"regex": "^(?:SI-?)?([0-9]{4})$",
131-
"example": "1000, SI-1000"
130+
"regex": "^(?:SI[\\s\\-\u2013\u2014.]*)?([0-9]{4})$",
131+
"example": "1000, SI-1000, SI 1000"
132132
},
133133
"SK": {
134-
"regex": "^(?:SK-?)?(\\d{3}\\s?\\d{2})$",
135-
"example": "81101, SK-81101, 811 01"
134+
"regex": "^(?:SK[\\s\\-\u2013\u2014.]*)?(\\d{3}\\s?\\d{2})$",
135+
"example": "81101, SK-81101, SK 81101, 811 01"
136136
},
137137
"TR": {
138-
"regex": "^(?:TR-?)?(\\d{5})$",
139-
"example": "06100, TR-06100, 34000"
138+
"regex": "^(?:TR[\\s\\-\u2013\u2014.]*)?(\\d{5})$",
139+
"example": "06100, TR-06100, TR 06100"
140140
}
141141
}

0 commit comments

Comments
 (0)