Skip to content

Commit 8a8f08a

Browse files
authored
Merge pull request #366 from posit-dev/enh-dataset-generate-country-name-code
Enh: add dataset generation presets for country codes (2- and 3-letter variants)
2 parents f4304a0 + 1ecf524 commit 8a8f08a

7 files changed

Lines changed: 398 additions & 18 deletions

File tree

docs/user-guide/test-data-generation.qmd

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,8 @@ The `preset=` parameter in `string_field()` supports many data types:
191191
- `city`: city name
192192
- `state`: state/province name
193193
- `country`: country name
194+
- `country_code_2`: ISO 3166-1 alpha-2 country code (e.g., `"US"`)
195+
- `country_code_3`: ISO 3166-1 alpha-3 country code (e.g., `"USA"`)
194196
- `postcode`: postal/ZIP code
195197
- `latitude`: latitude coordinate
196198
- `longitude`: longitude coordinate

pointblank/countries/__init__.py

Lines changed: 277 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
"LocaleGenerator",
1717
"get_generator",
1818
"COUNTRY_CODE_MAP",
19+
"COUNTRY_INFO",
1920
"COUNTRIES_WITH_FULL_DATA",
2021
]
2122

@@ -304,6 +305,252 @@
304305
"ZA", # South Africa
305306
]
306307

308+
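# Comprehensive country info mapping, keyed by ISO 3166-1 alpha-2 code: alpha-2 -> (alpha-3, English name)
# NOTE(review): the "EU" -> ("EUR", "European Union") entry does not look like an officially assigned
# ISO 3166-1 pair ("EUR" is the ISO 4217 currency code) -- confirm it is intentional.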
309+
COUNTRY_INFO: dict[str, tuple[str, str]] = {
310+
"AD": ("AND", "Andorra"),
311+
"AE": ("ARE", "United Arab Emirates"),
312+
"AF": ("AFG", "Afghanistan"),
313+
"AG": ("ATG", "Antigua & Barbuda"),
314+
"AI": ("AIA", "Anguilla"),
315+
"AL": ("ALB", "Albania"),
316+
"AM": ("ARM", "Armenia"),
317+
"AO": ("AGO", "Angola"),
318+
"AR": ("ARG", "Argentina"),
319+
"AS": ("ASM", "American Samoa"),
320+
"AT": ("AUT", "Austria"),
321+
"AU": ("AUS", "Australia"),
322+
"AW": ("ABW", "Aruba"),
323+
"AX": ("ALA", "\u00c5land Islands"),
324+
"AZ": ("AZE", "Azerbaijan"),
325+
"BA": ("BIH", "Bosnia & Herzegovina"),
326+
"BB": ("BRB", "Barbados"),
327+
"BD": ("BGD", "Bangladesh"),
328+
"BE": ("BEL", "Belgium"),
329+
"BF": ("BFA", "Burkina Faso"),
330+
"BG": ("BGR", "Bulgaria"),
331+
"BH": ("BHR", "Bahrain"),
332+
"BI": ("BDI", "Burundi"),
333+
"BJ": ("BEN", "Benin"),
334+
"BL": ("BLM", "St. Barth\u00e9lemy"),
335+
"BM": ("BMU", "Bermuda"),
336+
"BN": ("BRN", "Brunei"),
337+
"BO": ("BOL", "Bolivia"),
338+
"BR": ("BRA", "Brazil"),
339+
"BS": ("BHS", "Bahamas"),
340+
"BT": ("BTN", "Bhutan"),
341+
"BW": ("BWA", "Botswana"),
342+
"BY": ("BLR", "Belarus"),
343+
"BZ": ("BLZ", "Belize"),
344+
"CA": ("CAN", "Canada"),
345+
"CC": ("CCK", "Cocos (Keeling) Islands"),
346+
"CD": ("COD", "Congo (DRC)"),
347+
"CF": ("CAF", "Central African Republic"),
348+
"CG": ("COG", "Congo (Republic)"),
349+
"CH": ("CHE", "Switzerland"),
350+
"CI": ("CIV", "C\u00f4te d\u2019Ivoire"),
351+
"CK": ("COK", "Cook Islands"),
352+
"CL": ("CHL", "Chile"),
353+
"CM": ("CMR", "Cameroon"),
354+
"CN": ("CHN", "China"),
355+
"CO": ("COL", "Colombia"),
356+
"CR": ("CRI", "Costa Rica"),
357+
"CU": ("CUB", "Cuba"),
358+
"CV": ("CPV", "Cape Verde"),
359+
"CW": ("CUW", "Cura\u00e7ao"),
360+
"CY": ("CYP", "Cyprus"),
361+
"CZ": ("CZE", "Czech Republic"),
362+
"DE": ("DEU", "Germany"),
363+
"DJ": ("DJI", "Djibouti"),
364+
"DK": ("DNK", "Denmark"),
365+
"DM": ("DMA", "Dominica"),
366+
"DO": ("DOM", "Dominican Republic"),
367+
"DZ": ("DZA", "Algeria"),
368+
"EC": ("ECU", "Ecuador"),
369+
"EE": ("EST", "Estonia"),
370+
"EG": ("EGY", "Egypt"),
371+
"EH": ("ESH", "Western Sahara"),
372+
"ER": ("ERI", "Eritrea"),
373+
"ES": ("ESP", "Spain"),
374+
"ET": ("ETH", "Ethiopia"),
375+
"EU": ("EUR", "European Union"),
376+
"FI": ("FIN", "Finland"),
377+
"FJ": ("FJI", "Fiji"),
378+
"FK": ("FLK", "Falkland Islands"),
379+
"FM": ("FSM", "Micronesia"),
380+
"FO": ("FRO", "Faroe Islands"),
381+
"FR": ("FRA", "France"),
382+
"GA": ("GAB", "Gabon"),
383+
"GB": ("GBR", "United Kingdom"),
384+
"GD": ("GRD", "Grenada"),
385+
"GE": ("GEO", "Georgia"),
386+
"GF": ("GUF", "French Guiana"),
387+
"GG": ("GGY", "Guernsey"),
388+
"GH": ("GHA", "Ghana"),
389+
"GI": ("GIB", "Gibraltar"),
390+
"GL": ("GRL", "Greenland"),
391+
"GM": ("GMB", "Gambia"),
392+
"GN": ("GIN", "Guinea"),
393+
"GP": ("GLP", "Guadeloupe"),
394+
"GQ": ("GNQ", "Equatorial Guinea"),
395+
"GR": ("GRC", "Greece"),
396+
"GS": ("SGS", "South Georgia & South Sandwich Islands"),
397+
"GT": ("GTM", "Guatemala"),
398+
"GU": ("GUM", "Guam"),
399+
"GW": ("GNB", "Guinea-Bissau"),
400+
"GY": ("GUY", "Guyana"),
401+
"HK": ("HKG", "Hong Kong"),
402+
"HN": ("HND", "Honduras"),
403+
"HR": ("HRV", "Croatia"),
404+
"HT": ("HTI", "Haiti"),
405+
"HU": ("HUN", "Hungary"),
406+
"ID": ("IDN", "Indonesia"),
407+
"IE": ("IRL", "Ireland"),
408+
"IL": ("ISR", "Israel"),
409+
"IM": ("IMN", "Isle of Man"),
410+
"IN": ("IND", "India"),
411+
"IO": ("IOT", "British Indian Ocean Territory"),
412+
"IQ": ("IRQ", "Iraq"),
413+
"IR": ("IRN", "Iran"),
414+
"IS": ("ISL", "Iceland"),
415+
"IT": ("ITA", "Italy"),
416+
"JE": ("JEY", "Jersey"),
417+
"JM": ("JAM", "Jamaica"),
418+
"JO": ("JOR", "Jordan"),
419+
"JP": ("JPN", "Japan"),
420+
"KE": ("KEN", "Kenya"),
421+
"KG": ("KGZ", "Kyrgyzstan"),
422+
"KH": ("KHM", "Cambodia"),
423+
"KI": ("KIR", "Kiribati"),
424+
"KM": ("COM", "Comoros"),
425+
"KN": ("KNA", "St. Kitts & Nevis"),
426+
"KP": ("PRK", "North Korea"),
427+
"KR": ("KOR", "South Korea"),
428+
"KW": ("KWT", "Kuwait"),
429+
"KY": ("CYM", "Cayman Islands"),
430+
"KZ": ("KAZ", "Kazakhstan"),
431+
"LA": ("LAO", "Laos"),
432+
"LB": ("LBN", "Lebanon"),
433+
"LC": ("LCA", "St. Lucia"),
434+
"LI": ("LIE", "Liechtenstein"),
435+
"LK": ("LKA", "Sri Lanka"),
436+
"LR": ("LBR", "Liberia"),
437+
"LS": ("LSO", "Lesotho"),
438+
"LT": ("LTU", "Lithuania"),
439+
"LU": ("LUX", "Luxembourg"),
440+
"LV": ("LVA", "Latvia"),
441+
"LY": ("LBY", "Libya"),
442+
"MA": ("MAR", "Morocco"),
443+
"MC": ("MCO", "Monaco"),
444+
"MD": ("MDA", "Moldova"),
445+
"ME": ("MNE", "Montenegro"),
446+
"MF": ("MAF", "St. Martin"),
447+
"MG": ("MDG", "Madagascar"),
448+
"MH": ("MHL", "Marshall Islands"),
449+
"MK": ("MKD", "North Macedonia"),
450+
"ML": ("MLI", "Mali"),
451+
"MM": ("MMR", "Myanmar"),
452+
"MN": ("MNG", "Mongolia"),
453+
"MO": ("MAC", "Macao"),
454+
"MP": ("MNP", "Northern Mariana Islands"),
455+
"MQ": ("MTQ", "Martinique"),
456+
"MR": ("MRT", "Mauritania"),
457+
"MS": ("MSR", "Montserrat"),
458+
"MT": ("MLT", "Malta"),
459+
"MU": ("MUS", "Mauritius"),
460+
"MV": ("MDV", "Maldives"),
461+
"MW": ("MWI", "Malawi"),
462+
"MX": ("MEX", "Mexico"),
463+
"MY": ("MYS", "Malaysia"),
464+
"MZ": ("MOZ", "Mozambique"),
465+
"NA": ("NAM", "Namibia"),
466+
"NC": ("NCL", "New Caledonia"),
467+
"NE": ("NER", "Niger"),
468+
"NF": ("NFK", "Norfolk Island"),
469+
"NG": ("NGA", "Nigeria"),
470+
"NI": ("NIC", "Nicaragua"),
471+
"NL": ("NLD", "Netherlands"),
472+
"NO": ("NOR", "Norway"),
473+
"NP": ("NPL", "Nepal"),
474+
"NR": ("NRU", "Nauru"),
475+
"NU": ("NIU", "Niue"),
476+
"NZ": ("NZL", "New Zealand"),
477+
"OM": ("OMN", "Oman"),
478+
"PA": ("PAN", "Panama"),
479+
"PE": ("PER", "Peru"),
480+
"PF": ("PYF", "French Polynesia"),
481+
"PG": ("PNG", "Papua New Guinea"),
482+
"PH": ("PHL", "Philippines"),
483+
"PK": ("PAK", "Pakistan"),
484+
"PL": ("POL", "Poland"),
485+
"PM": ("SPM", "St. Pierre & Miquelon"),
486+
"PN": ("PCN", "Pitcairn Islands"),
487+
"PR": ("PRI", "Puerto Rico"),
488+
"PS": ("PSE", "Palestine"),
489+
"PT": ("PRT", "Portugal"),
490+
"PW": ("PLW", "Palau"),
491+
"PY": ("PRY", "Paraguay"),
492+
"QA": ("QAT", "Qatar"),
493+
"RE": ("REU", "R\u00e9union"),
494+
"RO": ("ROU", "Romania"),
495+
"RS": ("SRB", "Serbia"),
496+
"RU": ("RUS", "Russia"),
497+
"RW": ("RWA", "Rwanda"),
498+
"SA": ("SAU", "Saudi Arabia"),
499+
"SB": ("SLB", "Solomon Islands"),
500+
"SC": ("SYC", "Seychelles"),
501+
"SD": ("SDN", "Sudan"),
502+
"SE": ("SWE", "Sweden"),
503+
"SG": ("SGP", "Singapore"),
504+
"SI": ("SVN", "Slovenia"),
505+
"SK": ("SVK", "Slovakia"),
506+
"SL": ("SLE", "Sierra Leone"),
507+
"SM": ("SMR", "San Marino"),
508+
"SN": ("SEN", "Senegal"),
509+
"SO": ("SOM", "Somalia"),
510+
"SR": ("SUR", "Suriname"),
511+
"SS": ("SSD", "South Sudan"),
512+
"ST": ("STP", "S\u00e3o Tom\u00e9 & Pr\u00edncipe"),
513+
"SV": ("SLV", "El Salvador"),
514+
"SX": ("SXM", "Sint Maarten"),
515+
"SY": ("SYR", "Syria"),
516+
"SZ": ("SWZ", "Eswatini"),
517+
"TC": ("TCA", "Turks & Caicos Islands"),
518+
"TD": ("TCD", "Chad"),
519+
"TF": ("ATF", "French Southern Territories"),
520+
"TG": ("TGO", "Togo"),
521+
"TH": ("THA", "Thailand"),
522+
"TJ": ("TJK", "Tajikistan"),
523+
"TK": ("TKL", "Tokelau"),
524+
"TL": ("TLS", "East Timor"),
525+
"TM": ("TKM", "Turkmenistan"),
526+
"TN": ("TUN", "Tunisia"),
527+
"TO": ("TON", "Tonga"),
528+
"TR": ("TUR", "Turkey"),
529+
"TT": ("TTO", "Trinidad & Tobago"),
530+
"TV": ("TUV", "Tuvalu"),
531+
"TW": ("TWN", "Taiwan"),
532+
"TZ": ("TZA", "Tanzania"),
533+
"UA": ("UKR", "Ukraine"),
534+
"UG": ("UGA", "Uganda"),
535+
"US": ("USA", "United States"),
536+
"UY": ("URY", "Uruguay"),
537+
"UZ": ("UZB", "Uzbekistan"),
538+
"VA": ("VAT", "Vatican City"),
539+
"VC": ("VCT", "St. Vincent & Grenadines"),
540+
"VE": ("VEN", "Venezuela"),
541+
"VG": ("VGB", "British Virgin Islands"),
542+
"VI": ("VIR", "U.S. Virgin Islands"),
543+
"VN": ("VNM", "Vietnam"),
544+
"VU": ("VUT", "Vanuatu"),
545+
"WF": ("WLF", "Wallis & Futuna"),
546+
"WS": ("WSM", "Samoa"),
547+
"YE": ("YEM", "Yemen"),
548+
"YT": ("MYT", "Mayotte"),
549+
"ZA": ("ZAF", "South Africa"),
550+
"ZM": ("ZMB", "Zambia"),
551+
"ZW": ("ZWE", "Zimbabwe"),
552+
}
553+
307554

308555
@dataclass
309556
class LocaleData:
@@ -1426,6 +1673,36 @@ def country(self) -> str:
14261673
"""Generate the country name for this locale."""
14271674
return self._data.address.get("country", "United States")
14281675

1676+
def country_name(self) -> str:
    """Return the English country name associated with this locale's country code.

    The locale's `country_code` is looked up in `COUNTRY_INFO`; on a hit, the second
    element of the stored tuple (the English name) is returned. On a miss, the
    `"country"` entry of the locale's address data is returned instead, defaulting to
    `"United States"` when that entry is absent.
    """
    entry = COUNTRY_INFO.get(self.country_code)
    if entry is None:
        # Code not present in the mapping: fall back to the locale's own address data.
        return self._data.address.get("country", "United States")
    return entry[1]
1686+
1687+
def country_code_2(self) -> str:
    """Return this locale's 2-letter (ISO 3166-1 alpha-2) country code.

    The value is the generator's own `country_code`, returned unchanged.
    """
    alpha2 = self.country_code
    return alpha2
1690+
1691+
def country_code_3(self) -> str:
    """Return this locale's 3-letter (ISO 3166-1 alpha-3) country code.

    Resolution order:

    1. Look up the locale's `country_code` in `COUNTRY_INFO` and return the first
       element of the stored tuple (the alpha-3 code).
    2. Otherwise, return the first 3-character key of `COUNTRY_CODE_MAP` whose value
       equals the locale's `country_code`.
    3. Otherwise, return the locale's `country_code` unchanged. Note that in this
       last case the result is whatever `country_code` holds, not a looked-up
       alpha-3 code.
    """
    entry = COUNTRY_INFO.get(self.country_code)
    if entry is not None:
        return entry[0]

    # Reverse lookup: COUNTRY_CODE_MAP maps alias codes to the normalized code, so a
    # 3-character alias pointing at this locale's code serves as the alpha-3 value.
    three_letter_aliases = (
        alias
        for alias, normalized in COUNTRY_CODE_MAP.items()
        if normalized == self.country_code and len(alias) == 3
    )
    return next(three_letter_aliases, self.country_code)
1705+
14291706
def postcode(self) -> str:
14301707
"""Generate a random postal code (coherent with current location context)."""
14311708
location = self._get_current_location()

pointblank/field.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@
4444
"city",
4545
"state",
4646
"country",
47+
"country_code_2",
48+
"country_code_3",
4749
"postcode",
4850
"latitude",
4951
"longitude",
@@ -854,7 +856,8 @@ def string_field(
854856
**Personal:** `"name"` (first + last name), `"name_full"` (full name with possible prefix
855857
or suffix), `"first_name"`, `"last_name"`, `"email"` (realistic email address),
856858
`"phone_number"`, `"address"` (full street address), `"city"`, `"state"`, `"country"`,
857-
`"postcode"`, `"latitude"`, `"longitude"`
859+
`"country_code_2"` (ISO 3166-1 alpha-2 code, e.g., `"US"`), `"country_code_3"` (ISO
860+
3166-1 alpha-3 code, e.g., `"USA"`), `"postcode"`, `"latitude"`, `"longitude"`
858861
859862
**Business:** `"company"` (company name), `"job"` (job title), `"catch_phrase"`
860863

pointblank/generate/generators.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,8 @@ def _generate_from_preset(preset: str, generator: LocaleGenerator) -> str:
113113
"city": generator.city,
114114
"state": generator.state,
115115
"country": generator.country,
116+
"country_code_2": generator.country_code_2,
117+
"country_code_3": generator.country_code_3,
116118
"postcode": generator.postcode,
117119
"latitude": generator.latitude,
118120
"longitude": generator.longitude,

pointblank_mcp_server/pointblank_server.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -819,9 +819,9 @@ async def profile_dataframe(
819819
# Sample data if needed
820820
if sample_size > 0 and df.shape[0] > sample_size:
821821
if hasattr(df, "sample"): # pandas
822-
df_sample = df.sample(n=sample_size, random_state=42)
822+
df_sample = df.sample(n=sample_size, random_state=23)
823823
else: # polars
824-
df_sample = df.sample(n=sample_size, seed=42)
824+
df_sample = df.sample(n=sample_size, seed=23)
825825
await ctx.report_progress(20, 100, f"Sampling {sample_size} rows for analysis...")
826826
else:
827827
df_sample = df

0 commit comments

Comments
 (0)