Skip to content

Commit b02914c

Browse files
Ralphclaude
andcommitted
fix(intl): #5585 — address CodeRabbit review (fmt + correctness)
- cargo fmt (the lint check failed on formatting). - Route the CanonicalizeLocaleList branches through `canonicalize_language_tag` (feature-aware, matches intl/locales.rs) instead of `canonical_locale`. - Skip absent indices via HasProperty when walking array-like locale objects (`{length:3,0:"en"}` → `["en"]`, not a TypeError on the holes). - `unicode_extension_keyword` stops at the `x` private-use singleton so `en-x-u-kn` is not misread as a `kn` Unicode extension. - Collator `collation` option rejects the reserved `standard`/`search` values (and malformed types); a valid option now wins over the `-u-co-` keyword. - `supportedLocalesOf` runs CanonicalizeLocaleList(locales) before reading `options` (locale errors/side-effects precede option validation). - Collator compare normalizes with NFC (not NFD) so the Swedish fast path keeps its precomposed å/ä/ö; `ignorePunctuation` stripping uses an explicit punctuation set instead of whole Latin-1 ranges (no longer eats ª/µ/º/¹²³/¼). - RTF `format`/`formatToParts` and PluralRules `selectRange` use a ToNumber that rejects BigInt with TypeError (`format(1n,"day")` throws). test262 intl402 across the six dirs: 307/387 pass (79%), up from 297 — 10 more cases, no regressions. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 4c83a09 commit b02914c

3 files changed

Lines changed: 127 additions & 56 deletions

File tree

crates/perry-runtime/src/intl.rs

Lines changed: 93 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,14 @@ fn canonicalize_language_tag(tag: &str) -> Option<String> {
549549
}
550550
}
551551

552+
/// `HasProperty(O, ToString(index))` — true when the integer-indexed property is
553+
/// present (own or inherited). Used to skip holes/absent indices in
554+
/// CanonicalizeLocaleList's array/array-like walk.
555+
fn js_has_index(obj: f64, index: u32) -> bool {
556+
let key = string_value(&index.to_string());
557+
crate::object::js_object_has_property(obj, key).to_bits() == crate::value::TAG_TRUE
558+
}
559+
552560
/// CanonicalizeLocaleList element handler: a present element must be a String or
553561
/// an Object (an `Intl.Locale` or anything ToString-able), else `TypeError`; the
554562
/// resulting tag is canonicalized (`RangeError` if structurally invalid) and
@@ -563,7 +571,7 @@ fn push_locale_element(out: &mut Vec<String>, value: f64) {
563571
// undefined / null / boolean / number / Symbol element → TypeError.
564572
throw_type_error("locale must be a String or Object");
565573
};
566-
let Some(canonical) = canonical_locale(&tag) else {
574+
let Some(canonical) = canonicalize_language_tag(&tag) else {
567575
throw_invalid_language_tag(&tag);
568576
};
569577
if !out.iter().any(|existing| existing == &canonical) {
@@ -584,7 +592,7 @@ fn locales_from_value(locales: f64) -> Vec<String> {
584592
// A String argument is treated as a single-element list (not iterated by char).
585593
if js.is_any_string() {
586594
let tag = string_from_string_value(locales).unwrap_or_default();
587-
let Some(canonical) = canonical_locale(&tag) else {
595+
let Some(canonical) = canonicalize_language_tag(&tag) else {
588596
throw_invalid_language_tag(&tag);
589597
};
590598
return vec![canonical];
@@ -611,6 +619,11 @@ fn locales_from_value(locales: f64) -> Vec<String> {
611619
};
612620
let mut out = Vec::with_capacity(len as usize);
613621
for i in 0..len {
622+
// Skip absent indices (`HasProperty` is false) — e.g.
623+
// `{ length: 3, 0: "en" }` yields just `["en"]`, never `undefined`.
624+
if !js_has_index(locales, i) {
625+
continue;
626+
}
614627
push_locale_element(&mut out, get_field(obj, &i.to_string()));
615628
}
616629
return out;
@@ -654,9 +667,14 @@ fn unicode_extension_keyword(locale: &str, key: &str) -> Option<String> {
654667
let lower = locale.to_ascii_lowercase();
655668
let key = key.to_ascii_lowercase();
656669
let mut iter = lower.split('-');
657-
// Advance to the `u` singleton.
670+
// Advance to the `u` singleton. A `x` singleton starts the private-use
671+
// sequence (which must come last); a `u` inside it — e.g. `en-x-u-kn` — is
672+
// private data, not a Unicode extension, so stop scanning there.
658673
let mut in_u = false;
659-
while let Some(p) = iter.next() {
674+
for p in iter.by_ref() {
675+
if p == "x" {
676+
return None;
677+
}
660678
if p == "u" {
661679
in_u = true;
662680
break;
@@ -1260,27 +1278,34 @@ fn make_instance(closure: *const ClosureHeader, kind: &str, locales: f64, option
12601278
// (constructor-options-throwing-getters / resolvedOptions order.js).
12611279
let options = coerce_options_reject_null(options);
12621280
let usage = enum_option_strict(options, "usage", &["sort", "search"], "sort");
1263-
let _ = enum_option_strict(options, "localeMatcher", &["lookup", "best fit"], "best fit");
1264-
// `collation` is a free-form `type` string (RangeError if malformed);
1265-
// it has no CLDR effect here, so it resolves to "default".
1266-
if let Some(collation) = get_option_string_coerced(options, "collation") {
1267-
if !is_well_formed_numbering_system(&collation) {
1281+
let _ = enum_option_strict(
1282+
options,
1283+
"localeMatcher",
1284+
&["lookup", "best fit"],
1285+
"best fit",
1286+
);
1287+
// `collation` is a `type` string: malformed, or the reserved `standard`
1288+
// /`search` values, are a RangeError (the latter are only valid as a
1289+
// `usage` selector, never an explicit collation). A valid value wins
1290+
// over any `-u-co-` keyword; absent ⇒ fall back to the extension.
1291+
let collation_opt = get_option_string_coerced(options, "collation").map(|v| {
1292+
if !is_well_formed_numbering_system(&v) || v == "standard" || v == "search" {
12681293
throw_range_error(&format!(
1269-
"Value {collation} out of range for Intl options property collation"
1294+
"Value {v} out of range for Intl options property collation"
12701295
));
12711296
}
1272-
}
1297+
v
1298+
});
12731299
let numeric_opt = get_bool_option(options, "numeric");
1274-
let case_first_opt =
1275-
get_option_string_coerced(options, "caseFirst").map(|v| {
1276-
if ["upper", "lower", "false"].contains(&v.as_str()) {
1277-
v
1278-
} else {
1279-
throw_range_error(&format!(
1280-
"Value {v} out of range for Intl options property caseFirst"
1281-
))
1282-
}
1283-
});
1300+
let case_first_opt = get_option_string_coerced(options, "caseFirst").map(|v| {
1301+
if ["upper", "lower", "false"].contains(&v.as_str()) {
1302+
v
1303+
} else {
1304+
throw_range_error(&format!(
1305+
"Value {v} out of range for Intl options property caseFirst"
1306+
))
1307+
}
1308+
});
12841309
let sensitivity = enum_option_strict(
12851310
options,
12861311
"sensitivity",
@@ -1291,20 +1316,21 @@ fn make_instance(closure: *const ClosureHeader, kind: &str, locales: f64, option
12911316
// ResolveLocale: when an option is absent, fall back to the matching
12921317
// Unicode (`-u-`) extension keyword in the resolved locale — `kn`
12931318
// (numeric, value-less ⇒ true) and `kf` (caseFirst).
1294-
let numeric = numeric_opt.unwrap_or_else(|| {
1295-
match unicode_extension_keyword(&locale, "kn") {
1319+
let numeric =
1320+
numeric_opt.unwrap_or_else(|| match unicode_extension_keyword(&locale, "kn") {
12961321
Some(v) => v != "false",
12971322
None => false,
1298-
}
1299-
});
1323+
});
13001324
let case_first = case_first_opt.unwrap_or_else(|| {
13011325
unicode_extension_keyword(&locale, "kf")
13021326
.filter(|v| ["upper", "lower", "false"].contains(&v.as_str()))
13031327
.unwrap_or_else(|| "false".to_string())
13041328
});
1305-
let collation = unicode_extension_keyword(&locale, "co")
1306-
.filter(|v| !v.is_empty() && v != "standard" && v != "search")
1307-
.unwrap_or_else(|| "default".to_string());
1329+
let collation = collation_opt.unwrap_or_else(|| {
1330+
unicode_extension_keyword(&locale, "co")
1331+
.filter(|v| !v.is_empty() && v != "standard" && v != "search")
1332+
.unwrap_or_else(|| "default".to_string())
1333+
});
13081334
set_internal_field(obj, KEY_COL_USAGE, string_value(&usage));
13091335
set_internal_field(obj, KEY_COL_SENSITIVITY, string_value(&sensitivity));
13101336
set_internal_field(obj, KEY_COL_IGNORE_PUNCT, bool_value(ignore_punct));
@@ -1328,7 +1354,12 @@ fn make_instance(closure: *const ClosureHeader, kind: &str, locales: f64, option
13281354
// `? ToObject(options)` (null → TypeError), then GetOption in order:
13291355
// localeMatcher, granularity (options-order.js / options-null.js).
13301356
let options = coerce_options_reject_null(options);
1331-
let _ = enum_option_strict(options, "localeMatcher", &["lookup", "best fit"], "best fit");
1357+
let _ = enum_option_strict(
1358+
options,
1359+
"localeMatcher",
1360+
&["lookup", "best fit"],
1361+
"best fit",
1362+
);
13321363
let granularity =
13331364
normalize_granularity(get_option_string_coerced(options, "granularity"));
13341365
set_internal_field(obj, KEY_GRANULARITY, string_value(&granularity));
@@ -1350,7 +1381,12 @@ fn make_instance(closure: *const ClosureHeader, kind: &str, locales: f64, option
13501381
// TypeError), then GetOption: localeMatcher, type, style
13511382
// (options-getoptionsobject.js / options-order.js).
13521383
let options = get_options_object(options);
1353-
let _ = enum_option_strict(options, "localeMatcher", &["lookup", "best fit"], "best fit");
1384+
let _ = enum_option_strict(
1385+
options,
1386+
"localeMatcher",
1387+
&["lookup", "best fit"],
1388+
"best fit",
1389+
);
13541390
let list_type = enum_option_strict(
13551391
options,
13561392
"type",
@@ -1383,7 +1419,12 @@ fn make_instance(closure: *const ClosureHeader, kind: &str, locales: f64, option
13831419
// `? ToObject(options)` (null → TypeError), then GetOption in order:
13841420
// localeMatcher, numberingSystem, style, numeric (options-order.js).
13851421
let options = coerce_options_reject_null(options);
1386-
let _ = enum_option_strict(options, "localeMatcher", &["lookup", "best fit"], "best fit");
1422+
let _ = enum_option_strict(
1423+
options,
1424+
"localeMatcher",
1425+
&["lookup", "best fit"],
1426+
"best fit",
1427+
);
13871428
if let Some(ns) = get_option_string_coerced(options, "numberingSystem") {
13881429
if !is_well_formed_numbering_system(&ns) {
13891430
throw_range_error(&format!(
@@ -1416,7 +1457,12 @@ fn make_instance(closure: *const ClosureHeader, kind: &str, locales: f64, option
14161457
// (minimumIntegerDigits, min/maxFractionDigits, min/maxSignificantDigits,
14171458
// roundingIncrement, roundingMode, roundingPriority, trailingZeroDisplay).
14181459
let options = get_options_object(options);
1419-
let _ = enum_option_strict(options, "localeMatcher", &["lookup", "best fit"], "best fit");
1460+
let _ = enum_option_strict(
1461+
options,
1462+
"localeMatcher",
1463+
&["lookup", "best fit"],
1464+
"best fit",
1465+
);
14201466
let pr_type = enum_option_strict(options, "type", &["cardinal", "ordinal"], "cardinal");
14211467
set_internal_field(obj, KEY_TYPE, string_value(&pr_type));
14221468
let notation = enum_option_strict(
@@ -1573,17 +1619,26 @@ extern "C" fn plural_rules_constructor_thunk(closure: *const ClosureHeader, rest
15731619
}
15741620

15751621
fn supported_locales_array(locales: f64, options: f64) -> f64 {
1576-
// SupportedLocales: when `options` is not undefined, `? ToObject(options)`
1577-
// (null → TypeError) then `? GetOption(options, "localeMatcher", …)` — so an
1578-
// invalid localeMatcher is a RangeError even though the matcher choice does
1579-
// not affect Perry's lookup result.
1622+
// `supportedLocalesOf(locales, options)`:
1623+
// 1. requestedLocales = ? CanonicalizeLocaleList(locales) ← runs FIRST,
1624+
// so a malformed locale errors before `options` is touched.
1625+
// 2. SupportedLocales(..., options): when `options` is not undefined,
1626+
// `? ToObject(options)` (null → TypeError) then
1627+
// `? GetOption(options, "localeMatcher", …)` — an invalid localeMatcher
1628+
// is a RangeError even though the matcher choice does not affect Perry's
1629+
// lookup result.
1630+
let requested = locales_from_value(locales);
15801631
if !JSValue::from_bits(options.to_bits()).is_undefined() {
15811632
let options = coerce_options_reject_null(options);
1582-
let _ = enum_option_strict(options, "localeMatcher", &["lookup", "best fit"], "best fit");
1633+
let _ = enum_option_strict(
1634+
options,
1635+
"localeMatcher",
1636+
&["lookup", "best fit"],
1637+
"best fit",
1638+
);
15831639
}
15841640
// BestAvailableLocale-filter the canonicalized request list: drop tags whose
15851641
// primary language Perry can't service (e.g. `zxx`), keeping order + dedup.
1586-
let requested = locales_from_value(locales);
15871642
let mut arr = js_array_alloc(0);
15881643
for locale in requested {
15891644
if is_available_locale(&locale) {

crates/perry-runtime/src/intl/date_collator.rs

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,9 @@ pub(crate) fn swedish_collation_key(s: &str) -> Vec<u32> {
350350
#[cfg(feature = "string-normalize")]
351351
fn collation_normalize(s: &str) -> String {
352352
use unicode_normalization::UnicodeNormalization;
353-
s.nfd().collect()
353+
// NFC (composition), not NFD: it makes canonical equivalents equal while
354+
// keeping precomposed `å/ä/ö` intact for the Swedish fast path below.
355+
s.nfc().collect()
354356
}
355357
#[cfg(not(feature = "string-normalize"))]
356358
fn collation_normalize(s: &str) -> String {
@@ -401,20 +403,23 @@ fn strip_ignorable_punctuation(s: &str) -> String {
401403
}
402404

403405
fn is_punctuation(c: char) -> bool {
404-
// ASCII punctuation plus the common Unicode punctuation/symbol ranges that
405-
// UCA marks variable; a pragmatic superset of what the parity tests exercise.
406+
// ASCII punctuation plus an explicit set of Unicode punctuation code points,
407+
// deliberately NOT whole Latin-1 ranges — those contain letters/numbers
408+
// (`ª` U+00AA, `µ` U+00B5, `º` U+00BA, the `¹²³` superscripts, `¼½¾`
409+
// fractions) that must not be stripped or distinct strings would compare
410+
// equal. The General Punctuation block (U+2000–U+206F) and CJK punctuation
411+
// (U+3000–U+303F) are all punctuation/spaces and are safe as ranges.
406412
c.is_ascii_punctuation()
407413
|| matches!(c,
408-
'\u{00A1}'..='\u{00BF}'
409-
| '\u{2010}'..='\u{2027}'
410-
| '\u{2030}'..='\u{205E}'
414+
'\u{00A1}' | '\u{00A7}' | '\u{00AB}' | '\u{00B6}' | '\u{00B7}'
415+
| '\u{00BB}' | '\u{00BF}'
416+
| '\u{2000}'..='\u{206F}'
411417
| '\u{3000}'..='\u{303F}')
412418
}
413419

414420
pub(crate) fn collator_compare_object(obj: *const ObjectHeader, left: f64, right: f64) -> f64 {
415421
let locale = get_string_field(obj, KEY_LOCALE).unwrap_or_else(|| "en-US".to_string());
416-
let ignore_punct =
417-
get_field(obj, KEY_COL_IGNORE_PUNCT).to_bits() == crate::value::TAG_TRUE;
422+
let ignore_punct = get_field(obj, KEY_COL_IGNORE_PUNCT).to_bits() == crate::value::TAG_TRUE;
418423
let (mut l, mut r) = (value_to_string(left), value_to_string(right));
419424
if ignore_punct {
420425
l = strip_ignorable_punctuation(&l);

crates/perry-runtime/src/intl/list_relative_plural.rs

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -301,19 +301,30 @@ pub(crate) fn rtf_parts(value: f64, unit: &str) -> Vec<(&'static str, String)> {
301301
parts
302302
}
303303

304+
/// `ToNumber(value)` that rejects BigInt with a TypeError, matching the
305+
/// ECMA-262 abstract operation. `js_number_coerce` alone converts `1n` → `1`
306+
/// (for `Number(1n)`), but `Intl` `format`/`select*` go through ToNumber, so
307+
/// `format(1n, "day")` must throw. A Symbol still throws inside `js_number_coerce`,
308+
/// and an object's `valueOf` is honoured there.
309+
pub(crate) fn to_number_reject_bigint(value: f64) -> f64 {
310+
if JSValue::from_bits(value.to_bits()).is_bigint() {
311+
throw_type_error("Cannot convert a BigInt value to a number");
312+
}
313+
crate::builtins::js_number_coerce(value)
314+
}
315+
304316
/// Shared steps of `format`/`formatToParts`: `value = ? ToNumber(value)` (a
305-
/// Symbol throws TypeError; an object's `valueOf` is honoured), then
317+
/// Symbol or BigInt throws TypeError; an object's `valueOf` is honoured), then
306318
/// `unit = ? ToString(unit)`, then the RangeError guards for a non-finite value
307319
/// or an unsanctioned unit. Returns the rendered parts together with the
308320
/// resolved singular `unit` (the `[[Unit]]` field formatToParts attaches).
309321
pub(crate) fn rtf_instance_parts_and_unit(
310322
value: f64,
311323
unit_arg: f64,
312324
) -> (Vec<(&'static str, String)>, &'static str) {
313-
// Full ToNumber (not `JSValue::to_number`, which returns NaN for objects and
314-
// doesn't reject Symbols): valueOf is invoked, and a Symbol value throws the
315-
// expected TypeError *before* the finite-ness RangeError (format/value-symbol.js).
316-
let number = crate::builtins::js_number_coerce(value);
325+
// ToNumber: a Symbol/BigInt value throws TypeError *before* the finite-ness
326+
// RangeError (format/value-symbol.js); an object's valueOf is invoked.
327+
let number = to_number_reject_bigint(value);
317328
let unit_str = value_to_string(unit_arg);
318329
if !number.is_finite() {
319330
throw_range_error("Value need to be finite number for Intl.RelativeTimeFormat.format()");
@@ -513,8 +524,8 @@ pub(crate) fn plural_select_range(start: f64, end: f64) -> f64 {
513524
{
514525
throw_type_error("Intl.PluralRules.prototype.selectRange: start and end must be defined");
515526
}
516-
let s = crate::builtins::js_number_coerce(start);
517-
let e = crate::builtins::js_number_coerce(end);
527+
let s = to_number_reject_bigint(start);
528+
let e = to_number_reject_bigint(end);
518529
if s.is_nan() || e.is_nan() {
519530
throw_range_error("Invalid values for Intl.PluralRules.selectRange()");
520531
}
@@ -535,16 +546,16 @@ pub(crate) fn plural_rules_resolved_options_object(obj: *const ObjectHeader) ->
535546
"type",
536547
string_value(if is_ordinal { "ordinal" } else { "cardinal" }),
537548
);
538-
let notation =
539-
get_string_field(obj, KEY_PR_NOTATION).unwrap_or_else(|| "standard".to_string());
549+
let notation = get_string_field(obj, KEY_PR_NOTATION).unwrap_or_else(|| "standard".to_string());
540550
set_field(out, "notation", string_value(&notation));
541551
// `compactDisplay` surfaces only when notation is "compact".
542552
if notation == "compact" {
543553
set_field(
544554
out,
545555
"compactDisplay",
546556
string_value(
547-
&get_string_field(obj, KEY_PR_COMPACT_DISPLAY).unwrap_or_else(|| "short".to_string()),
557+
&get_string_field(obj, KEY_PR_COMPACT_DISPLAY)
558+
.unwrap_or_else(|| "short".to_string()),
548559
),
549560
);
550561
}

0 commit comments

Comments
 (0)