Skip to content

Commit c1c42c9

Browse files
Ralphclaude
andcommitted
fix(intl): #5585 — address CodeRabbit review (fmt + correctness)
- cargo fmt (the lint check failed on formatting). - Route the CanonicalizeLocaleList branches through `canonicalize_language_tag` (feature-aware, matches intl/locales.rs) instead of `canonical_locale`. - Skip absent indices via HasProperty when walking array-like locale objects (`{length:3,0:"en"}` → `["en"]`, not a TypeError on the holes). - `unicode_extension_keyword` stops at the `x` private-use singleton so `en-x-u-kn` is not misread as a `kn` Unicode extension. - Collator `collation` option rejects the reserved `standard`/`search` values (and malformed types); a valid option now wins over the `-u-co-` keyword. - `supportedLocalesOf` runs CanonicalizeLocaleList(locales) before reading `options` (locale errors/side-effects precede option validation). - Collator compare normalizes with NFC (not NFD) so the Swedish fast path keeps its precomposed å/ä/ö; `ignorePunctuation` stripping uses an explicit punctuation set instead of whole Latin-1 ranges (no longer eats ª/µ/º/¹²³/¼). - RTF `format`/`formatToParts` and PluralRules `selectRange` use a ToNumber that rejects BigInt with TypeError (`format(1n,"day")` throws). test262 intl402 across the six dirs: 307/387 pass (79%), up from 297 — 10 more cases, no regressions. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent d4e624c commit c1c42c9

3 files changed

Lines changed: 127 additions & 56 deletions

File tree

crates/perry-runtime/src/intl.rs

Lines changed: 93 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -506,6 +506,14 @@ fn canonicalize_language_tag(tag: &str) -> Option<String> {
506506
}
507507
}
508508

509+
/// `HasProperty(O, ToString(index))` — true when the integer-indexed property is
510+
/// present (own or inherited). Used to skip holes/absent indices in
511+
/// CanonicalizeLocaleList's array/array-like walk.
512+
fn js_has_index(obj: f64, index: u32) -> bool {
513+
let key = string_value(&index.to_string());
514+
crate::object::js_object_has_property(obj, key).to_bits() == crate::value::TAG_TRUE
515+
}
516+
509517
/// CanonicalizeLocaleList element handler: a present element must be a String or
510518
/// an Object (an `Intl.Locale` or anything ToString-able), else `TypeError`; the
511519
/// resulting tag is canonicalized (`RangeError` if structurally invalid) and
@@ -520,7 +528,7 @@ fn push_locale_element(out: &mut Vec<String>, value: f64) {
520528
// undefined / null / boolean / number / Symbol element → TypeError.
521529
throw_type_error("locale must be a String or Object");
522530
};
523-
let Some(canonical) = canonical_locale(&tag) else {
531+
let Some(canonical) = canonicalize_language_tag(&tag) else {
524532
throw_invalid_language_tag(&tag);
525533
};
526534
if !out.iter().any(|existing| existing == &canonical) {
@@ -541,7 +549,7 @@ fn locales_from_value(locales: f64) -> Vec<String> {
541549
// A String argument is treated as a single-element list (not iterated by char).
542550
if js.is_any_string() {
543551
let tag = string_from_string_value(locales).unwrap_or_default();
544-
let Some(canonical) = canonical_locale(&tag) else {
552+
let Some(canonical) = canonicalize_language_tag(&tag) else {
545553
throw_invalid_language_tag(&tag);
546554
};
547555
return vec![canonical];
@@ -568,6 +576,11 @@ fn locales_from_value(locales: f64) -> Vec<String> {
568576
};
569577
let mut out = Vec::with_capacity(len as usize);
570578
for i in 0..len {
579+
// Skip absent indices (`HasProperty` is false) — e.g.
580+
// `{ length: 3, 0: "en" }` yields just `["en"]`, never `undefined`.
581+
if !js_has_index(locales, i) {
582+
continue;
583+
}
571584
push_locale_element(&mut out, get_field(obj, &i.to_string()));
572585
}
573586
return out;
@@ -611,9 +624,14 @@ fn unicode_extension_keyword(locale: &str, key: &str) -> Option<String> {
611624
let lower = locale.to_ascii_lowercase();
612625
let key = key.to_ascii_lowercase();
613626
let mut iter = lower.split('-');
614-
// Advance to the `u` singleton.
627+
// Advance to the `u` singleton. A `x` singleton starts the private-use
628+
// sequence (which must come last); a `u` inside it — e.g. `en-x-u-kn` — is
629+
// private data, not a Unicode extension, so stop scanning there.
615630
let mut in_u = false;
616-
while let Some(p) = iter.next() {
631+
for p in iter.by_ref() {
632+
if p == "x" {
633+
return None;
634+
}
617635
if p == "u" {
618636
in_u = true;
619637
break;
@@ -1202,27 +1220,34 @@ fn make_instance(closure: *const ClosureHeader, kind: &str, locales: f64, option
12021220
// (constructor-options-throwing-getters / resolvedOptions order.js).
12031221
let options = coerce_options_reject_null(options);
12041222
let usage = enum_option_strict(options, "usage", &["sort", "search"], "sort");
1205-
let _ = enum_option_strict(options, "localeMatcher", &["lookup", "best fit"], "best fit");
1206-
// `collation` is a free-form `type` string (RangeError if malformed);
1207-
// it has no CLDR effect here, so it resolves to "default".
1208-
if let Some(collation) = get_option_string_coerced(options, "collation") {
1209-
if !is_well_formed_numbering_system(&collation) {
1223+
let _ = enum_option_strict(
1224+
options,
1225+
"localeMatcher",
1226+
&["lookup", "best fit"],
1227+
"best fit",
1228+
);
1229+
// `collation` is a `type` string: malformed, or the reserved `standard`
1230+
// /`search` values, are a RangeError (the latter are only valid as a
1231+
// `usage` selector, never an explicit collation). A valid value wins
1232+
// over any `-u-co-` keyword; absent ⇒ fall back to the extension.
1233+
let collation_opt = get_option_string_coerced(options, "collation").map(|v| {
1234+
if !is_well_formed_numbering_system(&v) || v == "standard" || v == "search" {
12101235
throw_range_error(&format!(
1211-
"Value {collation} out of range for Intl options property collation"
1236+
"Value {v} out of range for Intl options property collation"
12121237
));
12131238
}
1214-
}
1239+
v
1240+
});
12151241
let numeric_opt = get_bool_option(options, "numeric");
1216-
let case_first_opt =
1217-
get_option_string_coerced(options, "caseFirst").map(|v| {
1218-
if ["upper", "lower", "false"].contains(&v.as_str()) {
1219-
v
1220-
} else {
1221-
throw_range_error(&format!(
1222-
"Value {v} out of range for Intl options property caseFirst"
1223-
))
1224-
}
1225-
});
1242+
let case_first_opt = get_option_string_coerced(options, "caseFirst").map(|v| {
1243+
if ["upper", "lower", "false"].contains(&v.as_str()) {
1244+
v
1245+
} else {
1246+
throw_range_error(&format!(
1247+
"Value {v} out of range for Intl options property caseFirst"
1248+
))
1249+
}
1250+
});
12261251
let sensitivity = enum_option_strict(
12271252
options,
12281253
"sensitivity",
@@ -1233,20 +1258,21 @@ fn make_instance(closure: *const ClosureHeader, kind: &str, locales: f64, option
12331258
// ResolveLocale: when an option is absent, fall back to the matching
12341259
// Unicode (`-u-`) extension keyword in the resolved locale — `kn`
12351260
// (numeric, value-less ⇒ true) and `kf` (caseFirst).
1236-
let numeric = numeric_opt.unwrap_or_else(|| {
1237-
match unicode_extension_keyword(&locale, "kn") {
1261+
let numeric =
1262+
numeric_opt.unwrap_or_else(|| match unicode_extension_keyword(&locale, "kn") {
12381263
Some(v) => v != "false",
12391264
None => false,
1240-
}
1241-
});
1265+
});
12421266
let case_first = case_first_opt.unwrap_or_else(|| {
12431267
unicode_extension_keyword(&locale, "kf")
12441268
.filter(|v| ["upper", "lower", "false"].contains(&v.as_str()))
12451269
.unwrap_or_else(|| "false".to_string())
12461270
});
1247-
let collation = unicode_extension_keyword(&locale, "co")
1248-
.filter(|v| !v.is_empty() && v != "standard" && v != "search")
1249-
.unwrap_or_else(|| "default".to_string());
1271+
let collation = collation_opt.unwrap_or_else(|| {
1272+
unicode_extension_keyword(&locale, "co")
1273+
.filter(|v| !v.is_empty() && v != "standard" && v != "search")
1274+
.unwrap_or_else(|| "default".to_string())
1275+
});
12501276
set_internal_field(obj, KEY_COL_USAGE, string_value(&usage));
12511277
set_internal_field(obj, KEY_COL_SENSITIVITY, string_value(&sensitivity));
12521278
set_internal_field(obj, KEY_COL_IGNORE_PUNCT, bool_value(ignore_punct));
@@ -1270,7 +1296,12 @@ fn make_instance(closure: *const ClosureHeader, kind: &str, locales: f64, option
12701296
// `? ToObject(options)` (null → TypeError), then GetOption in order:
12711297
// localeMatcher, granularity (options-order.js / options-null.js).
12721298
let options = coerce_options_reject_null(options);
1273-
let _ = enum_option_strict(options, "localeMatcher", &["lookup", "best fit"], "best fit");
1299+
let _ = enum_option_strict(
1300+
options,
1301+
"localeMatcher",
1302+
&["lookup", "best fit"],
1303+
"best fit",
1304+
);
12741305
let granularity =
12751306
normalize_granularity(get_option_string_coerced(options, "granularity"));
12761307
set_internal_field(obj, KEY_GRANULARITY, string_value(&granularity));
@@ -1292,7 +1323,12 @@ fn make_instance(closure: *const ClosureHeader, kind: &str, locales: f64, option
12921323
// TypeError), then GetOption: localeMatcher, type, style
12931324
// (options-getoptionsobject.js / options-order.js).
12941325
let options = get_options_object(options);
1295-
let _ = enum_option_strict(options, "localeMatcher", &["lookup", "best fit"], "best fit");
1326+
let _ = enum_option_strict(
1327+
options,
1328+
"localeMatcher",
1329+
&["lookup", "best fit"],
1330+
"best fit",
1331+
);
12961332
let list_type = enum_option_strict(
12971333
options,
12981334
"type",
@@ -1325,7 +1361,12 @@ fn make_instance(closure: *const ClosureHeader, kind: &str, locales: f64, option
13251361
// `? ToObject(options)` (null → TypeError), then GetOption in order:
13261362
// localeMatcher, numberingSystem, style, numeric (options-order.js).
13271363
let options = coerce_options_reject_null(options);
1328-
let _ = enum_option_strict(options, "localeMatcher", &["lookup", "best fit"], "best fit");
1364+
let _ = enum_option_strict(
1365+
options,
1366+
"localeMatcher",
1367+
&["lookup", "best fit"],
1368+
"best fit",
1369+
);
13291370
if let Some(ns) = get_option_string_coerced(options, "numberingSystem") {
13301371
if !is_well_formed_numbering_system(&ns) {
13311372
throw_range_error(&format!(
@@ -1358,7 +1399,12 @@ fn make_instance(closure: *const ClosureHeader, kind: &str, locales: f64, option
13581399
// (minimumIntegerDigits, min/maxFractionDigits, min/maxSignificantDigits,
13591400
// roundingIncrement, roundingMode, roundingPriority, trailingZeroDisplay).
13601401
let options = get_options_object(options);
1361-
let _ = enum_option_strict(options, "localeMatcher", &["lookup", "best fit"], "best fit");
1402+
let _ = enum_option_strict(
1403+
options,
1404+
"localeMatcher",
1405+
&["lookup", "best fit"],
1406+
"best fit",
1407+
);
13621408
let pr_type = enum_option_strict(options, "type", &["cardinal", "ordinal"], "cardinal");
13631409
set_internal_field(obj, KEY_TYPE, string_value(&pr_type));
13641410
let notation = enum_option_strict(
@@ -1514,17 +1560,26 @@ extern "C" fn plural_rules_constructor_thunk(closure: *const ClosureHeader, rest
15141560
}
15151561

15161562
fn supported_locales_array(locales: f64, options: f64) -> f64 {
1517-
// SupportedLocales: when `options` is not undefined, `? ToObject(options)`
1518-
// (null → TypeError) then `? GetOption(options, "localeMatcher", …)` — so an
1519-
// invalid localeMatcher is a RangeError even though the matcher choice does
1520-
// not affect Perry's lookup result.
1563+
// `supportedLocalesOf(locales, options)`:
1564+
// 1. requestedLocales = ? CanonicalizeLocaleList(locales) ← runs FIRST,
1565+
// so a malformed locale errors before `options` is touched.
1566+
// 2. SupportedLocales(..., options): when `options` is not undefined,
1567+
// `? ToObject(options)` (null → TypeError) then
1568+
// `? GetOption(options, "localeMatcher", …)` — an invalid localeMatcher
1569+
// is a RangeError even though the matcher choice does not affect Perry's
1570+
// lookup result.
1571+
let requested = locales_from_value(locales);
15211572
if !JSValue::from_bits(options.to_bits()).is_undefined() {
15221573
let options = coerce_options_reject_null(options);
1523-
let _ = enum_option_strict(options, "localeMatcher", &["lookup", "best fit"], "best fit");
1574+
let _ = enum_option_strict(
1575+
options,
1576+
"localeMatcher",
1577+
&["lookup", "best fit"],
1578+
"best fit",
1579+
);
15241580
}
15251581
// BestAvailableLocale-filter the canonicalized request list: drop tags whose
15261582
// primary language Perry can't service (e.g. `zxx`), keeping order + dedup.
1527-
let requested = locales_from_value(locales);
15281583
let mut arr = js_array_alloc(0);
15291584
for locale in requested {
15301585
if is_available_locale(&locale) {

crates/perry-runtime/src/intl/date_collator.rs

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,9 @@ pub(crate) fn swedish_collation_key(s: &str) -> Vec<u32> {
350350
#[cfg(feature = "string-normalize")]
351351
fn collation_normalize(s: &str) -> String {
352352
use unicode_normalization::UnicodeNormalization;
353-
s.nfd().collect()
353+
// NFC (composition), not NFD: it makes canonical equivalents equal while
354+
// keeping precomposed `å/ä/ö` intact for the Swedish fast path below.
355+
s.nfc().collect()
354356
}
355357
#[cfg(not(feature = "string-normalize"))]
356358
fn collation_normalize(s: &str) -> String {
@@ -401,20 +403,23 @@ fn strip_ignorable_punctuation(s: &str) -> String {
401403
}
402404

403405
fn is_punctuation(c: char) -> bool {
404-
// ASCII punctuation plus the common Unicode punctuation/symbol ranges that
405-
// UCA marks variable; a pragmatic superset of what the parity tests exercise.
406+
// ASCII punctuation plus an explicit set of Unicode punctuation code points,
407+
// deliberately NOT whole Latin-1 ranges — those contain letters/numbers
408+
// (`ª` U+00AA, `µ` U+00B5, `º` U+00BA, the `¹²³` superscripts, `¼½¾`
409+
// fractions) that must not be stripped or distinct strings would compare
410+
// equal. The General Punctuation block (U+2000–U+206F) and CJK punctuation
411+
// (U+3000–U+303F) are all punctuation/spaces and are safe as ranges.
406412
c.is_ascii_punctuation()
407413
|| matches!(c,
408-
'\u{00A1}'..='\u{00BF}'
409-
| '\u{2010}'..='\u{2027}'
410-
| '\u{2030}'..='\u{205E}'
414+
'\u{00A1}' | '\u{00A7}' | '\u{00AB}' | '\u{00B6}' | '\u{00B7}'
415+
| '\u{00BB}' | '\u{00BF}'
416+
| '\u{2000}'..='\u{206F}'
411417
| '\u{3000}'..='\u{303F}')
412418
}
413419

414420
pub(crate) fn collator_compare_object(obj: *const ObjectHeader, left: f64, right: f64) -> f64 {
415421
let locale = get_string_field(obj, KEY_LOCALE).unwrap_or_else(|| "en-US".to_string());
416-
let ignore_punct =
417-
get_field(obj, KEY_COL_IGNORE_PUNCT).to_bits() == crate::value::TAG_TRUE;
422+
let ignore_punct = get_field(obj, KEY_COL_IGNORE_PUNCT).to_bits() == crate::value::TAG_TRUE;
418423
let (mut l, mut r) = (value_to_string(left), value_to_string(right));
419424
if ignore_punct {
420425
l = strip_ignorable_punctuation(&l);

crates/perry-runtime/src/intl/list_relative_plural.rs

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -301,19 +301,30 @@ pub(crate) fn rtf_parts(value: f64, unit: &str) -> Vec<(&'static str, String)> {
301301
parts
302302
}
303303

304+
/// `ToNumber(value)` that rejects BigInt with a TypeError, matching the
305+
/// ECMA-262 abstract operation. `js_number_coerce` alone converts `1n` → `1`
306+
/// (for `Number(1n)`), but `Intl` `format`/`select*` go through ToNumber, so
307+
/// `format(1n, "day")` must throw. A Symbol still throws inside `js_number_coerce`,
308+
/// and an object's `valueOf` is honoured there.
309+
pub(crate) fn to_number_reject_bigint(value: f64) -> f64 {
310+
if JSValue::from_bits(value.to_bits()).is_bigint() {
311+
throw_type_error("Cannot convert a BigInt value to a number");
312+
}
313+
crate::builtins::js_number_coerce(value)
314+
}
315+
304316
/// Shared steps of `format`/`formatToParts`: `value = ? ToNumber(value)` (a
305-
/// Symbol throws TypeError; an object's `valueOf` is honoured), then
317+
/// Symbol or BigInt throws TypeError; an object's `valueOf` is honoured), then
306318
/// `unit = ? ToString(unit)`, then the RangeError guards for a non-finite value
307319
/// or an unsanctioned unit. Returns the rendered parts together with the
308320
/// resolved singular `unit` (the `[[Unit]]` field formatToParts attaches).
309321
pub(crate) fn rtf_instance_parts_and_unit(
310322
value: f64,
311323
unit_arg: f64,
312324
) -> (Vec<(&'static str, String)>, &'static str) {
313-
// Full ToNumber (not `JSValue::to_number`, which returns NaN for objects and
314-
// doesn't reject Symbols): valueOf is invoked, and a Symbol value throws the
315-
// expected TypeError *before* the finite-ness RangeError (format/value-symbol.js).
316-
let number = crate::builtins::js_number_coerce(value);
325+
// ToNumber: a Symbol/BigInt value throws TypeError *before* the finite-ness
326+
// RangeError (format/value-symbol.js); an object's valueOf is invoked.
327+
let number = to_number_reject_bigint(value);
317328
let unit_str = value_to_string(unit_arg);
318329
if !number.is_finite() {
319330
throw_range_error("Value need to be finite number for Intl.RelativeTimeFormat.format()");
@@ -513,8 +524,8 @@ pub(crate) fn plural_select_range(start: f64, end: f64) -> f64 {
513524
{
514525
throw_type_error("Intl.PluralRules.prototype.selectRange: start and end must be defined");
515526
}
516-
let s = crate::builtins::js_number_coerce(start);
517-
let e = crate::builtins::js_number_coerce(end);
527+
let s = to_number_reject_bigint(start);
528+
let e = to_number_reject_bigint(end);
518529
if s.is_nan() || e.is_nan() {
519530
throw_range_error("Invalid values for Intl.PluralRules.selectRange()");
520531
}
@@ -535,16 +546,16 @@ pub(crate) fn plural_rules_resolved_options_object(obj: *const ObjectHeader) ->
535546
"type",
536547
string_value(if is_ordinal { "ordinal" } else { "cardinal" }),
537548
);
538-
let notation =
539-
get_string_field(obj, KEY_PR_NOTATION).unwrap_or_else(|| "standard".to_string());
549+
let notation = get_string_field(obj, KEY_PR_NOTATION).unwrap_or_else(|| "standard".to_string());
540550
set_field(out, "notation", string_value(&notation));
541551
// `compactDisplay` surfaces only when notation is "compact".
542552
if notation == "compact" {
543553
set_field(
544554
out,
545555
"compactDisplay",
546556
string_value(
547-
&get_string_field(obj, KEY_PR_COMPACT_DISPLAY).unwrap_or_else(|| "short".to_string()),
557+
&get_string_field(obj, KEY_PR_COMPACT_DISPLAY)
558+
.unwrap_or_else(|| "short".to_string()),
548559
),
549560
);
550561
}

0 commit comments

Comments
 (0)