|
3 | 3 | // Case_Ignorable : 1063 bytes, 2789 codepoints in 459 ranges (U+0000A8 - U+0E01F0) using skiplist |
4 | 4 | // Cf : 87 bytes, 170 codepoints in 21 ranges (U+0000AD - U+0E0080) using skiplist |
5 | 5 | // Cn_Planes_0_3 : 1677 bytes, 94165 codepoints in 730 ranges (U+000378 - U+03FFFE) using skiplist |
| 6 | +// Default_Ignorable_Code_Point: 83 bytes, 4174 codepoints in 17 ranges (U+0000AD - U+0E1000) using skiplist |
6 | 7 | // Grapheme_Extend : 899 bytes, 2232 codepoints in 383 ranges (U+000300 - U+0E01F0) using skiplist |
7 | 8 | // Lowercase : 943 bytes, 2569 codepoints in 676 ranges (U+0000AA - U+01E944) using bitset |
8 | 9 | // Lt : 33 bytes, 31 codepoints in 10 ranges (U+0001C5 - U+001FFD) using skiplist |
|
12 | 13 | // to_lower : 1112 bytes, 1462 codepoints in 185 ranges (U+0000C0 - U+01E921) using 2-level LUT |
13 | 14 | // to_upper : 1998 bytes, 1554 codepoints in 299 ranges (U+0000B5 - U+01E943) using 2-level LUT |
14 | 15 | // to_title : 340 bytes, 135 codepoints in 49 ranges (U+0000DF - U+00FB17) using 2-level LUT |
15 | | -// Total : 11393 bytes |
| 16 | +// Total : 11476 bytes |
16 | 17 |
|
17 | 18 | #[inline(always)] |
18 | 19 | const fn bitset_search< |
@@ -488,6 +489,44 @@ pub mod cn_planes_0_3 { |
488 | 489 | } |
489 | 490 | } |
490 | 491 |
|
// Lookup table and membership test for the `Default_Ignorable_Code_Point` property.
// The file's header comment lists this table as 83 bytes covering 4174 codepoints in
// 17 ranges (U+0000AD - U+0E1000), stored as a skiplist.
| 492 | +#[rustfmt::skip]
| 493 | +pub mod default_ignorable_code_point {
| 494 | + use super::ShortOffsetRunHeader;
| 495 | +
// Twelve run headers handed to `super::skip_search` together with `OFFSETS`.
// NOTE(review): the first argument of `new` looks like a start index into `OFFSETS`
// (every value here is below 35) and the second like a code-point bound; confirm
// against the definition of `ShortOffsetRunHeader`, which is not visible here.
| 496 | + static SHORT_OFFSET_RUNS: [ShortOffsetRunHeader; 12] = [
| 497 | + ShortOffsetRunHeader::new(0, 847), ShortOffsetRunHeader::new(3, 1564),
| 498 | + ShortOffsetRunHeader::new(5, 4447), ShortOffsetRunHeader::new(7, 6068),
| 499 | + ShortOffsetRunHeader::new(9, 8203), ShortOffsetRunHeader::new(13, 12644),
| 500 | + ShortOffsetRunHeader::new(19, 65024), ShortOffsetRunHeader::new(21, 113824),
| 501 | + ShortOffsetRunHeader::new(29, 119155), ShortOffsetRunHeader::new(31, 917504),
| 502 | + ShortOffsetRunHeader::new(33, 921600), ShortOffsetRunHeader::new(34, 2035712),
| 503 | + ];
// Byte table consumed by `super::skip_search`. The first entry, 173, equals 0xAD,
// the same threshold `lookup` tests before taking the slow path.
// NOTE(review): presumably alternating gap/run lengths; verify in `skip_search`.
| 504 | + static OFFSETS: [u8; 35] = [
| 505 | + 173, 1, 0, 1, 0, 1, 0, 2, 0, 2, 85, 5, 0, 5, 26, 5, 49, 16, 0, 1, 0, 16, 239, 1, 160, 1,
| 506 | + 79, 9, 0, 4, 0, 8, 0, 0, 0,
| 507 | + ];
// Returns the result of the table search for `c`.
// Callers are expected to have handled ASCII already: debug builds assert that `c`
// is not ASCII. Any `c` below U+00AD short-circuits to `false` without calling
// `lookup_slow`.
| 508 | + #[inline]
| 509 | + pub fn lookup(c: char) -> bool {
| 510 | + debug_assert!(!c.is_ascii());
| 511 | + (c as u32) >= 0xad && lookup_slow(c)
| 512 | + }
| 513 | +
// Out-of-line table search; marked `#[inline(never)]` so that only the cheap range
// test in `lookup` is a candidate for inlining into callers.
| 514 | + #[inline(never)]
| 515 | + fn lookup_slow(c: char) -> bool {
// Compile-time validation of the two tables; these assertions are what the
// `unsafe` call below relies on.
| 516 | + const {
| 517 | + assert!(SHORT_OFFSET_RUNS.last().unwrap().0 > char::MAX as u32);
| 518 | + let mut i = 0;
// A manual `while` loop is used rather than an iterator inside this `const` block.
| 519 | + while i < SHORT_OFFSET_RUNS.len() {
| 520 | + assert!(SHORT_OFFSET_RUNS[i].start_index() < OFFSETS.len());
| 521 | + i += 1;
| 522 | + }
| 523 | + }
| 524 | + // SAFETY: We just ensured the last element of `SHORT_OFFSET_RUNS` is greater than `std::char::MAX`
| 525 | + // and the start indices of all elements in `SHORT_OFFSET_RUNS` are smaller than `OFFSETS.len()`.
| 526 | + unsafe { super::skip_search(c, &SHORT_OFFSET_RUNS, &OFFSETS) }
| 527 | + }
| 528 | +}
| 529 | + |
491 | 530 | #[rustfmt::skip] |
492 | 531 | pub mod grapheme_extend { |
493 | 532 | use super::ShortOffsetRunHeader; |
|
0 commit comments