Skip to content

Commit ab09b17

Browse files
Replace printables table with unicode_data.rs tables
This gets rid of the `printable.py` script, ensuring that `unicode-table-generator` handles all our Unicode data table generation needs. There are also some drive-by documentation improvements in `library/core/src/char/methods.rs`. There is one change in behavior: we now consider all characters with the `Default_Ignorable_Code_Point` property to be unprintable, since these characters can otherwise be hidden/invisible. I've elected to give each Unicode property its own table, instead of merging them all into one. This is slightly less efficient in terms of space, but should allow us to expose these tables in the future with public methods on `char`.
1 parent 38799a3 commit ab09b17

13 files changed

Lines changed: 830 additions & 1025 deletions

File tree

library/core/src/char/methods.rs

Lines changed: 268 additions & 115 deletions
Large diffs are not rendered by default.

library/core/src/fmt/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2941,7 +2941,7 @@ impl Debug for str {
29412941
let mut chars = rest.chars();
29422942
if let Some(c) = chars.next() {
29432943
let esc = c.escape_debug_ext(EscapeDebugExtArgs {
2944-
escape_grapheme_extended: true,
2944+
escape_grapheme_extender: true,
29452945
escape_single_quote: false,
29462946
escape_double_quote: true,
29472947
});
@@ -2973,7 +2973,7 @@ impl Debug for char {
29732973
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
29742974
f.write_char('\'')?;
29752975
let esc = self.escape_debug_ext(EscapeDebugExtArgs {
2976-
escape_grapheme_extended: true,
2976+
escape_grapheme_extender: true,
29772977
escape_single_quote: true,
29782978
escape_double_quote: false,
29792979
});

library/core/src/str/lossy.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ impl fmt::Debug for Debug<'_> {
123123
let mut from = 0;
124124
for (i, c) in valid.char_indices() {
125125
let esc = c.escape_debug_ext(EscapeDebugExtArgs {
126-
escape_grapheme_extended: true,
126+
escape_grapheme_extender: true,
127127
escape_single_quote: false,
128128
escape_double_quote: true,
129129
});

library/core/src/str/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3195,7 +3195,7 @@ impl_fn_for_zst! {
31953195
#[derive(Clone)]
31963196
struct CharEscapeDebugContinue impl Fn = |c: char| -> char::EscapeDebug {
31973197
c.escape_debug_ext(EscapeDebugExtArgs {
3198-
escape_grapheme_extended: false,
3198+
escape_grapheme_extender: false,
31993199
escape_single_quote: true,
32003200
escape_double_quote: true
32013201
})

library/core/src/unicode/mod.rs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,16 @@ pub use unicode_data::conversions;
99
#[rustfmt::skip]
1010
pub(crate) use unicode_data::alphabetic::lookup as Alphabetic;
1111
pub(crate) use unicode_data::case_ignorable::lookup as Case_Ignorable;
12+
pub(crate) use unicode_data::cf::lookup as Cf;
13+
pub(crate) use unicode_data::cn_planes_0_3::lookup as Cn_planes_0_3;
14+
pub(crate) use unicode_data::default_ignorable_code_point::lookup as Default_Ignorable_Code_Point;
1215
pub(crate) use unicode_data::grapheme_extend::lookup as Grapheme_Extend;
1316
pub(crate) use unicode_data::lowercase::lookup as Lowercase;
1417
pub(crate) use unicode_data::lt::lookup as Lt;
1518
pub(crate) use unicode_data::n::lookup as N;
1619
pub(crate) use unicode_data::uppercase::lookup as Uppercase;
1720
pub(crate) use unicode_data::white_space::lookup as White_Space;
1821

19-
pub(crate) mod printable;
20-
2122
#[allow(unreachable_pub)]
2223
pub mod unicode_data;
2324

@@ -27,8 +28,13 @@ pub mod unicode_data;
2728
/// New versions of Unicode are released regularly and subsequently all methods
2829
/// in the standard library depending on Unicode are updated. Therefore the
2930
/// behavior of some `char` and `str` methods and the value of this constant
30-
/// changes over time. This is *not* considered to be a breaking change.
31+
/// changes over time, within the boundaries of Unicode's [stability policies].
32+
/// This is *not* considered to be a breaking change.
33+
///
34+
/// [stability policies]: https://www.unicode.org/policies/stability_policy.html
3135
///
3236
/// The version numbering scheme is explained in
33-
/// [Unicode 11.0 or later, Section 3.1 Versions of the Unicode Standard](https://www.unicode.org/versions/Unicode11.0.0/ch03.pdf#page=4).
37+
/// [Section 3.1 (Version Numbering)] of the Unicode Standard.
38+
///
39+
/// [Section 3.1 (Version Numbering)]: https://www.unicode.org/versions/latest/core-spec/chapter-3/#G49512
3440
pub const UNICODE_VERSION: (u8, u8, u8) = unicode_data::UNICODE_VERSION;

library/core/src/unicode/printable.py

Lines changed: 0 additions & 258 deletions
This file was deleted.

0 commit comments

Comments
 (0)