|
1 | 1 | const sanitizer = require('../../../sanitizer/_text')(); |
| 2 | +const unicode = require('../../../helper/unicode'); |
2 | 3 |
|
3 | 4 | module.exports.tests = {}; |
4 | 5 |
|
@@ -165,6 +166,28 @@ it again and again until we reach our destination.` }; |
165 | 166 | t.deepEquals(messages.warnings, []); |
166 | 167 | t.end(); |
167 | 168 | }); |
| 169 | + |
| 170 | + test('truncate should be unicode aware', (t) => {
| 171 | + const grapheme = '\uD842\uDFB7';
| 172 | + const raw = { text: grapheme.repeat(200) };
| 173 | + const clean = {};
| 174 | + const messages = sanitizer.sanitize(raw, clean);
| 175 | + 
| 176 | + // sanity checks on the fixture itself: it is one code point stored as two UTF-16 code units, so counting code units and counting characters give different truncation points
| 177 | + t.equals(grapheme.length, 2, 'fixture is a surrogate pair (2 code units)');
| 178 | + t.equals([...grapheme].length, 1, 'fixture is one code point');
| 179 | + t.equals(grapheme.normalize('NFC'), grapheme, 'fixture is NFC-stable');
| 180 | + 
| 181 | + // expected result: the first 140 whole characters of the 200 supplied are kept. NOTE(review): the fixture is a single code point, so this cannot tell code-point counting apart from grapheme-cluster counting
| 182 | + t.equals(clean.text, grapheme.repeat(140), 'truncated correctly');
| 183 | + 
| 184 | + // measured in UTF-16 code units the truncated result is 280 long (140 characters × 2 code units each), i.e. no surrogate pair was split
| 185 | + t.equals(clean.text.length, 280, 'truncated string is 280 UTF-16 code units');
| 186 | + 
| 187 | + t.deepEquals(messages.errors, [], 'no errors');
| 188 | + t.deepEquals(messages.warnings, [`param 'text' truncated to 140 characters`]);
| 189 | + t.end();
| 190 | + });
168 | 191 | }; |
169 | 192 |
|
170 | 193 | module.exports.all = (tape, common) => { |
|
0 commit comments