Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 1 addition & 8 deletions lib/internal/encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
const kDecoder = Symbol('decoder');
const kFatal = Symbol('kFatal');
const kUTF8FastPath = Symbol('kUTF8FastPath');
const kLatin1FastPath = Symbol('kLatin1FastPath');
const kIgnoreBOM = Symbol('kIgnoreBOM');

const {
Expand All @@ -55,7 +54,7 @@
encodeIntoResults,
encodeUtf8String,
decodeUTF8,
decodeLatin1,

Check failure on line 57 in lib/internal/encoding.js

View workflow job for this annotation

GitHub Actions / lint-js-and-md

'decodeLatin1' is assigned a value but never used
} = binding;

const { Buffer } = require('buffer');
Expand Down Expand Up @@ -420,10 +419,9 @@
this[kFatal] = Boolean(options?.fatal);
// Only support fast path for UTF-8.
this[kUTF8FastPath] = enc === 'utf-8';
this[kLatin1FastPath] = enc === 'windows-1252';
this[kHandle] = undefined;

if (!this[kUTF8FastPath] && !this[kLatin1FastPath]) {
if (!this[kUTF8FastPath]) {
this.#prepareConverter();
}
}
Expand All @@ -440,16 +438,11 @@
validateDecoder(this);

this[kUTF8FastPath] &&= !(options?.stream);
this[kLatin1FastPath] &&= !(options?.stream);

if (this[kUTF8FastPath]) {
return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
}

if (this[kLatin1FastPath]) {
return decodeLatin1(input, this[kIgnoreBOM], this[kFatal]);
Comment thread
yashwantbezawada marked this conversation as resolved.
}

this.#prepareConverter();

validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
Expand Down
85 changes: 85 additions & 0 deletions test/parallel/test-whatwg-encoding-custom-windows-1252.js
Comment thread
Renegade334 marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
'use strict';

// Tests for Windows-1252 encoding, specifically the 0x80-0x9F range
// where it differs from ISO-8859-1 (Latin-1).
// Refs: https://github.com/nodejs/node/issues/56542
// Refs: https://encoding.spec.whatwg.org/#windows-1252

require('../common');

const assert = require('assert');

// Regression case from issue #56542: in Windows-1252 the byte 0x92 is
// RIGHT SINGLE QUOTATION MARK (U+2019). Decoding it as Latin-1 would
// instead yield the C1 control character U+0092.
{
  const decoder = new TextDecoder('windows-1252');
  const decoded = decoder.decode(new Uint8Array([0x92]));

  // The quotation mark inside the message is spelled as a \u escape so that
  // it cannot be confused with (or mangled into) the string delimiter.
  assert.strictEqual(
    decoded.charCodeAt(0),
    0x2019,
    'Byte 0x92 should decode to U+2019 (\u2019) not U+0092'
  );
  assert.strictEqual(decoded, '\u2019', 'Expected right single quotation mark');
}

// Test every byte in the 0x80-0x9F range. This is the only range where
// Windows-1252 differs from ISO-8859-1: 27 of the 32 bytes map to printable
// characters, while the five bytes that Windows-1252 leaves undefined
// (0x81, 0x8D, 0x8F, 0x90, 0x9D) map to the code point of the same value.
// These mappings are defined by the WHATWG Encoding Standard.
// Source: https://encoding.spec.whatwg.org/#index-windows-1252
{
  // Each entry is [input byte, expected code point, expected string].
  // The curly quotation marks are written as \u escapes, like the undefined
  // entries, so they can never collide with the string delimiters.
  const testCases = [
    [0x80, 0x20AC, '€'], // EURO SIGN
    [0x81, 0x0081, '\u0081'], // Undefined (maps to itself)
    [0x82, 0x201A, '‚'], // SINGLE LOW-9 QUOTATION MARK
    [0x83, 0x0192, 'ƒ'], // LATIN SMALL LETTER F WITH HOOK
    [0x84, 0x201E, '„'], // DOUBLE LOW-9 QUOTATION MARK
    [0x85, 0x2026, '…'], // HORIZONTAL ELLIPSIS
    [0x86, 0x2020, '†'], // DAGGER
    [0x87, 0x2021, '‡'], // DOUBLE DAGGER
    [0x88, 0x02C6, 'ˆ'], // MODIFIER LETTER CIRCUMFLEX ACCENT
    [0x89, 0x2030, '‰'], // PER MILLE SIGN
    [0x8A, 0x0160, 'Š'], // LATIN CAPITAL LETTER S WITH CARON
    [0x8B, 0x2039, '‹'], // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    [0x8C, 0x0152, 'Œ'], // LATIN CAPITAL LIGATURE OE
    [0x8D, 0x008D, '\u008D'], // Undefined (maps to itself)
    [0x8E, 0x017D, 'Ž'], // LATIN CAPITAL LETTER Z WITH CARON
    [0x8F, 0x008F, '\u008F'], // Undefined (maps to itself)
    [0x90, 0x0090, '\u0090'], // Undefined (maps to itself)
    [0x91, 0x2018, '\u2018'], // LEFT SINGLE QUOTATION MARK
    [0x92, 0x2019, '\u2019'], // RIGHT SINGLE QUOTATION MARK
    [0x93, 0x201C, '\u201C'], // LEFT DOUBLE QUOTATION MARK
    [0x94, 0x201D, '\u201D'], // RIGHT DOUBLE QUOTATION MARK
    [0x95, 0x2022, '•'], // BULLET
    [0x96, 0x2013, '–'], // EN DASH
    [0x97, 0x2014, '—'], // EM DASH
    [0x98, 0x02DC, '˜'], // SMALL TILDE
    [0x99, 0x2122, '™'], // TRADE MARK SIGN
    [0x9A, 0x0161, 'š'], // LATIN SMALL LETTER S WITH CARON
    [0x9B, 0x203A, '›'], // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    [0x9C, 0x0153, 'œ'], // LATIN SMALL LIGATURE OE
    [0x9D, 0x009D, '\u009D'], // Undefined (maps to itself)
    [0x9E, 0x017E, 'ž'], // LATIN SMALL LETTER Z WITH CARON
    [0x9F, 0x0178, 'Ÿ'], // LATIN CAPITAL LETTER Y WITH DIAERESIS
  ];

  // Upper-case hex rendering used in the failure messages; `width` is the
  // minimum number of digits (zero-padded on the left).
  const toHex = (value, width) =>
    value.toString(16).toUpperCase().padStart(width, '0');

  const decoder = new TextDecoder('windows-1252');

  for (const [byte, expectedCodePoint, expectedChar] of testCases) {
    const decoded = decoder.decode(new Uint8Array([byte]));
    const actualCodePoint = decoded.charCodeAt(0);
    // Every byte in this table is >= 0x80, so it always renders as 2 digits.
    const byteLabel = `Byte 0x${toHex(byte, 2)}`;

    // Check the numeric code point first: it gives a readable failure even
    // when the characters involved are invisible control characters.
    assert.strictEqual(
      actualCodePoint,
      expectedCodePoint,
      `${byteLabel} should decode to ` +
      `U+${toHex(expectedCodePoint, 4)} ` +
      `but got U+${toHex(actualCodePoint, 4)}`
    );

    // Then check the whole decoded string, which also catches any extra
    // code units in the output.
    assert.strictEqual(
      decoded,
      expectedChar,
      `${byteLabel} should decode to ` +
      `${expectedChar} but got ${decoded}`
    );
  }
}
Loading