diff --git a/README.md b/README.md index 972f3c69..c5962007 100644 --- a/README.md +++ b/README.md @@ -154,13 +154,44 @@ import { utf16fromString, utf16toString } from '@exodus/bytes/utf16.js' import { utf16fromStringLoose, utf16toStringLoose } from '@exodus/bytes/utf16.js' ``` -_These methods by design encode/decode BOM (codepoint `U+FEFF` Byte Order Mark) as-is._ +_These methods by design encode/decode BOM (codepoint `U+FEFF` Byte Order Mark) as-is._\ _If you need BOM handling or detection, use `@exodus/bytes/encoding.js`_ #### `utf16fromString(string, format = 'uint16')` + +Encode a string to UTF-16 bytes (strict mode) + +Throws on invalid Unicode (unpaired surrogates). + #### `utf16fromStringLoose(string, format = 'uint16')` -#### `utf16toString(arr, 'uint16')` -#### `utf16toStringLoose(arr, 'uint16')` + +Encode a string to UTF-16 bytes (loose mode) + +Replaces invalid Unicode (unpaired surrogates) with replacement codepoints `U+FFFD` +per [WHATWG Encoding](https://encoding.spec.whatwg.org/) specification. + +_Such replacement is a non-injective function, is irreversible and causes collisions.\ +Prefer using strict throwing methods for cryptography applications._ + +#### `utf16toString(arr, format = 'uint16')` + +Decode UTF-16 bytes to a string (strict mode) + +Throws on invalid UTF-16 byte sequences. + +Throws on odd byte length. + +#### `utf16toStringLoose(arr, format = 'uint16')` + +Decode UTF-16 bytes to a string (loose mode) + +Replaces invalid UTF-16 byte sequences with replacement codepoints `U+FFFD` +per [WHATWG Encoding](https://encoding.spec.whatwg.org/) specification. + +_Such replacement is a non-injective function, is irreversible and causes collisions.\ +Prefer using strict throwing methods for cryptography applications._ + +Throws on odd byte length. 
### `@exodus/bytes/single-byte.js` diff --git a/array.d.ts b/array.d.ts index 81dc817b..c299a76a 100644 --- a/array.d.ts +++ b/array.d.ts @@ -15,6 +15,7 @@ // < TypeScript 5.7 doesn't support templates for Uint8Array. // So this type is defined as a workaround to evaluate to Uint8Array on all versions of TypeScript. export type Uint8ArrayBuffer = ReturnType<typeof Uint8Array.from>; +export type Uint16ArrayBuffer = ReturnType<typeof Uint16Array.from>; /** * Output format for typed array conversions diff --git a/package.json b/package.json index bcb87dd2..854500d7 100644 --- a/package.json +++ b/package.json @@ -110,6 +110,7 @@ "/single-byte.d.ts", "/single-byte.node.js", "/utf16.js", + "/utf16.d.ts", "/utf16.node.js", "/utf8.js", "/utf8.d.ts", @@ -176,6 +177,7 @@ "default": "./encoding-browser.js" }, "./utf16.js": { + "types": "./utf16.d.ts", "node": "./utf16.node.js", "default": "./utf16.js" }, diff --git a/utf16.d.ts b/utf16.d.ts new file mode 100644 index 00000000..62550f5f --- /dev/null +++ b/utf16.d.ts @@ -0,0 +1,92 @@ +/** + * UTF-16 encoding/decoding + * + * ```js + * import { utf16fromString, utf16toString } from '@exodus/bytes/utf16.js' + * + * // loose + * import { utf16fromStringLoose, utf16toStringLoose } from '@exodus/bytes/utf16.js' + * ``` + * + * _These methods by design encode/decode BOM (codepoint `U+FEFF` Byte Order Mark) as-is._\ + * _If you need BOM handling or detection, use `@exodus/bytes/encoding.js`_ + * + * @module @exodus/bytes/utf16.js + */ + +/// + +import type { Uint8ArrayBuffer, Uint16ArrayBuffer } from './array.js'; + +/** + * Format of UTF-16 data: the encoders' output format and the decoders' input format + */ +export type Utf16Format = 'uint16' | 'uint8-le' | 'uint8-be'; + +/** + * Encode a string to UTF-16 bytes (strict mode) + * + * Throws on invalid Unicode (unpaired surrogates). + * + * @param string - The string to encode + * @param format - Output format (default: 'uint16') + * @returns The encoded bytes + */ +export function utf16fromString(string: string, format?: 'uint16'): Uint16ArrayBuffer; +export function 
utf16fromString(string: string, format: 'uint8-le'): Uint8ArrayBuffer; +export function utf16fromString(string: string, format: 'uint8-be'): Uint8ArrayBuffer; +export function utf16fromString(string: string, format?: Utf16Format): Uint16ArrayBuffer | Uint8ArrayBuffer; + +/** + * Encode a string to UTF-16 bytes (loose mode) + * + * Replaces invalid Unicode (unpaired surrogates) with replacement codepoints `U+FFFD` + * per [WHATWG Encoding](https://encoding.spec.whatwg.org/) specification. + * + * _Such replacement is a non-injective function, is irreversible and causes collisions.\ + * Prefer using strict throwing methods for cryptography applications._ + * + * @param string - The string to encode + * @param format - Output format (default: 'uint16') + * @returns The encoded bytes + */ +export function utf16fromStringLoose(string: string, format?: 'uint16'): Uint16ArrayBuffer; +export function utf16fromStringLoose(string: string, format: 'uint8-le'): Uint8ArrayBuffer; +export function utf16fromStringLoose(string: string, format: 'uint8-be'): Uint8ArrayBuffer; +export function utf16fromStringLoose(string: string, format?: Utf16Format): Uint16ArrayBuffer | Uint8ArrayBuffer; + +/** + * Decode UTF-16 bytes to a string (strict mode) + * + * Throws on invalid UTF-16 byte sequences. + * + * Throws on odd byte length. 
+ * + * @param arr - The bytes to decode + * @param format - Input format (default: 'uint16') + * @returns The decoded string + */ +export function utf16toString(arr: Uint16ArrayBuffer, format?: 'uint16'): string; +export function utf16toString(arr: Uint8ArrayBuffer, format: 'uint8-le'): string; +export function utf16toString(arr: Uint8ArrayBuffer, format: 'uint8-be'): string; +export function utf16toString(arr: Uint16ArrayBuffer | Uint8ArrayBuffer, format?: Utf16Format): string; + +/** + * Decode UTF-16 bytes to a string (loose mode) + * + * Replaces invalid UTF-16 byte sequences with replacement codepoints `U+FFFD` + * per [WHATWG Encoding](https://encoding.spec.whatwg.org/) specification. + * + * _Such replacement is a non-injective function, is irreversible and causes collisions.\ + * Prefer using strict throwing methods for cryptography applications._ + * + * Throws on odd byte length. + * + * @param arr - The bytes to decode + * @param format - Input format (default: 'uint16') + * @returns The decoded string + */ +export function utf16toStringLoose(arr: Uint16ArrayBuffer, format?: 'uint16'): string; +export function utf16toStringLoose(arr: Uint8ArrayBuffer, format: 'uint8-le'): string; +export function utf16toStringLoose(arr: Uint8ArrayBuffer, format: 'uint8-be'): string; +export function utf16toStringLoose(arr: Uint16ArrayBuffer | Uint8ArrayBuffer, format?: Utf16Format): string;