Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 147 additions & 12 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
export type DType =
| "i1" | "u1" | "i2" | "u2" | "i4" | "u4" | "i8" | "u8"
| "f2" | "f4" | "f8" | "b1";
| "f2" | "f4" | "f8" | "b1" | `U${number}`; // e.g., U10 for strings of length 10

export type TypedArray =
| Int8Array
Expand All @@ -27,6 +27,15 @@
convertFloat16?: boolean;
}

/**
 * A `String` built by interpreting a region of a binary buffer as a sequence
 * of 32-bit Unicode code points (one `Uint32Array` element per character).
 *
 * The buffer is read through a `Uint32Array` view, so `byteOffset` must be a
 * multiple of 4 and the elements are read in the platform's byte order.
 *
 * @param buf        Buffer holding the code points.
 * @param byteOffset Byte offset of the first code point (defaults to the start of `buf`).
 * @param length     Number of code points to read (defaults to the rest of `buf`).
 * @throws RangeError if the view cannot be created or an element is not a valid code point.
 */
class StringFromCodePoint extends String {
  constructor(buf: ArrayBufferLike, byteOffset?: number, length?: number) {
    const codePoints = new Uint32Array(buf, byteOffset, length);

    // Spreading the whole array into one call passes every element as a
    // separate argument, which overflows the call stack for large inputs.
    // Decode in bounded chunks and join the pieces instead.
    const chunkSize = 0x2000;
    const pieces: string[] = [];
    for (let start = 0; start < codePoints.length; start += chunkSize) {
      pieces.push(String.fromCodePoint(...codePoints.subarray(start, start + chunkSize)));
    }

    super(pieces.join(""));
  }
}

const textDecoder = new TextDecoder("latin1");

function readHeader(buf: ArrayBufferLike) {
Expand Down Expand Up @@ -64,6 +73,17 @@
function dtypeToArray(dtype: string, buf: ArrayBufferLike, offset: number, opts: Options) {
const little = dtype.startsWith("<") || dtype.startsWith("|"); // | = not applicable
const code = dtype.substring(dtype.length -2); // e.g., 'f8', 'i8'
//parse unicode dtype. The format is a 'U' character followed by a number that is the number of unicode characters in the string
if (code[0] === "U") {
const size = parseInt(code.substring(1))
const _string = String(new StringFromCodePoint(buf, offset));
const strings : string[] = [];
//split the string into an array of strings with length dtype.size
for (let i = 0; i < _string.length; i += size) {
strings.push(_string.substring(i, i + size).replace(/\0/g, ''));
}
return strings;
}
switch (code) {
case "b1": return new Uint8Array(buf, offset);
case "i1": return new Int8Array(buf, offset);
Expand Down Expand Up @@ -99,13 +119,15 @@
return (s ? -1 : 1) * Math.pow(2, e - 15) * (1 + f / Math.pow(2, 10));
}

export async function load(source: string | ArrayBuffer | ArrayBufferView, opts: Options = {}): Promise<NpyArray> {
export async function load(source: string | ArrayBuffer | ArrayBufferView | Blob, opts: Options = {}): Promise<NpyArray> {
let buf: ArrayBufferLike;
if (typeof source === "string") {
const res = await fetch(source);
buf = await res.arrayBuffer();
} else if (source instanceof ArrayBuffer) {
buf = source;
} else if (source instanceof Blob) {
buf = await source.arrayBuffer();
} else {
buf = source.buffer;
}
Expand All @@ -121,7 +143,7 @@
const dataOffset = headerOffset + headerLen;
const data = dtypeToArray(dtype, buf, dataOffset, opts);

return { data, shape, dtype: dtype.slice(1) as DType, fortranOrder };

Check failure on line 146 in src/index.ts

View workflow job for this annotation

GitHub Actions / test (22)

Type 'Uint32Array<ArrayBufferLike> | string[] | Uint8Array<ArrayBufferLike> | Int8Array<ArrayBufferLike> | ... 6 more ... | Float64Array<...>' is not assignable to type 'ArrayBufferView<ArrayBufferLike>'.
}


Expand Down Expand Up @@ -170,6 +192,108 @@
throw new TypeError(`Unsupported dtype for ${kind}`);
}

/**
 * Converts a plain JavaScript array into the typed array that backs the given dtype.
 *
 * - `U<n>` dtypes: every element is encoded as exactly `n` 32-bit code points
 *   (zero padded, truncated when longer) and the raw bytes are returned as a
 *   `Uint8Array`. Elements that are not strings are encoded as empty strings.
 * - `i8` / `u8`: elements are converted with `BigInt`, so plain numbers are accepted.
 * - All other numeric dtypes: elements are converted with `Number`.
 *
 * @param dtype Target dtype code without the byte-order prefix (e.g. `"f4"`, `"U10"`).
 * @param array Source values.
 * @returns A typed array holding the converted values.
 * @throws TypeError if `array` is not an array.
 * @throws Error if the dtype is not supported or its `U` width is not a non-negative integer.
 * @throws RangeError if a non-integer number is converted for an `i8` / `u8` dtype.
 */
export function arrayToTypedArray(
  dtype: DType,
  array: Array<number | bigint | string | boolean>,
): TypedArray {
  if (!Array.isArray(array)) throw new TypeError("Expected an array");

  if (dtype.startsWith("U")) {
    // Unicode string array: `size` code points per element, 4 bytes each.
    const size = Number.parseInt(dtype.substring(1), 10);
    if (!Number.isInteger(size) || size < 0) {
      throw new Error(`Unsupported dtype: ${dtype}`);
    }
    const uint32 = new Uint32Array(array.length * size);
    array.forEach((item, i) => {
      const text = typeof item === "string" ? item : "";
      // Iterate by code point, not by UTF-16 unit, so characters outside the
      // BMP occupy a single slot instead of leaking a lone surrogate.
      const codePoints = Array.from(text, (ch) => ch.codePointAt(0) ?? 0);
      uint32.set(codePoints.slice(0, size), i * size);
    });
    return new Uint8Array(uint32.buffer);
  }

  const toNumbers = (): number[] => array.map(Number);
  // The 64-bit typed arrays only accept bigint elements; convert explicitly.
  const toBigInts = (): bigint[] => array.map((value) => BigInt(value));

  switch (dtype) {
    case "b1": return new Uint8Array(toNumbers());
    case "i1": return new Int8Array(toNumbers());
    case "u1": return new Uint8Array(toNumbers());
    case "i2": return new Int16Array(toNumbers());
    case "u2": return new Uint16Array(toNumbers());
    case "i4": return new Int32Array(toNumbers());
    case "u4": return new Uint32Array(toNumbers());
    case "i8": return BigInt64Array.from(toBigInts());
    case "u8": return BigUint64Array.from(toBigInts());
    case "f4": return new Float32Array(toNumbers());
    case "f8": return new Float64Array(toNumbers());
    default: throw new Error(`Unsupported dtype: ${dtype}`);
  }
}

/**
 * Infers a `U<n>` dtype wide enough to hold every string in `array`.
 *
 * The width is the longest `String.prototype.length` found (UTF-16 units),
 * which is never smaller than the number of code points, with a minimum of 1.
 * Elements that are not strings are ignored, and an empty array yields `U1`.
 *
 * @param array Strings to measure.
 * @returns The inferred dtype, e.g. `U10` when the longest string has length 10.
 */
function inferUnicodeDtypeFromStringArray(array: string[]): DType {
  let longestStringLength = 0;
  // Start from index 0 with a type guard so an empty array or a non-string
  // first element cannot crash or poison the result with NaN.
  for (const element of array) {
    if (typeof element === "string" && element.length > longestStringLength) {
      longestStringLength = element.length;
    }
  }
  return `U${Math.max(1, longestStringLength)}` as DType;
}

/**
 * Infers the smallest dtype that stores every number in `array` without loss.
 *
 * - If any value is not an integer the result is a float dtype: `f4` only when
 *   every value is NaN or a finite number that survives a float32 round trip,
 *   otherwise `f8`.
 * - If all values are integers and none is negative, the narrowest of
 *   `u1`/`u2`/`u4`/`u8` that holds the maximum is returned.
 * - Otherwise the narrowest of `i1`/`i2`/`i4`/`i8` whose (asymmetric) range
 *   covers both the minimum and the maximum is returned.
 *
 * An empty array yields `u1`.
 *
 * @param array Numbers to inspect.
 * @returns The inferred dtype code.
 */
function inferDtypeFromNumberArray(array: number[]): DType {
  let isInteger = true;
  let fitsFloat32 = true;
  // Starting both bounds at 0 is harmless: 0 fits in every dtype.
  let min = 0;
  let max = 0;

  for (const num of array) {
    if (!Number.isInteger(num)) {
      isInteger = false;
    }
    // float32 is only safe when the value is unchanged after rounding to it.
    // Magnitudes above the float32 maximum round to Infinity and fail this test.
    const exactInFloat32 = Number.isFinite(num) && Math.fround(num) === num;
    if (!Number.isNaN(num) && !exactInFloat32) {
      fitsFloat32 = false;
    }
    if (num < min) min = num;
    if (num > max) max = num;
  }

  if (!isInteger) {
    return fitsFloat32 ? "f4" : "f8";
  }

  if (min >= 0) {
    // Unsigned integers
    if (max <= 0xFF) return "u1";
    if (max <= 0xFFFF) return "u2";
    if (max <= 0xFFFFFFFF) return "u4";
    return "u8";
  }

  // Signed integers: two's-complement ranges reach one further on the negative side.
  if (min >= -0x80 && max <= 0x7F) return "i1";
  if (min >= -0x8000 && max <= 0x7FFF) return "i2";
  if (min >= -0x80000000 && max <= 0x7FFFFFFF) return "i4";
  return "i8";
}

/**
 * Infers a dtype for a plain JavaScript array by looking at its first element.
 *
 * - An empty array defaults to `f8`.
 * - A number first element delegates to `inferDtypeFromNumberArray`.
 * - A string first element delegates to `inferUnicodeDtypeFromStringArray`.
 * - An array first element is inspected recursively; only that first
 *   sub-array is examined.
 *
 * @param array Values to inspect.
 * @returns The inferred dtype code.
 * @throws TypeError if the first element is none of number, string or array.
 */
export function inferDtypeFromArray(array: Array<number | number[] | string | string[]>): DType {
  if (array.length === 0) return "f8"; // default to float64 for empty arrays
  const first = array[0];

  if (typeof first === "number") {
    return inferDtypeFromNumberArray(array as number[]);
  }

  if (typeof first === "string") {
    return inferUnicodeDtypeFromStringArray(array as string[]);
  }

  if (Array.isArray(first)) {
    // Nested array, infer from first sub-array
    return inferDtypeFromArray(first);
  }

  // Every code path must return or throw; falling through would return
  // undefined and produce a bogus dtype downstream.
  throw new TypeError(`Cannot infer dtype from element of type ${typeof first}`);
}

/**
* True if the system is little endian.
*/
Expand All @@ -178,16 +302,22 @@
return ((new Uint32Array((new Uint8Array([1, 0, 0, 0])).buffer))[0] === 1);
}

export function dump(array: TypedArray, shape: number[]) {
function createPyDescription() {
const dtype = arrayToDtype(array);
const isByte = dtype == 'u1' || dtype == 'i1';
const endianness = isByte ? '|' : (isLittleEndian() ? '<' : '>');
const descr = `${endianness}${dtype}`;
const pyShape = shape.map((v) => { return `${v}`; }).join(",");
return `{'descr':'${descr}','fortran_order':False,'shape':(${pyShape})}`;
}
let pyDesc = createPyDescription();
/**
 * Builds the Python dict literal used as the header description:
 * `{'descr':…,'fortran_order':False,'shape':(…)}`.
 *
 * The byte-order prefix is `|` for the single-byte dtypes `u1`/`i1`, otherwise
 * `<` or `>` depending on `isLittleEndian()`. A one-dimensional shape gets a
 * trailing comma so that it reads as a Python tuple, e.g. `(6,)`.
 *
 * @param dtype dtype code without byte-order prefix.
 * @param shape Array dimensions.
 * @returns The header description string.
 */
function createPyDescription(dtype : DType, shape: number[]) : string {
  const singleByte = dtype === 'u1' || dtype === 'i1';

  let byteOrder: string;
  if (singleByte) {
    byteOrder = '|';
  } else if (isLittleEndian()) {
    byteOrder = '<';
  } else {
    byteOrder = '>';
  }

  const dims = shape.map(String).join(",");
  const tupleBody = shape.length === 1 ? `${dims},` : dims;

  return `{'descr':'${byteOrder}${dtype}','fortran_order':False,'shape':(${tupleBody})}`;
}

export function dump(array: TypedArray | Array<number | string>, shape: number[] | undefined) : ArrayBuffer{
const dtype = array instanceof Array ? inferDtypeFromArray(array) : arrayToDtype(array);
array = array instanceof Array ? arrayToTypedArray(dtype, array) : array;

let pyDesc = createPyDescription(dtype, shape ?? [array.length]);
let headerSize = 10 + pyDesc.length;
const pad = 8 - ((headerSize + 1) % 8);
pyDesc = pyDesc + " ".repeat(pad) + "\x0A";
Expand All @@ -214,7 +344,12 @@
/**
 * Loads an array through the module-level `load`, passing this instance's options.
 *
 * @param source URL string, `ArrayBuffer`, `ArrayBufferView` or `Blob` to read from.
 * @returns Whatever the module-level `load` resolves to.
 */
async load(source: string | ArrayBuffer | ArrayBufferView | Blob) {
  // Accept Blob here as well so the method matches the module-level `load`.
  return load(source, this.opts);
}

/**
 * Static wrapper that forwards `u16` to the module-level `f16toF32` helper
 * and returns its result unchanged.
 *
 * @param u16 Value handed to `f16toF32` — presumably the raw 16-bit float bit pattern; confirm against that helper.
 */
static float16ToFloat32(u16: number) {
  const converted = f16toF32(u16);
  return converted;
}

/**
 * Serializes an array through the module-level `dump`.
 *
 * @param array Typed array, or plain array of numbers or strings, to serialize.
 * @param shape Dimensions to record; may be omitted, in which case it is
 *              forwarded as `undefined` to the module-level `dump`.
 * @returns Whatever the module-level `dump` returns.
 */
dump(array: TypedArray | Array<number | string>, shape?: number[]) {
  // `shape` is optional to match the module-level `dump`, which accepts `undefined`.
  return dump(array, shape);
}
}
Binary file added test/data/unicode.npy
Binary file not shown.
16 changes: 15 additions & 1 deletion test/dump.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,31 @@ describe("npyjs dump", () => {
new BigUint64Array([BigInt(1), BigInt(1), BigInt(1), BigInt(1_000_000_000_000_000)]),
new BigInt64Array([BigInt(1), BigInt(1), BigInt(1), BigInt(1_000_000_000_000_000)]),
new Float64Array([0.1, NaN, 10_000, Infinity]),
new Array(1, 2, 3, 4),
new Array("some", "text", "to", "test here")
];
for (let array of arrays) {
const bytes = npyjs.dump(array, [2, 2]);
const expected = array instanceof Uint8ClampedArray ? new Uint8Array(array.buffer) : array;
// parse back the bytes to check the result
const result = await npyjs.load(bytes);
expect(result.shape).toEqual([2, 2]);
expect(result.data).toEqual(expected);
if (array instanceof Array) {
expect(Array.from(result.data)).toEqual(expected);
} else {
expect(result.data).toEqual(expected);
}
}
});

// Dumping without a shape argument and loading the bytes back should report
// a one-dimensional shape equal to the element count.
it("check 1D shape", async () => {
  const values = new Uint8Array([1, 2, 3, 4, 5, 6]);
  const { shape, data } = await npyjs.load(npyjs.dump(values));
  expect(shape).toEqual([6]);
  expect(data).toEqual(values);
});

it("check width/height", async () => {
const array = new Uint8Array([1, 2, 3, 4, 5, 6]);
const shape = [3, 2];
Expand Down
75 changes: 48 additions & 27 deletions test/load.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,36 @@ function startServer(root = process.cwd()): Promise<{ server: http.Server; baseU
});
}

/**
 * Asserts that each value in `actual` matches the value at the same index in
 * `expectedData`, using a comparison chosen from the dtype:
 *
 * - float dtypes: NaN must match NaN, everything else is compared to 5 digits;
 * - 64-bit integer dtypes (or any bigint value): compared as strings;
 * - anything else: strict equality.
 *
 * @param expectedDtype dtype name or code, e.g. "float32", "f4", "i8", "u8".
 * @param actual        Values read from the loaded array.
 * @param expectedData  Reference values, index-aligned with `actual`.
 */
function checkTail(expectedDtype: string, actual: Array<number | bigint>, expectedData: Array<number | bigint>) {
  // The dtype class is the same for every element, so classify once up front.
  const dtype: string = expectedDtype || "";
  const floatDtype = /^f\d$/.test(dtype) || /float/i.test(dtype);
  const signed64 = dtype === "i8" || /int64/i.test(dtype);
  const unsigned64 = dtype === "u8" || /uint64/i.test(dtype);

  actual.forEach((value, index) => {
    const wanted = expectedData[index];

    if (floatDtype) {
      const wantedIsNaN = typeof wanted === "number" && Number.isNaN(wanted);
      if (wantedIsNaN) {
        const valueIsNaN = typeof value === "number" && Number.isNaN(value);
        expect(valueIsNaN).toBe(true);
        return;
      }
      expect(typeof value).toBe("number");
      expect(value as number).toBeCloseTo(Number(wanted), 5);
      return;
    }

    if (signed64 || unsigned64 || typeof value === "bigint") {
      // String comparison sidesteps bigint-versus-number mismatches.
      expect(value.toString()).toBe(String(wanted));
      return;
    }

    expect(value).toBe(wanted);
  });
}

let server: http.Server;
let baseUrl: string;

Expand All @@ -56,36 +86,27 @@ describe("npyjs parser", () => {
// Tail 5 values from the result for comparison
const tail = Array.prototype.slice.call(data.data.slice(-5)) as Array<number | bigint>;

tail.forEach((actual, j) => {
const expected = records[fname][j];

// Handle NaN explicitly for float comparisons
const isActualNaN = typeof actual === "number" && Number.isNaN(actual);
const isExpectedNaN = typeof expected === "number" && Number.isNaN(expected);

// Detect dtype class
const dtype: string = data.dtype || ""; // e.g., "float32", "f4", "i8", "u8"
const isFloat = /^f\d$/.test(dtype) || /float/i.test(dtype);
const isI64 = dtype === "i8" || /int64/i.test(dtype);
const isU64 = dtype === "u8" || /uint64/i.test(dtype);

if (isFloat) {
if (isExpectedNaN) {
expect(isActualNaN).toBe(true);
} else {
expect(typeof actual).toBe("number");
expect(actual as number).toBeCloseTo(Number(expected), 5);
}
} else if (isI64 || isU64 || typeof actual === "bigint") {
// Compare as strings to avoid bigint/number mismatch
expect(actual.toString()).toBe(String(expected));
} else {
expect(actual).toBe(expected);
}
});
checkTail(data.dtype, tail, records[fname]);
}
});


// Fetches the first recorded .npy file as a Blob, loads it, and checks the
// last five values against the reference records.
it("loads from Blob", async () => {
  const recordsJson = await fs.readFile("test/records.json", "utf8");
  const records = JSON.parse(recordsJson);
  const loader = new (N as any)();

  const [firstFile] = Object.keys(records);
  const npyPath = path.join("test", `${firstFile}.npy`);
  const response = await fetch(`${baseUrl}/${npyPath}`);

  const loaded = await loader.load(await response.blob());

  const lastFive = Array.prototype.slice.call(loaded.data.slice(-5)) as Array<number | bigint>;

  checkTail(loaded.dtype, lastFive, records[firstFile]);
});

it("converts float16 to float32 correctly (spot checks)", () => {
// Support either legacy static name or a new helper if you renamed it.
const f16 =
Expand Down
5 changes: 5 additions & 0 deletions test/records.json
Original file line number Diff line number Diff line change
Expand Up @@ -166,5 +166,10 @@
-20,
-99,
-49
],
"./data/unicode": [
"123",
"test",
"test2"
]
}
Loading