Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
7 changes: 7 additions & 0 deletions index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,13 @@ declare class npyjs {

/**
 * Converts a sequence of float16 values, supplied as a `Uint16Array`, into a
 * `Float32Array`.
 * NOTE(review): presumably each element is a raw half-precision bit pattern —
 * confirm against the implementation.
 */
float16ToFloat32Array(float16Array: Uint16Array): Float32Array;
/**
 * Converts a single float16 value, supplied as a number, into a regular number.
 * NOTE(review): presumably the argument is the raw 16-bit pattern — confirm
 * against the implementation.
 */
static float16ToFloat32(float16: number): number;

/**
 * Parses `.npy` content held in a `Blob`.
 *
 * @param blob The Blob whose bytes are parsed.
 * @param callback Optional function invoked with the parsed result.
 * @returns A promise for the parsed array.
 *
 * NOTE(review): the JavaScript implementation resolves with the callback's
 * return value when a callback is supplied, which this signature does not
 * express — confirm which behavior is intended.
 */
loadBlob(
blob: Blob,
callback?: (result?: Parsed) => any
): Promise<Parsed>;

/**
 * Serializes a flat array into a NumPy `.npy` file.
 *
 * The result is returned synchronously; it is a `Blob`, not a promise.
 *
 * @param array Plain array of numbers or strings, or a typed array.
 * @param opts Optional settings: `dtype` forces the NumPy type descriptor
 *   instead of inferring it from `array`; `fortranOrder` sets the
 *   `fortran_order` flag written to the header.
 * @returns A Blob holding the `.npy` bytes.
 */
dump(
  array:
    | number[]
    | string[]
    | Int16Array
    | Int32Array
    | Uint8Array
    | Uint16Array
    | Uint32Array
    | BigUint64Array
    | Float32Array
    | Float64Array,
  opts?: { fortranOrder?: boolean; dtype?: string }
): Blob;
}

export default npyjs;
202 changes: 199 additions & 3 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
import fetch from 'cross-fetch';
import fetch from "cross-fetch";

/**
 * String wrapper that decodes UTF-32 code units into text.
 *
 * The constructor accepts exactly the arguments of `Uint32Array`
 * (a length, an iterable, or `buffer[, byteOffset[, length]]`), so it can be
 * used wherever a typed-array constructor is expected. Every element of the
 * resulting `Uint32Array` is treated as one Unicode code point.
 */
class StringFromCodePoint extends String {
  constructor(...args) {
    // Same as `new Uint32Array(...args)`.
    const codePoints = Reflect.construct(Uint32Array, args);

    // Decode in bounded chunks: spreading every code point into a single
    // String.fromCodePoint call overflows the engine's argument limit
    // (RangeError) for large arrays.
    const CHUNK_SIZE = 8192;
    let text = "";
    for (let start = 0; start < codePoints.length; start += CHUNK_SIZE) {
      text += String.fromCodePoint(
        ...codePoints.subarray(start, start + CHUNK_SIZE)
      );
    }
    super(text);
  }
}

class npyjs {

Expand Down Expand Up @@ -125,13 +134,22 @@ class npyjs {
);
const header = JSON.parse(
hcontents
.toLowerCase() // True -> true
.replace("True","true") // True -> true
.replace("False","false") // False -> false
.replace(/'/g, '"')
.replace("(", "[")
.replace(/,*\),*/g, "]")
);
const shape = header.shape;
const dtype = this.dtypes[header.descr];
var dtype = this.dtypes[header.descr];
//parse unicode dtype. The format is a 'U' character followed by a number that is the number of unicode characters in the string
if (header.descr[1] === "U") {
dtype = {
name: "unicode",
size: parseInt(header.descr.substring(2)),
arrayConstructor: StringFromCodePoint
};
}

if (!dtype) {
console.error(`Unsupported dtype: ${header.descr}`);
Expand All @@ -143,6 +161,16 @@ class npyjs {
offsetBytes
);

// Convert the StringFromCodePoint object into a plain array of strings
if (dtype.name === "unicode") {
const nums_ = String(nums);
nums = new Array();
//split the string into an array of strings with length dtype.size
for (let i = 0; i < nums_.length; i += dtype.size) {
nums.push(nums_.substring(i, i + dtype.size));
}
}

// Convert float16 to float32 if converter exists
const data = dtype.converter ? dtype.converter.call(this, nums) : nums;

Expand Down Expand Up @@ -175,6 +203,174 @@ class npyjs {
}
return result;
}

async loadBlob(blob, callback) {
/*
Loads an array from a Blob object.
*/
if (!(blob instanceof Blob)) {
throw new Error("Input is not a Blob.");
}
const arrayBuf = await blob.arrayBuffer();
const result = this.parse(arrayBuf);
if (callback) {
return callback(result);
}
return result;
}

dump(array, opts) {{
/**
* Dumps an array to a stream of bytes in NumPy format.
*
* Parameters
* ----------
* array : TypedArray or Array of numbers
* The data to be dumped.
* opts : Object
* dump options:
* dtype : string
* The data type. If not provided, it will be inferred from the
* input array.
* fortranOrder : boolean
* Whether the array is stored in Fortran order. Default is false.
*
* Returns
* -------
* blob : Blob
* A Blob representing the array in NumPy format.
*/

opts = opts || {};
var dtype;
var isUnicode = false;
var maxLen = 0;
// Unicode string array detection
if (Array.isArray(array) && array.length > 0 && typeof array[0] === "string") {
isUnicode = true;
// Find max string length
maxLen = array.reduce((max, s) => Math.max(max, s.length), 0);
// Build buffer of UTF-32 code points, padded to maxLen
let buf = new Uint32Array(array.length * maxLen);
for (let i = 0; i < array.length; i++) {
let codePoints = Array.from(array[i]).map(c => c.codePointAt(0));
for (let j = 0; j < maxLen; j++) {
buf[i * maxLen + j] = codePoints[j] || 0;
}
}
dtype = {
name: "unicode",
size: maxLen,
arrayConstructor: StringFromCodePoint
};
array = buf;
} else if (opts.dtype) {
dtype = this.dtypes[opts.dtype];
if (!dtype) {
throw new Error("Invalid or unsupported dtype: " + opts.dtype);
}
} else {
// Infer dtype from input array
if (Array.isArray(array)) {
// Plain array
var allInt = true;
var allUint = true;
for (var i = 0; i < array.length; i++) {
if (!Number.isInteger(array[i])) {
allInt = false;
allUint = false;
break;
} else if (array[i] < 0) {
allUint = false;
}
}
if (allInt) {
dtype = this.dtypes["<i4"];
} else if (allUint) {
dtype = this.dtypes["<u4"];
} else {
dtype = this.dtypes["<f4"];
}
array = new dtype["arrayConstructor"](array);
} else if (ArrayBuffer.isView(array)) {
// Typed array
for (var key in this.dtypes) {
if (this.dtypes[key].arrayConstructor === array.constructor) {
dtype = this.dtypes[key];
break;
}
}
if (!dtype) {
throw new Error("Unsupported array type: " + array.constructor.name);
}
} else {
throw new Error("Input data must be an array or typed array.");
}
}

var fortranOrder = opts.fortranOrder || false;
var shape;
if (isUnicode) {
shape = [array.length / maxLen];
} else if (array instanceof StringFromCodePoint) {
shape = [array.length / dtype.size];
} else {
shape = [array.length];
}

var header = {
descr: isUnicode ? `<U${maxLen}` : Object.keys(this.dtypes).find(key => this.dtypes[key] === dtype),
fortran_order: fortranOrder,
shape: shape.length === 1 ? [shape[0],] : shape
};
var headerStr = JSON.stringify(header)
.replace(/"/g, "'")
.replace(/\[/g, "(")
.replace(/\]/g, ",), ")
.replace(/,/g, ",").replace("false", "False").replace("true", "True");
if (headerStr.length + 10 > 65536) {
throw new Error("Array header too long (>64KB)");
}
while ((headerStr.length + 10) % 16 !== 0) {
headerStr += " ";
}
headerStr += "\n";

let magic = new Uint8Array([0x93]);
let npy = new TextEncoder().encode("NUMPY");
let version = new Uint8Array([1, 0]);

// Add newline at the end of header string
headerStr = headerStr.trimEnd() + "\n";
let headerArray = new TextEncoder().encode(headerStr);

// Compute padded header length
let headerLen = headerArray.length;
let padLen = (16 - ((10 + headerLen) % 16)) % 16;
// 10 = magic(6) + version(2) + headerLen(2)

let paddedHeader = new Uint8Array(headerLen + padLen);
paddedHeader.set(headerArray);
for (let i = headerLen; i < headerLen + padLen; i++) {
paddedHeader[i] = 0x20; // spaces
}

// Length bytes (little endian)
let headerLenBytes = new Uint8Array([
paddedHeader.length & 0xff,
(paddedHeader.length >> 8) & 0xff
]);

let blobParts = [
magic,
npy,
version,
headerLenBytes,
paddedHeader,
array.buffer
];
return new Blob(blobParts, { type: "application/octet-stream" });
}}
}

export default npyjs;
39 changes: 36 additions & 3 deletions test/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ import path from "path";
import http from "http";
import N from "../index.js";

//eslint-disable-next-line no-undef
describe("npyjs parser", function () {
//eslint-disable-next-line no-undef
it("should correctly parse npy files", async function () {
const server = http.createServer(async function (req, res) {
const fpath = path.resolve(req.url.slice(1));
Expand All @@ -13,14 +15,14 @@ describe("npyjs parser", function () {
res.end(data);
});
server.listen();
const {port} = server.address()
const {port} = server.address();

const records = JSON.parse(await fs.readFile("test/records.json"));
const n = new N();

for (const fname in records) {
const fpath = path.join("test", `${fname}.npy`)
const data = await n.load(`http://localhost:${port}/${fpath}`)
const fpath = path.join("test", `${fname}.npy`);
const data = await n.load(`http://localhost:${port}/${fpath}`);

// Get the last 5 values for comparison
const resultValues = Array.prototype.slice.call(
Expand Down Expand Up @@ -115,3 +117,34 @@ describe("npyjs parser", function () {
server.close();
});
});


// Round-trip check: every fixture is loaded over HTTP, dumped back to a Blob,
// re-parsed, and its last five values are compared with the recorded ones.
//eslint-disable-next-line no-undef
describe("npyjs dumper", function () {
  //eslint-disable-next-line no-undef
  it("should correctly dump npy files", async function () {
    // Minimal static file server so the fixtures can be fetched with load().
    const server = http.createServer(async function (req, res) {
      const fpath = path.resolve(req.url.slice(1));
      const data = await fs.readFile(fpath);
      res.writeHead(200);
      res.end(data);
    });
    // Wait until the socket is bound before reading the assigned port.
    await new Promise((resolve) => server.listen(0, resolve));

    try {
      const {port} = server.address();

      const records = JSON.parse(await fs.readFile("test/records.json"));
      const n = new N();

      for (const fname in records) {
        const fpath = path.join("test", `${fname}.npy`);
        const data = await n.load(`http://localhost:${port}/${fpath}`);
        const dumped = n.dump(data.data);
        const reloaded = await n.loadBlob(dumped);
        Array.prototype.slice.call(
          reloaded.data.slice(-5)
        ).forEach((i, j) => {
          assert.equal(records[fname][j], i);
        });
      }
    } finally {
      // Always release the listening socket, even when an assertion fails;
      // otherwise the open handle keeps the test process alive.
      server.close();
    }
  });
});
Loading