diff --git a/crates/edgeparse-wasm/pkg/edgeparse_wasm.d.ts b/crates/edgeparse-wasm/pkg/edgeparse_wasm.d.ts new file mode 100644 index 0000000..055a551 --- /dev/null +++ b/crates/edgeparse-wasm/pkg/edgeparse_wasm.d.ts @@ -0,0 +1,74 @@ +/* tslint:disable */ +/* eslint-disable */ + +/** + * Convert PDF bytes to a structured document object (returned as JS value). + * + * # Arguments + * * `pdf_bytes` — raw PDF file as `Uint8Array` + * * `format` — output format hint: `"json"` (default) | `"markdown"` | `"html"` | `"text"` + * * `pages` — page range: `"all"` (default) or `"1-5"` or `"1,3,7"` + * * `reading_order` — `"auto"` (default) or `"off"` + * * `table_method` — `"default"` (default) or `"cluster"` + */ +export function convert(pdf_bytes: Uint8Array, format?: string | null, pages?: string | null, reading_order?: string | null, table_method?: string | null): any; + +/** + * Convert PDF bytes to a formatted output string. + * + * # Arguments + * * `pdf_bytes` — raw PDF file as `Uint8Array` + * * `format` — `"json"` (default) | `"markdown"` | `"html"` | `"text"` + * * `pages` — page range + * * `reading_order` — `"auto"` | `"off"` + * * `table_method` — `"default"` | `"cluster"` + */ +export function convert_to_string(pdf_bytes: Uint8Array, format?: string | null, pages?: string | null, reading_order?: string | null, table_method?: string | null): string; + +/** + * Initialize panic hook for better error messages in browser console. + */ +export function init(): void; + +/** + * Return the edgeparse version string. 
+ */ +export function version(): string; + +export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module; + +export interface InitOutput { + readonly memory: WebAssembly.Memory; + readonly convert: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number, i: number, j: number, k: number) => void; + readonly convert_to_string: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number, i: number, j: number, k: number) => void; + readonly version: (a: number) => void; + readonly init: () => void; + readonly __wbindgen_export: (a: number, b: number) => number; + readonly __wbindgen_export2: (a: number, b: number, c: number, d: number) => number; + readonly __wbindgen_export3: (a: number, b: number, c: number) => void; + readonly __wbindgen_export4: (a: number) => void; + readonly __wbindgen_add_to_stack_pointer: (a: number) => number; + readonly __wbindgen_start: () => void; +} + +export type SyncInitInput = BufferSource | WebAssembly.Module; + +/** + * Instantiates the given `module`, which can either be bytes or + * a precompiled `WebAssembly.Module`. + * + * @param {{ module: SyncInitInput }} module - Passing `SyncInitInput` directly is deprecated. + * + * @returns {InitOutput} + */ +export function initSync(module: { module: SyncInitInput } | SyncInitInput): InitOutput; + +/** + * If `module_or_path` is {RequestInfo} or {URL}, makes a request and + * for everything else, calls `WebAssembly.instantiate` directly. + * + * @param {{ module_or_path: InitInput | Promise<InitInput> }} module_or_path - Passing `InitInput` directly is deprecated. 
+ * + * @returns {Promise<InitOutput>} + */ +export default function __wbg_init (module_or_path?: { module_or_path: InitInput | Promise<InitInput> } | InitInput | Promise<InitInput>): Promise<InitOutput>; diff --git a/crates/edgeparse-wasm/pkg/edgeparse_wasm.js b/crates/edgeparse-wasm/pkg/edgeparse_wasm.js new file mode 100644 index 0000000..8902f1f --- /dev/null +++ b/crates/edgeparse-wasm/pkg/edgeparse_wasm.js @@ -0,0 +1,459 @@ +/* @ts-self-types="./edgeparse_wasm.d.ts" */ + +/** + * Convert PDF bytes to a structured document object (returned as JS value). + * + * # Arguments + * * `pdf_bytes` — raw PDF file as `Uint8Array` + * * `format` — output format hint: `"json"` (default) | `"markdown"` | `"html"` | `"text"` + * * `pages` — page range: `"all"` (default) or `"1-5"` or `"1,3,7"` + * * `reading_order` — `"auto"` (default) or `"off"` + * * `table_method` — `"default"` (default) or `"cluster"` + * @param {Uint8Array} pdf_bytes + * @param {string | null} [format] + * @param {string | null} [pages] + * @param {string | null} [reading_order] + * @param {string | null} [table_method] + * @returns {any} + */ +export function convert(pdf_bytes, format, pages, reading_order, table_method) { + try { + const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); + const ptr0 = passArray8ToWasm0(pdf_bytes, wasm.__wbindgen_export); + const len0 = WASM_VECTOR_LEN; + var ptr1 = isLikeNone(format) ? 0 : passStringToWasm0(format, wasm.__wbindgen_export, wasm.__wbindgen_export2); + var len1 = WASM_VECTOR_LEN; + var ptr2 = isLikeNone(pages) ? 0 : passStringToWasm0(pages, wasm.__wbindgen_export, wasm.__wbindgen_export2); + var len2 = WASM_VECTOR_LEN; + var ptr3 = isLikeNone(reading_order) ? 0 : passStringToWasm0(reading_order, wasm.__wbindgen_export, wasm.__wbindgen_export2); + var len3 = WASM_VECTOR_LEN; + var ptr4 = isLikeNone(table_method) ? 
0 : passStringToWasm0(table_method, wasm.__wbindgen_export, wasm.__wbindgen_export2); + var len4 = WASM_VECTOR_LEN; + wasm.convert(retptr, ptr0, len0, ptr1, len1, ptr2, len2, ptr3, len3, ptr4, len4); + var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true); + var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true); + var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true); + if (r2) { + throw takeObject(r1); + } + return takeObject(r0); + } finally { + wasm.__wbindgen_add_to_stack_pointer(16); + } +} + +/** + * Convert PDF bytes to a formatted output string. + * + * # Arguments + * * `pdf_bytes` — raw PDF file as `Uint8Array` + * * `format` — `"json"` (default) | `"markdown"` | `"html"` | `"text"` + * * `pages` — page range + * * `reading_order` — `"auto"` | `"off"` + * * `table_method` — `"default"` | `"cluster"` + * @param {Uint8Array} pdf_bytes + * @param {string | null} [format] + * @param {string | null} [pages] + * @param {string | null} [reading_order] + * @param {string | null} [table_method] + * @returns {string} + */ +export function convert_to_string(pdf_bytes, format, pages, reading_order, table_method) { + let deferred7_0; + let deferred7_1; + try { + const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); + const ptr0 = passArray8ToWasm0(pdf_bytes, wasm.__wbindgen_export); + const len0 = WASM_VECTOR_LEN; + var ptr1 = isLikeNone(format) ? 0 : passStringToWasm0(format, wasm.__wbindgen_export, wasm.__wbindgen_export2); + var len1 = WASM_VECTOR_LEN; + var ptr2 = isLikeNone(pages) ? 0 : passStringToWasm0(pages, wasm.__wbindgen_export, wasm.__wbindgen_export2); + var len2 = WASM_VECTOR_LEN; + var ptr3 = isLikeNone(reading_order) ? 0 : passStringToWasm0(reading_order, wasm.__wbindgen_export, wasm.__wbindgen_export2); + var len3 = WASM_VECTOR_LEN; + var ptr4 = isLikeNone(table_method) ? 
0 : passStringToWasm0(table_method, wasm.__wbindgen_export, wasm.__wbindgen_export2); + var len4 = WASM_VECTOR_LEN; + wasm.convert_to_string(retptr, ptr0, len0, ptr1, len1, ptr2, len2, ptr3, len3, ptr4, len4); + var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true); + var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true); + var r2 = getDataViewMemory0().getInt32(retptr + 4 * 2, true); + var r3 = getDataViewMemory0().getInt32(retptr + 4 * 3, true); + var ptr6 = r0; + var len6 = r1; + if (r3) { + ptr6 = 0; len6 = 0; + throw takeObject(r2); + } + deferred7_0 = ptr6; + deferred7_1 = len6; + return getStringFromWasm0(ptr6, len6); + } finally { + wasm.__wbindgen_add_to_stack_pointer(16); + wasm.__wbindgen_export3(deferred7_0, deferred7_1, 1); + } +} + +/** + * Initialize panic hook for better error messages in browser console. + */ +export function init() { + wasm.init(); +} + +/** + * Return the edgeparse version string. + * @returns {string} + */ +export function version() { + let deferred1_0; + let deferred1_1; + try { + const retptr = wasm.__wbindgen_add_to_stack_pointer(-16); + wasm.version(retptr); + var r0 = getDataViewMemory0().getInt32(retptr + 4 * 0, true); + var r1 = getDataViewMemory0().getInt32(retptr + 4 * 1, true); + deferred1_0 = r0; + deferred1_1 = r1; + return getStringFromWasm0(r0, r1); + } finally { + wasm.__wbindgen_add_to_stack_pointer(16); + wasm.__wbindgen_export3(deferred1_0, deferred1_1, 1); + } +} + +function __wbg_get_imports() { + const import0 = { + __proto__: null, + __wbg_Error_83742b46f01ce22d: function(arg0, arg1) { + const ret = Error(getStringFromWasm0(arg0, arg1)); + return addHeapObject(ret); + }, + __wbg_String_8564e559799eccda: function(arg0, arg1) { + const ret = String(getObject(arg1)); + const ptr1 = passStringToWasm0(ret, wasm.__wbindgen_export, wasm.__wbindgen_export2); + const len1 = WASM_VECTOR_LEN; + getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true); + getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, 
true); + }, + __wbg___wbindgen_throw_6ddd609b62940d55: function(arg0, arg1) { + throw new Error(getStringFromWasm0(arg0, arg1)); + }, + __wbg_debug_271c16e6de0bc226: function(arg0, arg1, arg2, arg3) { + console.debug(getObject(arg0), getObject(arg1), getObject(arg2), getObject(arg3)); + }, + __wbg_error_1eece6b0039034ce: function(arg0, arg1, arg2, arg3) { + console.error(getObject(arg0), getObject(arg1), getObject(arg2), getObject(arg3)); + }, + __wbg_error_a6fa202b58aa1cd3: function(arg0, arg1) { + let deferred0_0; + let deferred0_1; + try { + deferred0_0 = arg0; + deferred0_1 = arg1; + console.error(getStringFromWasm0(arg0, arg1)); + } finally { + wasm.__wbindgen_export3(deferred0_0, deferred0_1, 1); + } + }, + __wbg_getRandomValues_3f44b700395062e5: function() { return handleError(function (arg0, arg1) { + globalThis.crypto.getRandomValues(getArrayU8FromWasm0(arg0, arg1)); + }, arguments); }, + __wbg_info_0194681687b5ab04: function(arg0, arg1, arg2, arg3) { + console.info(getObject(arg0), getObject(arg1), getObject(arg2), getObject(arg3)); + }, + __wbg_log_70972330cfc941dd: function(arg0, arg1, arg2, arg3) { + console.log(getObject(arg0), getObject(arg1), getObject(arg2), getObject(arg3)); + }, + __wbg_new_227d7c05414eb861: function() { + const ret = new Error(); + return addHeapObject(ret); + }, + __wbg_new_a70fbab9066b301f: function() { + const ret = new Array(); + return addHeapObject(ret); + }, + __wbg_new_ab79df5bd7c26067: function() { + const ret = new Object(); + return addHeapObject(ret); + }, + __wbg_set_282384002438957f: function(arg0, arg1, arg2) { + getObject(arg0)[arg1 >>> 0] = takeObject(arg2); + }, + __wbg_set_6be42768c690e380: function(arg0, arg1, arg2) { + getObject(arg0)[takeObject(arg1)] = takeObject(arg2); + }, + __wbg_stack_3b0d974bbf31e44f: function(arg0, arg1) { + const ret = getObject(arg1).stack; + const ptr1 = passStringToWasm0(ret, wasm.__wbindgen_export, wasm.__wbindgen_export2); + const len1 = WASM_VECTOR_LEN; + 
getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true); + getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, true); + }, + __wbg_warn_809cad1bfc2b3a42: function(arg0, arg1, arg2, arg3) { + console.warn(getObject(arg0), getObject(arg1), getObject(arg2), getObject(arg3)); + }, + __wbindgen_cast_0000000000000001: function(arg0) { + // Cast intrinsic for `F64 -> Externref`. + const ret = arg0; + return addHeapObject(ret); + }, + __wbindgen_cast_0000000000000002: function(arg0) { + // Cast intrinsic for `I64 -> Externref`. + const ret = arg0; + return addHeapObject(ret); + }, + __wbindgen_cast_0000000000000003: function(arg0, arg1) { + // Cast intrinsic for `Ref(String) -> Externref`. + const ret = getStringFromWasm0(arg0, arg1); + return addHeapObject(ret); + }, + __wbindgen_cast_0000000000000004: function(arg0) { + // Cast intrinsic for `U64 -> Externref`. + const ret = BigInt.asUintN(64, arg0); + return addHeapObject(ret); + }, + __wbindgen_object_clone_ref: function(arg0) { + const ret = getObject(arg0); + return addHeapObject(ret); + }, + __wbindgen_object_drop_ref: function(arg0) { + takeObject(arg0); + }, + }; + return { + __proto__: null, + "./edgeparse_wasm_bg.js": import0, + }; +} + +function addHeapObject(obj) { + if (heap_next === heap.length) heap.push(heap.length + 1); + const idx = heap_next; + heap_next = heap[idx]; + + heap[idx] = obj; + return idx; +} + +function dropObject(idx) { + if (idx < 1028) return; + heap[idx] = heap_next; + heap_next = idx; +} + +function getArrayU8FromWasm0(ptr, len) { + ptr = ptr >>> 0; + return getUint8ArrayMemory0().subarray(ptr / 1, ptr / 1 + len); +} + +let cachedDataViewMemory0 = null; +function getDataViewMemory0() { + if (cachedDataViewMemory0 === null || cachedDataViewMemory0.buffer.detached === true || (cachedDataViewMemory0.buffer.detached === undefined && cachedDataViewMemory0.buffer !== wasm.memory.buffer)) { + cachedDataViewMemory0 = new DataView(wasm.memory.buffer); + } + return cachedDataViewMemory0; +} + 
+function getStringFromWasm0(ptr, len) { + ptr = ptr >>> 0; + return decodeText(ptr, len); +} + +let cachedUint8ArrayMemory0 = null; +function getUint8ArrayMemory0() { + if (cachedUint8ArrayMemory0 === null || cachedUint8ArrayMemory0.byteLength === 0) { + cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer); + } + return cachedUint8ArrayMemory0; +} + +function getObject(idx) { return heap[idx]; } + +function handleError(f, args) { + try { + return f.apply(this, args); + } catch (e) { + wasm.__wbindgen_export4(addHeapObject(e)); + } +} + +let heap = new Array(1024).fill(undefined); +heap.push(undefined, null, true, false); + +let heap_next = heap.length; + +function isLikeNone(x) { + return x === undefined || x === null; +} + +function passArray8ToWasm0(arg, malloc) { + const ptr = malloc(arg.length * 1, 1) >>> 0; + getUint8ArrayMemory0().set(arg, ptr / 1); + WASM_VECTOR_LEN = arg.length; + return ptr; +} + +function passStringToWasm0(arg, malloc, realloc) { + if (realloc === undefined) { + const buf = cachedTextEncoder.encode(arg); + const ptr = malloc(buf.length, 1) >>> 0; + getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf); + WASM_VECTOR_LEN = buf.length; + return ptr; + } + + let len = arg.length; + let ptr = malloc(len, 1) >>> 0; + + const mem = getUint8ArrayMemory0(); + + let offset = 0; + + for (; offset < len; offset++) { + const code = arg.charCodeAt(offset); + if (code > 0x7F) break; + mem[ptr + offset] = code; + } + if (offset !== len) { + if (offset !== 0) { + arg = arg.slice(offset); + } + ptr = realloc(ptr, len, len = offset + arg.length * 3, 1) >>> 0; + const view = getUint8ArrayMemory0().subarray(ptr + offset, ptr + len); + const ret = cachedTextEncoder.encodeInto(arg, view); + + offset += ret.written; + ptr = realloc(ptr, len, offset, 1) >>> 0; + } + + WASM_VECTOR_LEN = offset; + return ptr; +} + +function takeObject(idx) { + const ret = getObject(idx); + dropObject(idx); + return ret; +} + +let cachedTextDecoder = new 
TextDecoder('utf-8', { ignoreBOM: true, fatal: true }); +cachedTextDecoder.decode(); +const MAX_SAFARI_DECODE_BYTES = 2146435072; +let numBytesDecoded = 0; +function decodeText(ptr, len) { + numBytesDecoded += len; + if (numBytesDecoded >= MAX_SAFARI_DECODE_BYTES) { + cachedTextDecoder = new TextDecoder('utf-8', { ignoreBOM: true, fatal: true }); + cachedTextDecoder.decode(); + numBytesDecoded = len; + } + return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len)); +} + +const cachedTextEncoder = new TextEncoder(); + +if (!('encodeInto' in cachedTextEncoder)) { + cachedTextEncoder.encodeInto = function (arg, view) { + const buf = cachedTextEncoder.encode(arg); + view.set(buf); + return { + read: arg.length, + written: buf.length + }; + }; +} + +let WASM_VECTOR_LEN = 0; + +let wasmModule, wasm; +function __wbg_finalize_init(instance, module) { + wasm = instance.exports; + wasmModule = module; + cachedDataViewMemory0 = null; + cachedUint8ArrayMemory0 = null; + wasm.__wbindgen_start(); + return wasm; +} + +async function __wbg_load(module, imports) { + if (typeof Response === 'function' && module instanceof Response) { + if (typeof WebAssembly.instantiateStreaming === 'function') { + try { + return await WebAssembly.instantiateStreaming(module, imports); + } catch (e) { + const validResponse = module.ok && expectedResponseType(module.type); + + if (validResponse && module.headers.get('Content-Type') !== 'application/wasm') { + console.warn("`WebAssembly.instantiateStreaming` failed because your server does not serve Wasm with `application/wasm` MIME type. Falling back to `WebAssembly.instantiate` which is slower. 
Original error:\n", e); + + } else { throw e; } + } + } + + const bytes = await module.arrayBuffer(); + return await WebAssembly.instantiate(bytes, imports); + } else { + const instance = await WebAssembly.instantiate(module, imports); + + if (instance instanceof WebAssembly.Instance) { + return { instance, module }; + } else { + return instance; + } + } + + function expectedResponseType(type) { + switch (type) { + case 'basic': case 'cors': case 'default': return true; + } + return false; + } +} + +function initSync(module) { + if (wasm !== undefined) return wasm; + + + if (module !== undefined) { + if (Object.getPrototypeOf(module) === Object.prototype) { + ({module} = module) + } else { + console.warn('using deprecated parameters for `initSync()`; pass a single object instead') + } + } + + const imports = __wbg_get_imports(); + if (!(module instanceof WebAssembly.Module)) { + module = new WebAssembly.Module(module); + } + const instance = new WebAssembly.Instance(module, imports); + return __wbg_finalize_init(instance, module); +} + +async function __wbg_init(module_or_path) { + if (wasm !== undefined) return wasm; + + + if (module_or_path !== undefined) { + if (Object.getPrototypeOf(module_or_path) === Object.prototype) { + ({module_or_path} = module_or_path) + } else { + console.warn('using deprecated parameters for the initialization function; pass a single object instead') + } + } + + if (module_or_path === undefined) { + module_or_path = new URL('edgeparse_wasm_bg.wasm', import.meta.url); + } + const imports = __wbg_get_imports(); + + if (typeof module_or_path === 'string' || (typeof Request === 'function' && module_or_path instanceof Request) || (typeof URL === 'function' && module_or_path instanceof URL)) { + module_or_path = fetch(module_or_path); + } + + const { instance, module } = await __wbg_load(await module_or_path, imports); + + return __wbg_finalize_init(instance, module); +} + +export { initSync, __wbg_init as default }; diff --git 
a/crates/edgeparse-wasm/pkg/edgeparse_wasm_bg.wasm b/crates/edgeparse-wasm/pkg/edgeparse_wasm_bg.wasm new file mode 100644 index 0000000..9682480 Binary files /dev/null and b/crates/edgeparse-wasm/pkg/edgeparse_wasm_bg.wasm differ diff --git a/crates/edgeparse-wasm/pkg/edgeparse_wasm_bg.wasm.d.ts b/crates/edgeparse-wasm/pkg/edgeparse_wasm_bg.wasm.d.ts new file mode 100644 index 0000000..51393c5 --- /dev/null +++ b/crates/edgeparse-wasm/pkg/edgeparse_wasm_bg.wasm.d.ts @@ -0,0 +1,13 @@ +/* tslint:disable */ +/* eslint-disable */ +export const memory: WebAssembly.Memory; +export const convert: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number, i: number, j: number, k: number) => void; +export const convert_to_string: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number, i: number, j: number, k: number) => void; +export const version: (a: number) => void; +export const init: () => void; +export const __wbindgen_export: (a: number, b: number) => number; +export const __wbindgen_export2: (a: number, b: number, c: number, d: number) => number; +export const __wbindgen_export3: (a: number, b: number, c: number) => void; +export const __wbindgen_export4: (a: number) => void; +export const __wbindgen_add_to_stack_pointer: (a: number) => number; +export const __wbindgen_start: () => void; diff --git a/crates/edgeparse-wasm/pkg/package.json b/crates/edgeparse-wasm/pkg/package.json new file mode 100644 index 0000000..a577014 --- /dev/null +++ b/crates/edgeparse-wasm/pkg/package.json @@ -0,0 +1,21 @@ +{ + "name": "@edgeparse/edgeparse-wasm", + "type": "module", + "description": "EdgeParse PDF parser — WebAssembly build for browsers", + "version": "0.1.1", + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "https://github.com/raphaelmansuy/edgeparse" + }, + "files": [ + "edgeparse_wasm_bg.wasm", + "edgeparse_wasm.js", + "edgeparse_wasm.d.ts" + ], + "main": "edgeparse_wasm.js", + "types": 
"edgeparse_wasm.d.ts", + "sideEffects": [ + "./snippets/*" + ] +} \ No newline at end of file