|
1 | 1 | import * as xmljs from 'xml-js'; |
2 | 2 | import { v4 as uuidv4 } from 'uuid'; |
| 3 | +import crc32 from 'buffer-crc32'; |
3 | 4 | import { DocxExporter, exportSchemaToJson } from './exporter'; |
4 | 5 | import { createDocumentJson, addDefaultStylesIfMissing } from './v2/importer/docxImporter.js'; |
5 | 6 | import { deobfuscateFont, getArrayBufferFromUrl } from './helpers.js'; |
@@ -68,33 +69,6 @@ const collectRunDefaultProperties = ( |
68 | 69 | } |
69 | 70 | }; |
70 | 71 |
|
71 | | -/** |
72 | | - * SHA-256 hash helpers using the Web Crypto API. |
73 | | - * Works in all modern browsers and Node.js 20+. |
74 | | - */ |
75 | | -async function sha256Hex(bytes) { |
76 | | - const hash = await crypto.subtle.digest('SHA-256', bytes); |
77 | | - return Array.from(new Uint8Array(hash)) |
78 | | - .map((b) => b.toString(16).padStart(2, '0')) |
79 | | - .join('') |
80 | | - .toUpperCase(); |
81 | | -} |
82 | | - |
83 | | -async function hashString(str) { |
84 | | - return sha256Hex(new TextEncoder().encode(str)); |
85 | | -} |
86 | | - |
87 | | -async function hashFile(fileSource) { |
88 | | - if (fileSource instanceof ArrayBuffer) { |
89 | | - return sha256Hex(fileSource); |
90 | | - } else if (fileSource instanceof Blob || fileSource instanceof File) { |
91 | | - return sha256Hex(await fileSource.arrayBuffer()); |
92 | | - } else if (fileSource instanceof Uint8Array) { |
93 | | - return sha256Hex(fileSource); |
94 | | - } |
95 | | - return null; |
96 | | -} |
97 | | - |
98 | 72 | class SuperConverter { |
99 | 73 | static allowedElements = Object.freeze({ |
100 | 74 | 'w:document': 'doc', |
@@ -778,32 +752,44 @@ class SuperConverter { |
778 | 752 |
|
779 | 753 | /** |
780 | 754 | * Generate identifier hash from documentGuid and dcterms:created |
781 | | - * Uses SHA-256 of the combined string for a compact identifier |
| 755 | + * Uses CRC32 of the combined string for a compact identifier |
782 | 756 | * Only call when both documentGuid and timestamp exist |
783 | 757 | * @returns {string} Hash identifier in format "HASH-XXXXXXXX" |
784 | 758 | */ |
785 | | - async #generateIdentifierHash() { |
| 759 | + #generateIdentifierHash() { |
786 | 760 | const combined = `${this.documentGuid}|${this.getDocumentCreatedTimestamp()}`; |
787 | | - const hash = await hashString(combined); |
788 | | - return `HASH-${hash.substring(0, 8)}`; |
| 761 | + const buffer = Buffer.from(combined, 'utf8'); |
| 762 | + const hash = crc32(buffer); |
| 763 | + return `HASH-${hash.toString('hex').toUpperCase()}`; |
789 | 764 | } |
790 | 765 |
|
791 | 766 | /** |
792 | 767 | * Generate content hash from file bytes |
793 | | - * Uses SHA-256 of the raw file content for a stable identifier |
| 768 | + * Uses CRC32 of the raw file content for a stable identifier |
794 | 769 | * @returns {Promise<string>} Hash identifier in format "HASH-XXXXXXXX" |
795 | 770 | */ |
796 | 771 | async #generateContentHash() { |
797 | 772 | if (!this.fileSource) { |
| 773 | + // No file source available, generate a random hash (last resort) |
798 | 774 | return `HASH-${uuidv4().replace(/-/g, '').substring(0, 8).toUpperCase()}`; |
799 | 775 | } |
800 | 776 |
|
801 | 777 | try { |
802 | | - const hash = await hashFile(this.fileSource); |
803 | | - if (!hash) { |
| 778 | + let buffer; |
| 779 | + |
| 780 | + if (Buffer.isBuffer(this.fileSource)) { |
| 781 | + buffer = this.fileSource; |
| 782 | + } else if (this.fileSource instanceof ArrayBuffer) { |
| 783 | + buffer = Buffer.from(this.fileSource); |
| 784 | + } else if (this.fileSource instanceof Blob || this.fileSource instanceof File) { |
| 785 | + const arrayBuffer = await this.fileSource.arrayBuffer(); |
| 786 | + buffer = Buffer.from(arrayBuffer); |
| 787 | + } else { |
804 | 788 | return `HASH-${uuidv4().replace(/-/g, '').substring(0, 8).toUpperCase()}`; |
805 | 789 | } |
806 | | - return `HASH-${hash.substring(0, 8)}`; |
| 790 | + |
| 791 | + const hash = crc32(buffer); |
| 792 | + return `HASH-${hash.toString('hex').toUpperCase()}`; |
807 | 793 | } catch (e) { |
808 | 794 | console.warn('[super-converter] Could not generate content hash:', e); |
809 | 795 | return `HASH-${uuidv4().replace(/-/g, '').substring(0, 8).toUpperCase()}`; |
@@ -835,7 +821,7 @@ class SuperConverter { |
835 | 821 |
|
836 | 822 | if (hasGuid && hasTimestamp) { |
837 | 823 | // Both exist: use identifierHash |
838 | | - this.documentUniqueIdentifier = await this.#generateIdentifierHash(); |
| 824 | + this.documentUniqueIdentifier = this.#generateIdentifierHash(); |
839 | 825 | } else { |
840 | 826 | // Missing one or both: use contentHash for stability (same file = same hash) |
841 | 827 | // But generate missing metadata so re-exported file will have complete metadata |
|
0 commit comments