|
1 | 1 | import JUSTC from 'justc'; |
2 | | - |
3 | | -const name__ = 'JSSC'; |
4 | | -const prefix = name__+': '; |
| 2 | +import { name__, prefix } from './meta'; |
5 | 3 | if ((String.fromCharCode(65536).charCodeAt(0) === 65536) || !(String.fromCharCode(256).charCodeAt(0) === 256)) { |
6 | 4 | throw new Error(prefix+'Supported UTF-16 only!') |
7 | 5 | } |
8 | 6 |
|
9 | | -function stringCodes(str) { |
10 | | - let output = []; |
11 | | - let max = 0; |
12 | | - let maxCharCode = 0; |
13 | | - let min = Infinity; |
14 | | - String(str).split('').forEach(char => { |
15 | | - const code = char.charCodeAt(); |
16 | | - output.push(code); |
17 | | - max = Math.max(max, code.toString().length); |
18 | | - maxCharCode = Math.max(maxCharCode, code); |
19 | | - min = Math.min(min, code.toString().length); |
20 | | - }); |
21 | | - return {max, output, maxCharCode, min}; |
22 | | -} |
23 | | - |
24 | | -function codesString(cds) { |
25 | | - let output = ''; |
26 | | - cds.forEach(code => { |
27 | | - output += String.fromCharCode(code); |
28 | | - }); |
29 | | - return output |
30 | | -} |
31 | | - |
32 | | -function charCode(num) { |
33 | | - return String.fromCharCode(num + 32); |
34 | | -} |
35 | | -function checkChar(cde) { |
36 | | - return cde % 65535 === cde |
37 | | -} |
38 | | - |
39 | | -function stringChunks(str, num) { |
40 | | - const output = []; |
41 | | - for (let i = 0; i < str.length; i += num) { |
42 | | - output.push(str.slice(i, i + num)) |
43 | | - } |
44 | | - return output |
45 | | -} |
46 | | -function chunkArray(array, num) { |
47 | | - const result = []; |
48 | | - for (let i = 0; i < array.length; i += num) { |
49 | | - result.push(array.slice(i, i + num)); |
50 | | - } |
51 | | - return result; |
52 | | -} |
53 | | - |
54 | | -function decToBin(num, wnum) { |
55 | | - return num.toString(2).padStart(wnum, '0'); |
56 | | -} |
57 | | -function binToDec(str) { |
58 | | - return parseInt(str, 2); |
59 | | -} |
| 7 | +import { |
| 8 | + stringCodes, |
| 9 | + codesString, |
| 10 | + charCode, |
| 11 | + checkChar, |
| 12 | + stringChunks, |
| 13 | + chunkArray, |
| 14 | + decToBin, |
| 15 | + binToDec |
| 16 | +} from './utils'; |
60 | 17 |
|
61 | 18 | function charsBase() { |
62 | 19 | const charsBase = {}; |
@@ -596,116 +553,7 @@ function decompressSequences(str) { |
596 | 553 | return result; |
597 | 554 | } |
598 | 555 |
|
599 | | -const freqMap = { |
600 | | - ESCAPE_BYTE: 0xFF, |
601 | | - TOP_COUNT: 254, |
602 | | - SPLITTER: " \u200B", |
603 | | - |
604 | | - compress(text, splitter = this.SPLITTER) { |
605 | | - const freq = {}; |
606 | | - for (let char of text) { |
607 | | - freq[char] = (freq[char] || 0) + 1; |
608 | | - } |
609 | | - |
610 | | - const topChars = Object.entries(freq) |
611 | | - .sort((a, b) => b[1] - a[1]) |
612 | | - .slice(0, this.TOP_COUNT) |
613 | | - .map(entry => entry[0]); |
614 | | - |
615 | | - const charToIndex = new Map(topChars.map((char, i) => [char, i])); |
616 | | - |
617 | | - let header = String.fromCharCode(topChars.length) + topChars.join(''); |
618 | | - |
619 | | - let bytes = []; |
620 | | - for (let char of text) { |
621 | | - if (charToIndex.has(char)) { |
622 | | - /* frequent */ |
623 | | - bytes.push(charToIndex.get(char)); |
624 | | - } else { |
625 | | - /* rare */ |
626 | | - bytes.push(this.ESCAPE_BYTE); |
627 | | - const code = char.charCodeAt(0); |
628 | | - bytes.push((code >> 8) & 0xFF); |
629 | | - bytes.push(code & 0xFF); |
630 | | - } |
631 | | - } |
632 | | - |
633 | | - /* to UTF16 */ |
634 | | - let compressedBody = ""; |
635 | | - for (let i = 0; i < bytes.length; i += 2) { |
636 | | - const b1 = bytes[i]; |
637 | | - const b2 = (i + 1 < bytes.length) ? bytes[i + 1] : 0x00; |
638 | | - compressedBody += String.fromCharCode((b1 << 8) | b2); |
639 | | - } |
640 | | - |
641 | | - return header + splitter + compressedBody; |
642 | | - }, |
643 | | - |
644 | | - decompress(compressedText, splitter = this.SPLITTER) { |
645 | | - const parts = compressedText.split(splitter); |
646 | | - |
647 | | - if (parts.length < 2) { |
648 | | - throw new Error(prefix+'Invalid freqMap data: splitter not found'); |
649 | | - } |
650 | | - |
651 | | - const headerPart = parts[0]; |
652 | | - const bodyPart = parts.slice(1).join(splitter); |
653 | | - |
654 | | - const topCount = headerPart.charCodeAt(0); |
655 | | - const topChars = headerPart.substring(1, topCount + 1); |
656 | | - |
657 | | - let bytes = []; |
658 | | - for (let i = 0; i < bodyPart.length; i++) { |
659 | | - const code = bodyPart.charCodeAt(i); |
660 | | - bytes.push((code >> 8) & 0xFF); |
661 | | - bytes.push(code & 0xFF); |
662 | | - } |
663 | | - |
664 | | - let result = ""; |
665 | | - for (let i = 0; i < bytes.length; i++) { |
666 | | - const b = bytes[i]; |
667 | | - if (b === this.ESCAPE_BYTE) { |
668 | | - const charCode = (bytes[i + 1] << 8) | bytes[i + 2]; |
669 | | - result += String.fromCharCode(charCode); |
670 | | - i += 2; |
671 | | - } else if (b < topCount) { |
672 | | - result += topChars[b]; |
673 | | - } |
674 | | - } |
675 | | - return result; |
676 | | - }, |
677 | | - |
678 | | - /** |
679 | | - * 0 = Fail |
680 | | - * 1 = Success |
681 | | - * 2 = Remove last character (Success) |
682 | | - * @param {string} text |
683 | | - * @param {string?} splitter |
684 | | - * @returns {number|[number, number, string, string]} |
685 | | - */ |
686 | | - test(text, splitter = this.SPLITTER) { |
687 | | - try { |
688 | | - if (text.includes(splitter)) return 0; |
689 | | - const packed = this.compress(text, splitter); |
690 | | - const unpacked = this.decompress(packed, splitter); |
691 | | - if (packed.length < text.length) { |
692 | | - if (unpacked == text) return [1, packed.length, splitter, packed]; |
693 | | - else if (unpacked.slice(0,-1) == text) return [2, packed.length, splitter, packed]; |
694 | | - else return 0; |
695 | | - } |
696 | | - return 0; |
697 | | - } catch (_) { |
698 | | - return 0; |
699 | | - } |
700 | | - } |
701 | | -}; |
702 | | - |
703 | | -const freqMapSplitters = [ |
704 | | - " \u200B","\u0000", |
705 | | - "\u001F", "\u0001", |
706 | | - "\uFFFD", "\u2022", |
707 | | - "|§|", "\uFEFF" |
708 | | -]; |
| 556 | +import { freqMap, freqMapSplitters } from './freqMap'; |
709 | 557 |
|
710 | 558 | function segments(str) { |
711 | 559 | if (typeof str !== 'string' || str.length === 0) return []; |
|
0 commit comments