|
| 1 | +import { detectLanguage, getLanguageByAlias, getLanguageEditorId, languages } from '../languages'; |
| 2 | +import type { ContentConfig } from '../models'; |
| 3 | +import { blobToBase64, loadScript } from '../utils/utils'; |
| 4 | +import { metaPngUrl, tesseractUrl } from '../vendors'; |
| 5 | +import { importCompressedCode } from './code'; |
| 6 | +import { importProject } from './project-id'; |
| 7 | + |
/**
 * Lazily loaded OCR engine (the default export of the module at `tesseractUrl`).
 * Only the part of its API used in this file is typed here.
 */
let Tesseract:
  | {
      createWorker: (lang: string) => Promise<{
        recognize: (blob: Blob) => Promise<{ data: { text: string } }>;
        terminate: () => void;
      }>;
    }
  | undefined;

/**
 * Runs OCR on an image and returns the recognized text.
 *
 * The engine is imported on first use and cached in the module-level `Tesseract` variable.
 * A new worker (created for `'eng'`) is used for every call and is always terminated,
 * whether recognition succeeds or fails.
 *
 * @param image - image data to recognize
 * @returns the recognized text, or an empty string if the engine module has no default export
 */
const ocr = async (image: Blob) => {
  Tesseract = Tesseract ?? (await import(tesseractUrl)).default;
  if (!Tesseract) return '';
  const worker = await Tesseract.createWorker('eng');
  try {
    const ret = await worker.recognize(image);
    return ret.data.text;
  } finally {
    // release the worker even when recognition throws, so it does not leak
    worker.terminate();
  }
};
| 25 | + |
/**
 * Resolves a project from a LiveCodes share URL contained in `text`
 * (e.g. the URL printed on images created by LiveCodes "Code to Image").
 *
 * 1. The last `?x=id/...` occurrence in `text` is taken as a project id. If every character
 *    after `id/` belongs to the id alphabet, the project is imported by that id.
 * 2. Otherwise, when `isShareUrl` is set, `text` is parsed as a URL and whatever follows
 *    `#config=` is URI-decoded and imported as compressed code.
 *
 * @param text - text that may contain a share URL
 * @param isShareUrl - whether `text` as a whole is expected to be a share URL
 * @returns the result of the matching import function, or `null` when nothing usable is found
 */
const getConfigFromShareUrl = (text: string, isShareUrl = false) => {
  const idMatches = Array.from(text.matchAll(/\?x=(id\/\S{11,20})/g));
  const rawId = idMatches.pop()?.[1];
  if (rawId) {
    // `]` is mapped to `j` before validation (presumably a common OCR misread — TODO confirm)
    const candidateId = rawId.replace(/]/g, 'j');
    const idAlphabet = '23456789abcdefghijkmnpqrstuvwxyz';
    const idBody = candidateId.slice('id/'.length);
    const hasForeignChar = idBody.split('').some((unit) => !idAlphabet.includes(unit));
    if (!hasForeignChar) {
      return importProject(candidateId);
    }
  }

  if (!isShareUrl) return null;

  try {
    const { href } = new URL(text.trim());
    const [, encoded = ''] = href.split('#config=');
    const code = decodeURIComponent(encoded);
    if (code) {
      return importCompressedCode(code);
    }
  } catch {
    // text is not a parsable URL (or could not be decoded/imported): nothing to import
  }
  return null;
};
| 57 | + |
/**
 * Post-processes text recognized from a code image.
 *
 * - If the last two lines contain a share URL that resolves to a config, that config is returned.
 * - A first line that looks like window buttons is dropped.
 * - If more than 30% of the lines start with a digit, a leading token of up to 4 non-space
 *   characters (plus one following whitespace character) is stripped from every line.
 * - Curly single/double quotes are replaced with straight ones.
 *
 * @param code - raw recognized text
 * @returns `''` for blank input, a config resolved from a share URL, or the cleaned-up code
 */
const cleanUpCode = async (code: string) => {
  if (!code?.trim()) return '';

  const trimmedLines = code.trim().split('\n');
  const header = trimmedLines[0];

  const sharedConfig = await getConfigFromShareUrl(trimmedLines.slice(-2).join('\n'));
  if (sharedConfig) return sharedConfig;

  // remove first line if it contains window buttons
  const buttonLookalikes = new Set(['0', 'C', 'N', 'J', 'X', '(', ')', '[', ']', '|']);
  const hits = header
    .slice(0, 6)
    .split('')
    .reduce((count, ch) => (buttonLookalikes.has(ch) ? count + 1 : count), 0);
  const looksLikeButtons = hits > 2 || hits / header.length > 0.6;
  let result = looksLikeButtons ? trimmedLines.slice(1).join('\n') : code;

  // remove line numbers
  // NOTE(review): detection requires digits but removal strips any short leading token,
  // presumably to tolerate misrecognized digits — confirm this is intended.
  const remaining = result.trim().split('\n');
  let numbered = 0;
  for (const line of remaining) {
    if (/^[0-9]{1,4}\s?/.test(line)) numbered += 1;
  }
  if (numbered / remaining.length > 0.3) {
    result = remaining.map((line) => line.replace(/^\S{1,4}\s?/, '')).join('\n');
  }

  return result.replace(/[‘’]/g, "'").replace(/[“”]/g, '"');
};
| 88 | + |
/** Escapes regular-expression metacharacters so that `value` is matched literally. */
const escapeRegExp = (value: string) => value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

/** Case-insensitive check that `word` occurs in `text` between word boundaries; a missing/empty word never matches. */
const containsWord = (text: string, word: string | undefined) =>
  !!word && new RegExp(`\\b${escapeRegExp(word)}\\b`, 'i').test(text);

/**
 * Builds a (partial) project config from an image. Strategies are tried in order:
 *
 * 1. Read the `'LiveCodes URL'` PNG metadata entry and import the project from that share URL.
 * 2. Run OCR on the image and clean up the text. If that resolves to a config (share URL found
 *    in the text) it is returned. Otherwise, if more than 3 non-blank characters were recognized,
 *    the text is returned as editor content with a guessed language.
 * 3. Fall back to HTML markup that embeds the image itself.
 *
 * Failures in steps 1 and 2 are deliberately swallowed so that the next strategy is tried.
 *
 * @param blob - the image to import
 * @returns a partial content config
 */
export const importFromImage = async (blob: Blob): Promise<Partial<ContentConfig>> => {
  try {
    const metaPng: any = await loadScript(metaPngUrl, 'MetaPNG');
    const arrayBuffer = await blob.arrayBuffer();
    const uint8Array = new Uint8Array(arrayBuffer);
    const livecodesUrl = metaPng.getMetadata(uint8Array, 'LiveCodes URL');
    if (livecodesUrl) {
      const config = await getConfigFromShareUrl(livecodesUrl, true);
      if (config) return config;
    }
  } catch {
    // not PNG or not generated by LiveCodes, continue
  }

  try {
    const text = await ocr(blob);
    const content = await cleanUpCode(text);
    if (content && typeof content === 'object') {
      // config from share url
      return content;
    }

    if (content.trim().length > 3) {
      const langs = languages.map((lang) => lang.name);
      const detected = await detectLanguage(content, langs);
      // normalize aliases without mutating the detection result
      const bestGuess = getLanguageByAlias(detected.language) || detected.language;
      const secondGuess = getLanguageByAlias(detected.secondBest) || detected.secondBest;

      // language name or filename with extension in image
      // (names/extensions are escaped: they are matched as literal text, not as regex syntax)
      const langNamesInCode = languages
        .filter(
          (lang) => containsWord(content, lang.name) || containsWord(content, lang.extensions[0]),
        )
        .map((lang) => lang.name);

      // prefer a language that is both mentioned in the image and detected from the code
      const language =
        langNamesInCode.find((lang) => lang === bestGuess || lang === secondGuess) ??
        langNamesInCode[0] ??
        bestGuess ??
        secondGuess ??
        'html';

      const editorId = getLanguageEditorId(language) ?? 'markup';
      return {
        activeEditor: editorId,
        [editorId]: {
          language,
          content,
        },
      };
    }
  } catch {
    // OCR or language detection failed: use the fallback below
  }

  // fallback
  return {
    markup: {
      language: 'html',
      content: `<img src="${await blobToBase64(blob)}" alt="image" />`,
    },
  };
};
0 commit comments