Skip to content

Commit aef91c2

Browse files
authored
Merge pull request #837 from live-codes/import-image
import code in images (OCR)
2 parents b793193 + b192195 commit aef91c2

14 files changed

Lines changed: 246 additions & 27 deletions

File tree

docs/docs/features/import.mdx

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,10 @@ Import is supported from any of the following:
7878
- [Shared projects](./share.mdx)
7979
- Raw code
8080
- Code in web page DOM
81-
- Projects shared in official playgrounds of [TypeScript](https://www.typescriptlang.org/play) and [Vue](https://play.vuejs.org/)
8281
- Local file(s)
8382
- Code in zip file (Local or URL)
83+
- Code in image - OCR (Local or URL)
84+
- Projects shared in official playgrounds of [TypeScript](https://www.typescriptlang.org/play) and [Vue](https://play.vuejs.org/)
8485
- [Exported project JSON](./export.mdx) (single project and bulk import)
8586

8687
Import sources are identified by URL patterns (e.g. origin, pathname and extension).
@@ -175,6 +176,14 @@ Currently, CodePen API does not allow directly importing code from Pens. However
175176

176177
**Note:** External resources (styles/scripts) are not exported with source code in zip file export of CodePen. However, export to GitHub gist does export these. So if a Pen with external resources exported as zip file is not imported properly, try exporting to GitHub gist or manually add the [external resources](./external-resources.mdx).
177178

179+
## Import Code from Image (OCR)
180+
181+
Code can be extracted from images (local or via URL) using [Tesseract.js](https://github.com/naptha/tesseract.js), a library for Optical Character Recognition (OCR).
182+
To ensure accurate identification, the text in the image should be clear, have high contrast against the background, and be free from unrelated text.
183+
Language detection is performed using [highlight.js](https://highlightjs.readthedocs.io/en/latest/api.html#highlightauto), which makes its best guess based on the content.
184+
185+
Best results are obtained when the image is generated using LiveCodes "[Code to Image](./code-to-image.mdx)" feature.
186+
178187
## Import Exported LiveCodes Projects
179188

180189
A [single project exported as JSON](./export.mdx#exporting-a-single-project) can be imported in the same or a different device from the import screen under the tab "Import Project JSON". The JSON file can be supplied as a local file upload or from a URL.

src/livecodes/UI/code-to-image.ts

Lines changed: 32 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ import {
2121
loadScript,
2222
loadStylesheet,
2323
} from '../utils';
24-
import { colorisBaseUrl, htmlToImageUrl } from '../vendors';
24+
import { colorisBaseUrl, htmlToImageUrl, metaPngUrl } from '../vendors';
2525

2626
type PreviewEditorOptions = Pick<
2727
EditorOptions,
@@ -75,7 +75,7 @@ export const createCodeToImageUI = async ({
7575
deps: {
7676
createEditor: (options: PreviewEditorOptions) => Promise<CodeEditor>;
7777
getFormatFn: () => Promise<FormatFn>;
78-
getShareUrl: (config: Partial<Config>) => Promise<string>;
78+
getShareUrl: (config: Partial<Config>, shortUrl?: boolean) => Promise<string>;
7979
getSavedPreset: () => Partial<Preset> | undefined;
8080
savePreset: (preset: Partial<Preset>) => void;
8181
};
@@ -214,6 +214,7 @@ export const createCodeToImageUI = async ({
214214
const initializeEditor = async (options: Preset) => {
215215
const ed = await deps.createEditor(getEditorOptions(options));
216216
if (ed.getValue().trim() === '') {
217+
editorId = 'script';
217218
ed.setLanguage('tsx', defaultCode);
218219
}
219220
deps.getFormatFn().then((fn) => {
@@ -306,17 +307,14 @@ export const createCodeToImageUI = async ({
306307
};
307308
updateWatermark(currentUrl);
308309

309-
const getCodeConfig = (): Partial<Config> => {
310-
const language = editor.getLanguage();
311-
return {
312-
title: windowControls.querySelector('#code-to-img-title')!.textContent || '',
313-
activeEditor: editorId,
314-
[editorId]: {
315-
language,
316-
content: editor.getValue(),
317-
},
318-
};
319-
};
310+
const getCodeConfig = (): Partial<Config> => ({
311+
title: windowControls.querySelector('#code-to-img-title')!.textContent || '',
312+
activeEditor: editorId,
313+
[editorId]: {
314+
language: editor.getLanguage(),
315+
content: editor.getValue(),
316+
},
317+
});
320318
let cachedConfig: Partial<Config> | undefined;
321319

322320
let formData: Preset;
@@ -424,7 +422,7 @@ export const createCodeToImageUI = async ({
424422
const newConfig = getCodeConfig();
425423
if (formData.watermark && JSON.stringify(cachedConfig) !== JSON.stringify(newConfig)) {
426424
cachedConfig = newConfig;
427-
const url = await deps.getShareUrl(newConfig);
425+
const url = await deps.getShareUrl(newConfig, /* shortUrl = */ true);
428426
updateWatermark(url);
429427
}
430428
};
@@ -449,6 +447,7 @@ export const createCodeToImageUI = async ({
449447
eventsManager.addEventListener(window, 'resize', () => adjustSize(getFormData(), true));
450448

451449
const htmlToImagePromise = loadScript(htmlToImageUrl, 'htmlToImage');
450+
const metaPngPromise = loadScript(metaPngUrl, 'MetaPNG');
452451

453452
const getImageUrl = async () => {
454453
const htmlToImage: any = await htmlToImagePromise;
@@ -465,7 +464,7 @@ export const createCodeToImageUI = async ({
465464
svg: 'toSvg',
466465
};
467466

468-
return htmlToImage[methodNames[formData.format] || 'toPng'](container, {
467+
let dataUrl = await htmlToImage[methodNames[formData.format] || 'toPng'](container, {
469468
quality: 1,
470469
width: width * scale,
471470
height: height * scale,
@@ -477,6 +476,24 @@ export const createCodeToImageUI = async ({
477476
height: `${height}px`,
478477
},
479478
});
479+
480+
if (formData.format === 'png') {
481+
try {
482+
const metaPng: any = await metaPngPromise;
483+
const newConfig = getCodeConfig();
484+
let url: string | undefined;
485+
if (formData.watermark && JSON.stringify(cachedConfig) === JSON.stringify(newConfig)) {
486+
url = watermark.innerText.trim();
487+
} else {
488+
url = await deps.getShareUrl(newConfig, formData.watermark);
489+
}
490+
dataUrl = metaPng.addMetadataFromBase64DataURI(dataUrl, 'LiveCodes URL', url);
491+
} catch {
492+
// could not add PNG metadata
493+
}
494+
}
495+
496+
return dataUrl;
480497
};
481498

482499
const saveBtn = codeToImageContainer.querySelector<HTMLButtonElement>('#code-to-img-save-btn')!;

src/livecodes/UI/import.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ export const createImportUI = ({
8484
e.preventDefault();
8585
const buttonText = importButton.innerHTML;
8686
importButton.innerHTML = window.deps.translateString('generic.loading', 'Loading...');
87+
notifications.info(window.deps.translateString('generic.loading', 'Loading...'));
8788
importButton.disabled = true;
8889
const importInput = getUrlImportInput(importContainer);
8990
const url = importInput.value;
@@ -111,7 +112,7 @@ export const createImportUI = ({
111112
const codeImportInput = getCodeImportInput(importContainer);
112113
eventsManager.addEventListener(codeImportInput, 'change', () => {
113114
if (!codeImportInput.files?.length) return;
114-
115+
notifications.info(window.deps.translateString('generic.loading', 'Loading...'));
115116
importFromFiles(codeImportInput.files, populateConfig, eventsManager)
116117
.then(loadConfig)
117118
.then(modal.close)
@@ -126,6 +127,7 @@ export const createImportUI = ({
126127
e.preventDefault();
127128
const buttonText = importJsonUrlButton.innerHTML;
128129
importJsonUrlButton.innerHTML = window.deps.translateString('generic.loading', 'Loading...');
130+
notifications.info(window.deps.translateString('generic.loading', 'Loading...'));
129131
importJsonUrlButton.disabled = true;
130132
const importInput = getImportJsonUrlInput(importContainer);
131133
const url = importInput.value;
@@ -242,6 +244,7 @@ export const createImportUI = ({
242244

243245
const fileInput = getImportFileInput(importContainer);
244246
eventsManager.addEventListener(fileInput, 'change', () => {
247+
notifications.info(window.deps.translateString('generic.loading', 'Loading...'));
245248
loadFile<Config>(fileInput)
246249
.then(loadConfig)
247250
.then(modal.close)

src/livecodes/core.ts

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4279,9 +4279,12 @@ const handleCodeToImage = () => {
42794279

42804280
const currentUrl = (location.origin + location.pathname).split('/').slice(0, -1).join('/');
42814281

4282-
const getShareUrl = async (config: Partial<Config>) => {
4283-
const param = '/?x=id/' + (await shareService.shareProject(config));
4284-
return currentUrl + param;
4282+
const getShareUrl = async (config: Partial<Config>, shortUrl = true) => {
4283+
if (shortUrl) {
4284+
const param = '/?x=id/' + (await shareService.shareProject(config));
4285+
return currentUrl + param;
4286+
}
4287+
return getPlaygroundUrl({ appUrl: currentUrl, config });
42854288
};
42864289

42874290
const codeToImageModule: typeof import('./UI/code-to-image') = await import(

src/livecodes/html/import.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
<li>Raw code</li>
5656
<li>Code in web page DOM</li>
5757
<li>Code in zip file</li>
58+
<li>Code in image (OCR)</li>
5859
<li>Official playgrounds<br />(TypeScript and Vue)</li>
5960
</ul>
6061
Please visit the

src/livecodes/i18n/locales/en/translation.lokalise.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1657,8 +1657,8 @@
16571657
"translation": "Bulk import started..."
16581658
},
16591659
"import.code.desc": {
1660-
"notes": "### <tag-1> ###\n<ul />\n\n### <tag-2> ###\n<li />\n\n### <tag-3> ###\n<li />\n\n### <tag-4> ###\n<li />\n\n### <tag-5> ###\n<li />\n\n### <tag-6> ###\n<li />\n\n### <tag-7> ###\n<li />\n\n### <tag-8> ###\n<li />\n\n### <tag-9> ###\n<li />\n\n### <tag-10> ###\n<li />\n\n### <tag-11> ###\n<li />\n\n### <tag-12> ###\n<li />\n\n### <tag-13> ###\n<br />\n\n### <tag-14> ###\n<a href=\"{{DOCS_BASE_URL}}features/import\" target=\"_blank\" rel=\"noopener\" />\n\n",
1661-
"translation": "Supported Sources: <tag-1> <tag-2>GitHub gist</tag-2> <tag-3>GitHub file</tag-3> <tag-4>Directory in a GitHub repo</tag-4> <tag-5>Gitlab snippet</tag-5> <tag-6>Gitlab file</tag-6> <tag-7>Directory in a Gitlab repo</tag-7> <tag-8>JS Bin</tag-8> <tag-9>Raw code</tag-9> <tag-10>Code in web page DOM</tag-10> <tag-11>Code in zip file</tag-11> <tag-12>Official playgrounds<tag-13></tag-13>(TypeScript and Vue)</tag-12> </tag-1> Please visit the <tag-14>documentations</tag-14> for details."
1660+
"notes": "### <tag-1> ###\n<ul />\n\n### <tag-2> ###\n<li />\n\n### <tag-3> ###\n<li />\n\n### <tag-4> ###\n<li />\n\n### <tag-5> ###\n<li />\n\n### <tag-6> ###\n<li />\n\n### <tag-7> ###\n<li />\n\n### <tag-8> ###\n<li />\n\n### <tag-9> ###\n<li />\n\n### <tag-10> ###\n<li />\n\n### <tag-11> ###\n<li />\n\n### <tag-12> ###\n<li />\n\n### <tag-13> ###\n<li />\n\n### <tag-14> ###\n<br />\n\n### <tag-15> ###\n<a href=\"{{DOCS_BASE_URL}}features/import\" target=\"_blank\" rel=\"noopener\" />\n\n",
1661+
"translation": "Supported Sources: <tag-1> <tag-2>GitHub gist</tag-2> <tag-3>GitHub file</tag-3> <tag-4>Directory in a GitHub repo</tag-4> <tag-5>Gitlab snippet</tag-5> <tag-6>Gitlab file</tag-6> <tag-7>Directory in a Gitlab repo</tag-7> <tag-8>JS Bin</tag-8> <tag-9>Raw code</tag-9> <tag-10>Code in web page DOM</tag-10> <tag-11>Code in zip file</tag-11> <tag-12>Code in image (OCR)</tag-12> <tag-13>Official playgrounds<tag-14></tag-14>(TypeScript and Vue)</tag-13> </tag-1> Please visit the <tag-15>documentations</tag-15> for details."
16621662
},
16631663
"import.code.fromFile": {
16641664
"notes": "",

src/livecodes/i18n/locales/en/translation.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -647,7 +647,7 @@ const translation = {
647647
started: 'Bulk import started...',
648648
},
649649
code: {
650-
desc: 'Supported Sources: <1> <2>GitHub gist</2> <3>GitHub file</3> <4>Directory in a GitHub repo</4> <5>Gitlab snippet</5> <6>Gitlab file</6> <7>Directory in a Gitlab repo</7> <8>JS Bin</8> <9>Raw code</9> <10>Code in web page DOM</10> <11>Code in zip file</11> <12>Official playgrounds<13></13>(TypeScript and Vue)</12> </1> Please visit the <14>documentations</14> for details.',
650+
desc: 'Supported Sources: <1> <2>GitHub gist</2> <3>GitHub file</3> <4>Directory in a GitHub repo</4> <5>Gitlab snippet</5> <6>Gitlab file</6> <7>Directory in a Gitlab repo</7> <8>JS Bin</8> <9>Raw code</9> <10>Code in web page DOM</10> <11>Code in zip file</11> <12>Code in image (OCR)</12> <13>Official playgrounds<14></14>(TypeScript and Vue)</13> </1> Please visit the <15>documentations</15> for details.',
651651
fromFile: 'Import local files',
652652
fromURL: 'Import from URL',
653653
heading: 'Import Code',

src/livecodes/import/files.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import type { ContentConfig, EventsManager } from '../models';
2+
import { importFromImage } from './image';
23
import type { SourceFile, populateConfig as populateConfigFn } from './utils';
34
import { importFromZip } from './zip';
45

@@ -41,11 +42,18 @@ export const importFromFiles = async (
4142
});
4243

4344
const loadZipFile = (files: FileList) => importFromZip(files[0], populateConfig);
45+
const loadImage = (files: FileList) => importFromImage(files[0]);
4446

4547
if (!files?.length) return {};
4648

4749
const getConfigFromFiles =
48-
files?.length === 1 && files[0].name.endsWith('.zip') ? loadZipFile : loadFiles;
50+
files?.length > 1
51+
? loadFiles
52+
: files[0].name.endsWith('.zip')
53+
? loadZipFile
54+
: files[0].type.startsWith('image/') && files[0].type !== 'image/svg+xml'
55+
? loadImage
56+
: loadFiles;
4957

5058
return getConfigFromFiles(files);
5159
};

src/livecodes/import/image.ts

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
import { detectLanguage, getLanguageByAlias, getLanguageEditorId, languages } from '../languages';
2+
import type { ContentConfig } from '../models';
3+
import { blobToBase64, loadScript } from '../utils/utils';
4+
import { metaPngUrl, tesseractUrl } from '../vendors';
5+
import { importCompressedCode } from './code';
6+
import { importProject } from './project-id';
7+
8+
/**
 * Lazily loaded Tesseract.js module (only the members used in this file are typed).
 * Kept at module level so that later calls to `ocr` reuse the already loaded module.
 */
let Tesseract:
  | {
      createWorker: (lang: string) => Promise<{
        recognize: (blob: Blob) => Promise<{ data: { text: string } }>;
        terminate: () => void;
      }>;
    }
  | undefined;

/**
 * Runs optical character recognition (English) on an image.
 *
 * @param image - the image data to recognize
 * @returns the recognized text, or an empty string when the dynamically
 * imported module does not provide a default export
 */
const ocr = async (image: Blob) => {
  Tesseract = Tesseract ?? (await import(tesseractUrl)).default;
  if (!Tesseract) return '';

  const worker = await Tesseract.createWorker('eng');
  try {
    const ret = await worker.recognize(image);
    return ret.data.text;
  } finally {
    // always release the worker, even when recognition fails,
    // otherwise every failed import leaves a worker running
    worker.terminate();
  }
};
25+
26+
/**
 * Looks for a LiveCodes share URL in `text` and loads the project it refers to.
 * This is how images created by LiveCodes "Code to Image" (which carry the
 * share URL) are detected.
 *
 * - A short URL (`?x=id/...`) is matched anywhere in `text`; the last match wins.
 * - When `isShareUrl` is true, `text` is additionally treated as a full URL
 *   whose hash carries the compressed config (`#config=...`).
 *
 * @param text - OCR output or a URL read from image metadata
 * @param isShareUrl - set when `text` is known to be a URL (not free text)
 * @returns the result of the matching importer, or `null` when nothing usable was found
 */
const getConfigFromShareUrl = (text: string, isShareUrl = false) => {
  const shortUrlMatches = [...text.matchAll(/\?x=(id\/\S{11,20})/g)];
  const lastId = shortUrlMatches.at(-1)?.[1];

  if (lastId) {
    // a "]" in the id is replaced by "j" (presumably a common OCR misread)
    const projectId = lastId.replace(/]/g, 'j');
    // only ids made exclusively of these characters are accepted
    const alphabet = '23456789abcdefghijkmnpqrstuvwxyz';
    const idCharacters = projectId.slice('id/'.length).split('');
    const isValidId = idCharacters.every((character) => alphabet.includes(character));
    if (isValidId) {
      return importProject(projectId);
    }
  }

  if (!isShareUrl) return null;

  try {
    const { href } = new URL(text.trim());
    const compressedCode = decodeURIComponent(href.split('#config=')[1] || '');
    if (compressedCode) {
      return importCompressedCode(compressedCode);
    }
  } catch {
    // not a valid URL (or malformed encoding): treated as "no config found"
  }
  return null;
};
57+
58+
/**
 * Post-processes raw OCR output into usable source code.
 *
 * If the last two lines contain a LiveCodes share URL, the project config
 * loaded from that URL is returned instead of the text.
 * Otherwise the text is cleaned up:
 * - the first line is dropped when it looks like window buttons,
 * - leading line numbers are removed,
 * - typographic (curly) quotes are replaced by straight quotes.
 *
 * @param code - raw text recognized in the image
 * @returns the project config, the cleaned code, or `''` for blank input
 */
const cleanUpCode = async (code: string) => {
  if (!code?.trim()) return '';
  let lines = code.trim().split('\n');
  const [firstLine, ...rest] = lines;
  const lastLines = lines.slice(-2).join('\n');

  const config = await getConfigFromShareUrl(lastLines);
  if (config) return config;

  // remove first line if it contains window buttons
  // (these are the characters the buttons are typically recognized as)
  const buttonCharacters = ['0', 'C', 'N', 'J', 'X', '(', ')', '[', ']', '|'];
  const charactersFound = firstLine
    .slice(0, 6)
    .split('')
    .filter((c) => buttonCharacters.includes(c)).length;
  const hasButtons = charactersFound > 2 || charactersFound / firstLine.length > 0.6;
  if (hasButtons) {
    code = rest.join('\n');
  }

  lines = code.trim().split('\n');

  // remove line numbers (when more than 30% of the lines start with a digit)
  if (lines.filter((l) => l.match(/^[0-9]{1,4}\s?/)).length / lines.length > 0.3) {
    // NOTE(review): this strips up to 4 leading non-space characters from every
    // line, not only digits - presumably to tolerate misread digits; confirm.
    code = lines.map((l) => l.replace(/^\S{1,4}\s?/, '')).join('\n');
  }

  // replace typographic quotes with straight quotes.
  // Written as unicode escapes so the characters cannot get lost to encoding
  // (an empty character class `[]` never matches anything).
  code = code.replace(/[\u2018\u2019]/g, "'").replace(/[\u201C\u201D]/g, '"');
  return code;
};
88+
89+
/** Escapes regular expression metacharacters so that `text` is matched literally. */
const escapeRegExp = (text: string) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

/**
 * Imports code from an image. Sources are tried in this order:
 *
 * 1. The "LiveCodes URL" PNG metadata entry (written by "Code to Image"),
 *    which is resolved to the project it points to.
 * 2. OCR: the recognized text is either a share URL (resolved to its project)
 *    or code, whose language is guessed from language detection and from
 *    language names / file extensions mentioned in the text.
 * 3. Fallback: the image itself, embedded in an HTML `<img>` element.
 *
 * Failures in steps 1 and 2 are deliberately swallowed so that the next
 * step is tried.
 *
 * @param blob - the image to import
 * @returns a (partial) content config for the imported code
 */
export const importFromImage = async (blob: Blob): Promise<Partial<ContentConfig>> => {
  try {
    const metaPng: any = await loadScript(metaPngUrl, 'MetaPNG');
    const arrayBuffer = await blob.arrayBuffer();
    const uint8Array = new Uint8Array(arrayBuffer);
    const livecodesUrl = metaPng.getMetadata(uint8Array, 'LiveCodes URL');
    if (livecodesUrl) {
      const config = await getConfigFromShareUrl(livecodesUrl, true);
      if (config) return config;
    }
  } catch {
    // not PNG or not generated by LiveCodes, continue
  }

  try {
    const text = await ocr(blob);
    const content = await cleanUpCode(text);
    if (content && typeof content === 'object') {
      // config from share url
      return content;
    }

    if (content.trim().length > 3) {
      const langs = languages.map((lang) => lang.name);
      const detected = await detectLanguage(content, langs);
      detected.language = getLanguageByAlias(detected.language) || detected.language;
      detected.secondBest = getLanguageByAlias(detected.secondBest) || detected.secondBest;

      // whole-word, case-insensitive search for a term in the recognized text.
      // The term is escaped (it is data, not a pattern) and empty/missing terms
      // never match (otherwise a missing extension would search for "undefined").
      const isMentioned = (term: string | undefined) =>
        !!term && new RegExp(`\\b${escapeRegExp(term)}\\b`, 'i').test(content);

      // language name or filename with extension in image
      const langNamesInCode = languages
        .filter((lang) => isMentioned(lang.name) || isMentioned(lang.extensions[0]))
        .map((lang) => lang.name);

      // prefer a mentioned language that agrees with detection,
      // then any mentioned language, then the detection result
      const language =
        langNamesInCode.find(
          (lang) => lang === detected.language || lang === detected.secondBest,
        ) ??
        langNamesInCode[0] ??
        detected.language ??
        detected.secondBest ??
        'html';

      const editorId = getLanguageEditorId(language) ?? 'markup';
      return {
        activeEditor: editorId,
        [editorId]: {
          language,
          content,
        },
      };
    }
  } catch {
    // OCR or language detection failed, use the fallback below
  }

  // fallback
  return {
    markup: {
      language: 'html',
      content: `<img src="${await blobToBase64(blob)}" alt="image" />`,
    },
  };
};

src/livecodes/import/project-id.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { shareService } from '../services';
22

33
export const importProject = (url: string) => {
4-
const id = url.slice(3);
4+
const id = url.slice('id/'.length);
55
return shareService.getProject(id);
66
};

0 commit comments

Comments
 (0)