Skip to content

Commit 64d68ac

Browse files
committed
Initial implementation using polling
1 parent 28aeec4 commit 64d68ac

1 file changed

Lines changed: 134 additions & 9 deletions

File tree

src/plugin.js

Lines changed: 134 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,158 @@
1+
const { Buffer } = require('node:buffer')
const { createHash } = require('node:crypto')
const { setTimeout } = require('node:timers/promises')
3+
4+
// Model id that transcribe() falls back to when no `model` option is set and
// the `htr` option is truthy (presumably a handwriting model -- TODO confirm).
const TEXT_TITAN = 356425
5+
// Model id that transcribe() falls back to when no `model` option is set and
// the `htr` option is falsy (presumably a printed-text model -- TODO confirm).
const PRINT_M1 = 39995
6+
7+
/**
 * Compute the MD5 digest of `data`.
 *
 * @param {string|Buffer} data - content to hash
 * @param {string} [encoding='hex'] - output encoding passed to `digest()`
 * @returns {string} the digest in the requested encoding
 */
const md5 = (data, encoding = 'hex') =>
  createHash('md5').update(data).digest(encoding)
12+
113
/**
 * Plugin that transcribes an image through a remote HTTP service: the image
 * is uploaded to the service's cache, a transcription job is submitted, and
 * the job is polled until it reaches a final state.
 */
export default class ArtimiPlugin {
  /**
   * @param {object} options - user options, layered over ArtimiPlugin.defaults
   * @param {object} context - host helpers; `logger` and `sharp` are read from it
   */
  constructor (options, context) {
    this.options = Object.assign({}, ArtimiPlugin.defaults, options)
    this.context = context
    // Aborted by unload() to cancel a poll that is still in flight.
    this.controller = new AbortController()
  }

  /**
   * Upload the image, submit a transcription job and wait for its result.
   * On success `transcription.config.jobId`, `transcription.text` and
   * `transcription.data` are filled in.
   *
   * @param {object} transcription - record to update in place
   * @param {object} image - `buffer` holds the pixel data; every other
   *   property is forwarded to upload() as the raw image description
   * @throws {Error} if the service rejects the request, the job ends in a
   *   non-completed state, or polling is aborted
   */
  async transcribe (transcription, { buffer, ...raw }) {
    let { logger } = this.context
    let { host, token, htr, model } = this.options

    let images = [await this.upload(buffer, raw)]
    let config = {
      model: model || (htr ? TEXT_TITAN : PRINT_M1)
    }

    logger.info('Submit transcription request...')
    let res = await fetch(`${host}/transcription`, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${token}`,
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({ config, images })
    })

    if (!res.ok) {
      throw new Error(await res.text())
    }

    let job = await res.json()
    transcription.config.jobId = job.id

    logger.info(`Fetch transcription #${job.id}...`)
    // The output lives on the completed job returned by poll(), not on the
    // response to the initial submit request.
    let { output } = await this.poll(job)

    transcription.text = output.text
    transcription.data = output.alto
  }

  /**
   * Poll the job until it is completed.
   *
   * Failed requests are logged and retried; once more than `maxRetries`
   * requests have failed in total the last error is rethrown.
   *
   * @param {object} job - job descriptor; only `id` is read
   * @returns {Promise<object>} the job as reported in state "completed"
   * @throws {Error} on an unexpected job state, on too many failed requests,
   *   or with an AbortError once unload() has been called
   */
  async poll (job) {
    let { logger } = this.context
    let { host, token, interval, maxRetries } = this.options
    let { signal } = this.controller

    let numRetries = 0

    while (true) {
      let next

      try {
        let res = await fetch(`${host}/transcription/${job.id}`, {
          headers: {
            Authorization: `Bearer ${token}`
          },
          signal
        })

        if (!res.ok) {
          throw new Error(await res.text())
        }

        next = await res.json()
      } catch (err) {
        // An aborted request is not a retryable failure.
        if (signal.aborted) {
          throw err
        }
        logger.error({ stack: err.stack }, `Request failed: ${err.message}`)
        if (++numRetries > maxRetries) {
          throw err
        }
      }

      // `next` stays undefined when the request failed but may be retried;
      // in that case just wait and try again.
      if (next !== undefined) {
        switch (next.state) {
          case 'completed':
            return next
          case 'created':
          case 'active':
          case 'paused':
            break
          default:
            throw new Error(`transcription request state "${next.state}"`)
        }
      }

      // Rejects with an AbortError when unload() fires, which ends the loop.
      await setTimeout(interval, null, { signal })
    }
  }

  /**
   * Encode the image as JPEG and make sure it is present in the upload cache.
   *
   * A PUT without body announces the file; status 204 means it is already
   * cached, status 307 redirects to the URL the image data must be PUT to.
   *
   * @param {Buffer} buffer - pixel data handed to `sharp.toBuffer`
   * @param {object} [raw] - raw image description forwarded to `sharp.toBuffer`
   * @returns {Promise<string>} the cache file name, `<md5 hex>.<type>`
   * @throws {Error} with the response body on any other status or failed upload
   */
  async upload (buffer, raw) {
    let { logger, sharp } = this.context
    let { host, token } = this.options

    // TODO use PNG if image uses alpha
    let type = 'jpeg'
    let image = await sharp.toBuffer(type, buffer, { raw })

    let checksum = md5(image)
    // Content-MD5 carries the base64 form of the digest, not the hex form.
    let contentMd5 = Buffer.from(checksum, 'hex').toString('base64')
    let contentType = `image/${type}`
    let name = `${checksum}.${type}`

    let res = await fetch(`${host}/uploads/${name}`, {
      method: 'PUT',
      // The redirect target must be read from the response, not followed.
      redirect: 'manual',
      headers: {
        'Authorization': `Bearer ${token}`,
        'X-Content-Length': image.length,
        'Content-MD5': contentMd5,
        'Content-Type': contentType
      }
    })

    switch (res.status) {
      case 204:
        logger.info('Image already cached...')
        return name
      case 307:
        logger.info('Upload image to cache...')
        res = await fetch(res.headers.get('location'), {
          method: 'PUT',
          body: image,
          headers: {
            'Content-MD5': contentMd5,
            'Content-Type': contentType
          }
        })
        if (!res.ok) {
          throw new Error(await res.text())
        }
        return name
      default:
        throw new Error(await res.text())
    }
  }

  /**
   * Cancel any poll that is still waiting or in flight.
   */
  async unload () {
    this.controller.abort()
  }
}
28150

29151
// Fallback option values; the constructor layers the user's options on top.
ArtimiPlugin.defaults = {
  // Chooses between the two built-in model ids when `model` is not set.
  htr: true,
  // Explicit model id; when truthy it takes precedence over `htr`.
  model: null,
  // Sent as the Bearer token in the Authorization header.
  token: null,
  // Delay between two polls of a pending job, in milliseconds.
  interval: 10_000,
  // Number of failed poll requests tolerated before the error is rethrown.
  maxRetries: 3,
  // Base URL prepended to every service request.
  host: 'http://localhost:3000'
}

0 commit comments

Comments
 (0)