-
-
Notifications
You must be signed in to change notification settings - Fork 3k
Expand file tree
/
Copy pathExportHandler.ts
More file actions
193 lines (171 loc) · 7.69 KB
/
ExportHandler.ts
File metadata and controls
193 lines (171 loc) · 7.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
'use strict';
/**
* Handles the export requests
*/
/*
* 2011 Peter 'Pita' Martischka (Primary Technology Ltd)
* 2014 John McLear (Etherpad Foundation / McLear Ltd)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS-IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
const exporthtml = require('../utils/ExportHtml');
const exporttxt = require('../utils/ExportTxt');
const exportEtherpad = require('../utils/ExportEtherpad');
import crypto from 'node:crypto';
import fs from 'fs';
import settings from '../utils/Settings';
import os from 'os';
const hooks = require('../../static/js/pluginfw/hooks');
import util from 'util';
const { checkValidRev } = require('../utils/checkValidRev');
// Promise-returning wrappers around the callback-style fs functions so they
// can be awaited from async code.
const fsp_writeFile = util.promisify(fs.writeFile);
const fsp_unlink = util.promisify(fs.unlink);
// Temporary-file directory reported by the operating system; the soffice
// export path writes its intermediate source and destination files here.
const tempDirectory = os.tmpdir();
/**
 * do a requested export
 *
 * Marks the response as a downloadable attachment and then sends the pad in
 * the requested format:
 *  - `etherpad` and `txt` are produced directly by their exporters;
 *  - `html` is rendered, passed through the `exportHTMLSend` hook and sent;
 *  - every other type is converted from the rendered HTML, either in-process
 *    (docx / pdf, when soffice is not usable for that type) or through a pair
 *    of temporary files handed to the `exportConvert` hook or LibreOffice.
 *
 * If `req.params.rev` is defined it is replaced by the result of
 * `checkValidRev` before any exporter is called.
 *
 * Failures of the in-process docx/pdf converters are logged and answered with
 * a 500 response. Any other error rejects the returned promise; on the
 * soffice path the temporary files are removed (best effort) before the
 * error is rethrown.
 *
 * @param {Object} req the request object
 * @param {Object} res the response object
 * @param {String} padId the pad id to export
 * @param {String} readOnlyId the read only id of the pad to export
 * @param {String} type the type to export
 */
exports.doExport = async (req: any, res: any, padId: string, readOnlyId: string, type:string) => {
  // avoid naming the read-only file as the original pad's id
  let fileName = readOnlyId ? readOnlyId : padId;

  // allow fileName to be overwritten by a hook; the type is kept static for security reasons
  const hookFileName = await hooks.aCallFirst('exportFileName', padId);

  // if the hook supplied a name, use it instead of the pad id. Note that the
  // hook result is an array; it is stringified when the attachment name is built.
  if (hookFileName.length) {
    fileName = hookFileName;
  }

  // tell the browser that this is a downloadable file
  res.attachment(`${fileName}.${type}`);

  if (req.params.rev !== undefined) {
    // ensure revision is a number
    // modify req, as we use it in a later call to exportConvert
    req.params.rev = checkValidRev(req.params.rev);
  }

  if (type === 'etherpad') {
    // Honor the :rev URL segment on `.etherpad` exports the same way the
    // other formats already do — revNum limits the serialized pad to revs
    // 0..rev (issue #5071).
    const pad = await exportEtherpad.getPadRaw(padId, readOnlyId, req.params.rev);
    res.send(pad);
  } else if (type === 'txt') {
    // plain text export can be sent directly. It has its own exporter
    // because tabs are stored as attributes and not plain text.
    const txt = await exporttxt.getPadTXTDocument(padId, req.params.rev);
    res.send(txt);
  } else {
    // render the html document
    let html = await exporthtml.getPadHTMLDocument(padId, req.params.rev, readOnlyId);

    // decide what to do with the html export

    // if this is a html export, we can send this from here directly
    if (type === 'html') {
      // do any final changes the plugin might want to make
      const newHTML = await hooks.aCallFirst('exportHTMLSend', html);
      if (newHTML.length) html = newHTML;
      res.send(html);
      return;
    }

    // Soffice-first dispatch (issue #7538). When soffice is configured
    // we keep the legacy convert-via-tempfile path; when it's not, we
    // hand DOCX to html-to-docx and PDF to our pdfkit walker — both
    // pure-JS, in-process. No fallback chain: native errors surface as
    // 5xx so admins see real failures instead of silent shadowing.
    const {sofficeAvailable} = require('../utils/Settings');
    const sofState = sofficeAvailable();
    const goNative = sofState === 'no' ||
        (sofState === 'withoutPDF' && type === 'pdf');

    if (goNative) {
      const {
        stripRemoteImages, extractBody, wrapLooseLines, dropEmptyBlocks,
        applyMonospaceToCode,
      } = require('../utils/ExportSanitizeHtml');
      // The HTML pipeline returns a full document (head, style, body); the
      // legacy soffice path renders that fine, but the in-process
      // converters need just the body content to avoid leaking CSS into
      // the output and to drop the document-level whitespace that creates
      // stray paragraph breaks at the top of the result.
      // dropEmptyBlocks strips heading-styled blank-line wrappers that
      // ep_headings2 emits between every styled line.
      const bodyHtml = dropEmptyBlocks(stripRemoteImages(extractBody(html)));
      // the full document is no longer needed; let it be garbage collected
      html = null;
      try {
        if (type === 'docx') {
          // applyMonospaceToCode strips `<code>`/`<pre>`/`<tt>` wrappers
          // (html-to-docx ignores them AND has a bug where it drops
          // `<a href>` children of those tags) and emits styled
          // monospace spans, forwarding any block-level alignment style
          // to a wrapping `<p>`. Run BEFORE wrapLooseLines so the
          // resulting `<p>` lands at the loose-line boundary instead
          // of getting double-wrapped.
          //
          // wrapLooseLines then handles `<br>` semantics: bare `<br>`
          // outside `<p>` becomes a soft break, `<br><br>` becomes a
          // paragraph boundary plus blank-line markers.
          const docxHtml = wrapLooseLines(applyMonospaceToCode(bodyHtml));
          const htmlToDocx = require('html-to-docx');
          const buf = await htmlToDocx(docxHtml);
          res.contentType(
              'application/vnd.openxmlformats-officedocument.wordprocessingml.document');
          res.send(buf);
          return;
        }
        if (type === 'pdf') {
          const {htmlToPdfBuffer} = require('../utils/ExportPdfNative');
          const buf = await htmlToPdfBuffer(bodyHtml);
          res.contentType('application/pdf');
          res.send(buf);
          return;
        }
        // soffice-only formats (odt, doc) are blocked at the route guard
        // when soffice is null; reaching here means the guard is wrong.
        res.status(500).send(`Cannot export ${type} without soffice configured`);
        return;
      } catch (err) {
        console.error(
            `native ${type} export failed for pad "${padId}":`,
            err && (err as Error).stack ? (err as Error).stack : err);
        res.status(500).send(`Failed to export pad as ${type}.`);
        return;
      }
    }

    // soffice path — write the html export to a file. Use CSPRNG output
    // for the temp path token (see matching note in ImportHandler.ts).
    const randNum = crypto.randomBytes(16).toString('hex');
    const srcFile = `${tempDirectory}/etherpad_export_${randNum}.html`;
    const destFile = `${tempDirectory}/etherpad_export_${randNum}.${type}`;

    try {
      await fsp_writeFile(srcFile, html);

      // ensure html can be collected by the garbage collector
      html = null;

      // Allow plugins to overwrite the convert in export process; only when
      // no plugin handled it is the job sent to the converter (libreoffice).
      const result = await hooks.aCallAll('exportConvert', {srcFile, destFile, req, res});
      if (result.length === 0) {
        const converter = require('../utils/LibreOffice');
        await converter.convertFile(srcFile, destFile, type);
      }

      // send the file
      // NOTE(review): Express documents res.sendFile as callback-based; if
      // `res` is a plain Express response this await presumably does not wait
      // for the transfer to finish, so the cleanup below may race with the
      // send — confirm against the caller before relying on the ordering.
      await res.sendFile(destFile, null);
    } catch (err) {
      // Something failed after the temp paths were chosen. Remove whatever
      // was created so failed exports do not pile up in the temp directory.
      // Cleanup errors (e.g. the file was never written) are deliberately
      // ignored here so they cannot mask the original failure.
      await fsp_unlink(srcFile).catch(() => {});
      await fsp_unlink(destFile).catch(() => {});
      throw err;
    }

    // clean up temporary files
    await fsp_unlink(srcFile);

    // 100ms delay to accommodate for slow windows fs
    if (os.type().indexOf('Windows') > -1) {
      await new Promise((resolve) => setTimeout(resolve, 100));
    }

    await fsp_unlink(destFile);
  }
};