Skip to content

Commit ea2d635

Browse files
committed
fix: added consistent support for xml declarations
1 parent bf6f13c commit ea2d635

4 files changed

Lines changed: 71 additions & 42 deletions

File tree

packages/studio-web/src/app/b64.service.ts

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,11 @@ export class B64Service {
6060
}
6161

6262
xmlToB64(xml: Document) {
63-
return this.utf8_to_b64(new XMLSerializer().serializeToString(xml));
63+
return this.utf8_to_b64(
64+
new XMLSerializer()
65+
.serializeToString(xml)
66+
.replace("?><read", "?>\n<read"),
67+
);
6468
}
6569

6670
blobToB64(blob: any) {

packages/studio-web/src/app/editor/editor.component.ts

Lines changed: 52 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -287,37 +287,63 @@ export class EditorComponent implements OnDestroy, OnInit, AfterViewInit {
287287
const parser = new DOMParser();
288288
const readalong = parser.parseFromString(text, "text/html");
289289
const element = readalong.querySelector("read-along");
290-
291290
if (element === undefined || element === null) {
292291
return undefined;
293292
}
294293

295-
// Store the element as parsed XML
296-
// Create body element, which mysteriously gets removed from the text element.
297-
let textNode = element.querySelector("text");
298-
if (textNode && !textNode.querySelector("body")) {
299-
const body = document.createElement("body");
300-
body.id = "t0b0";
301-
while (textNode.hasChildNodes()) {
302-
// @ts-ignore
303-
body.appendChild(textNode.firstChild);
294+
// What is the appropriate source for the XML read along document? Either it was
295+
// encoded in the element's href attribute, or included as a child element of the
296+
// <read-along /> element.
297+
//
298+
// Prioritize the href implementation since it is more common.
299+
const href = element.getAttribute("href");
300+
if (href) {
301+
const reply = await fetch(href);
302+
if (reply.ok) {
303+
// FIXME: potential zip-bombing?
304+
let xmlString = await reply.text();
305+
if (!xmlString.startsWith("<?xml")) {
306+
xmlString = `<?xml version='1.0' encoding='utf-8'?>\n` + xmlString;
307+
}
308+
309+
this.editorService.rasControl$.setValue(
310+
parser.parseFromString(xmlString, "application/xml"),
311+
);
312+
}
313+
} else {
314+
// Store the element as parsed XML
315+
// Create body element, which gets removed from the text element. This occurs
316+
// because the document was parsed with text/html mimetype which only allows
317+
// a single <body /> element as a child of <html />.
318+
let textNode = element.querySelector("text");
319+
if (textNode && !textNode.querySelector("body")) {
320+
const body = document.createElement("body");
321+
body.id = "t0b0";
322+
while (textNode.hasChildNodes()) {
323+
// @ts-ignore
324+
body.appendChild(textNode.firstChild);
325+
}
326+
textNode.appendChild(body);
327+
}
328+
329+
// Similar issue, the document was parsed with a text/html mimetype, attributes
330+
// in HTML are always lowercased.
331+
const serializer = new XMLSerializer();
332+
let xmlString = serializer
333+
.serializeToString(element)
334+
.replace(/arpabet=/g, "ARPABET=") // Our DTD says ARPABET is upper case
335+
.replace(/xmlns="[\w\/\:\.]*"/g, ""); // Our DTD does not accept xmlns that the parser inserts
336+
if (!xmlString.startsWith("<?xml")) {
337+
xmlString = `<?xml version='1.0' encoding='utf-8'?>\n` + xmlString;
304338
}
305-
textNode.appendChild(body);
339+
340+
this.editorService.rasControl$.setValue(
341+
parser.parseFromString(xmlString, "application/xml"),
342+
); // re-parse as XML
306343
}
307-
const serializer = new XMLSerializer();
308-
const xmlString = serializer
309-
.serializeToString(element)
310-
.replace(/arpabet=/g, "ARPABET=") // Our DTD says ARPABET is upper case
311-
.replace(/xmlns="[\w\/\:\.]*"/g, ""); // Our DTD does not accept xmlns that the parser inserts
312-
//console.log(xmlString);
313-
this.editorService.rasControl$.setValue(
314-
parser.parseFromString(xmlString, "text/xml"),
315-
); // re-parse as XML
316-
//console.log(this.editorService.rasControl$.value);
317344

318345
// Oh, there's an audio file, okay, try to load it
319346
const audio = element.getAttribute("audio");
320-
321347
if (audio !== null) {
322348
const reply = await fetch(audio);
323349
// Did that work? Great!
@@ -328,20 +354,11 @@ export class EditorComponent implements OnDestroy, OnInit, AfterViewInit {
328354
);
329355
}
330356
}
331-
// Is read-along linked (including data URI) or embedded?
332-
const href = element.getAttribute("href");
333-
if (href === null) {
334-
if (this.editorService.rasControl$.value) {
335-
this.createSegments(this.editorService.rasControl$.value);
336-
}
337-
} else {
338-
const reply = await fetch(href);
339-
if (reply.ok) {
340-
const text2 = await reply.text();
341-
// FIXME: potential zip-bombing?
342-
this.parseReadalong(text2);
343-
}
357+
358+
if (this.editorService.rasControl$.value) {
359+
this.createSegments(this.editorService.rasControl$.value);
344360
}
361+
345362
return readalong.querySelector("body")?.innerHTML;
346363
}
347364

packages/studio-web/src/app/ras.service.ts

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import { catchError, Observable, of, take } from "rxjs";
1+
import { map, Observable } from "rxjs";
22

33
import { HttpClient } from "@angular/common/http";
44
import { Injectable } from "@angular/core";
@@ -72,7 +72,15 @@ export class RasService {
7272
}
7373

7474
assembleReadalong$(body: ReadAlongRequest): Observable<ReadAlong> {
75-
return this.http.post<ReadAlong>(this.baseURL + "/assemble", body);
75+
return this.http.post<ReadAlong>(this.baseURL + "/assemble", body).pipe(
76+
map((ras: ReadAlong) => {
77+
if (!ras.processed_ras.startsWith("<?xml")) {
78+
ras.processed_ras =
79+
`<?xml version='1.0' encoding='utf-8'?>\n` + ras.processed_ras;
80+
}
81+
return ras;
82+
}),
83+
);
7684
}
7785
getLangs$(): Observable<Array<SupportedLanguage>> {
7886
return this.http.get<Array<SupportedLanguage>>(this.baseURL + "/langs");

packages/studio-web/src/app/shared/download/download.service.ts

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -344,9 +344,9 @@ Please host all assets on your server, include the font and package imports defi
344344
// - add .readalong file
345345
await this.updateTranslations(rasXML, readalong);
346346

347-
const xmlString = this.xmlSerializer.serializeToString(
348-
rasXML.documentElement,
349-
);
347+
const xmlString = this.xmlSerializer
348+
.serializeToString(rasXML)
349+
.replace("?><read", "?>\n<read");
350350
const rasFile = new Blob([xmlString], { type: "application/xml" });
351351
assetsFolder?.file(`${basename}.readalong`, rasFile);
352352
// - add index.html file
@@ -448,7 +448,7 @@ Use the text editor to paste the snippet below in your WordPress page:
448448
.convertRasFormat$(
449449
{
450450
dur: audio.duration,
451-
ras: new XMLSerializer().serializeToString(rasXML.documentElement),
451+
ras: new XMLSerializer().serializeToString(rasXML),
452452
},
453453
selectedOutputFormat,
454454
)

0 commit comments

Comments (0)