@@ -287,37 +287,63 @@ export class EditorComponent implements OnDestroy, OnInit, AfterViewInit {
287287 const parser = new DOMParser ( ) ;
288288 const readalong = parser . parseFromString ( text , "text/html" ) ;
289289 const element = readalong . querySelector ( "read-along" ) ;
290-
291290 if ( element === undefined || element === null ) {
292291 return undefined ;
293292 }
294293
295- // Store the element as parsed XML
296- // Create body element, which mysteriously gets removed from the text element.
297- let textNode = element . querySelector ( "text" ) ;
298- if ( textNode && ! textNode . querySelector ( "body" ) ) {
299- const body = document . createElement ( "body" ) ;
300- body . id = "t0b0" ;
301- while ( textNode . hasChildNodes ( ) ) {
302- // @ts -ignore
303- body . appendChild ( textNode . firstChild ) ;
294+ // What is the appropriate source for the XML read along document? Either it was
295+ // encoded in the element's href attribute, or included as a child element of the
296+ // <read-along /> element.
297+ //
298+ // Prioritize the href implementation since it is more common.
299+ const href = element . getAttribute ( "href" ) ;
300+ if ( href ) {
301+ const reply = await fetch ( href ) ;
302+ if ( reply . ok ) {
303+ // FIXME: potential zip-bombing?
304+ let xmlString = await reply . text ( ) ;
305+ if ( ! xmlString . startsWith ( "<?xml" ) ) {
306+ xmlString = `<?xml version='1.0' encoding='utf-8'?>\n` + xmlString ;
307+ }
308+
309+ this . editorService . rasControl$ . setValue (
310+ parser . parseFromString ( xmlString , "application/xml" ) ,
311+ ) ;
312+ }
313+ } else {
314+ // Store the element as parsed XML
315+ // Create body element, which gets removed from the text element. This occurs
316+ // because the document was parsed with text/html mimetype which only allows
317+ // a single <body /> element as a child of <html />.
318+ let textNode = element . querySelector ( "text" ) ;
319+ if ( textNode && ! textNode . querySelector ( "body" ) ) {
320+ const body = document . createElement ( "body" ) ;
321+ body . id = "t0b0" ;
322+ while ( textNode . hasChildNodes ( ) ) {
323+ // @ts -ignore
324+ body . appendChild ( textNode . firstChild ) ;
325+ }
326+ textNode . appendChild ( body ) ;
327+ }
328+
329+ // Similar issue: because the document was parsed with a text/html mimetype, the
330+ // HTML parser lowercased every attribute name.
331+ const serializer = new XMLSerializer ( ) ;
332+ let xmlString = serializer
333+ . serializeToString ( element )
334+ . replace ( / a r p a b e t = / g, "ARPABET=" ) // Our DTD says ARPABET is upper case
335+ . replace ( / x m l n s = " [ \w \/ \: \. ] * " / g, "" ) ; // Our DTD does not accept xmlns that the parser inserts
336+ if ( ! xmlString . startsWith ( "<?xml" ) ) {
337+ xmlString = `<?xml version='1.0' encoding='utf-8'?>\n` + xmlString ;
304338 }
305- textNode . appendChild ( body ) ;
339+
340+ this . editorService . rasControl$ . setValue (
341+ parser . parseFromString ( xmlString , "application/xml" ) ,
342+ ) ; // re-parse as XML
306343 }
307- const serializer = new XMLSerializer ( ) ;
308- const xmlString = serializer
309- . serializeToString ( element )
310- . replace ( / a r p a b e t = / g, "ARPABET=" ) // Our DTD says ARPABET is upper case
311- . replace ( / x m l n s = " [ \w \/ \: \. ] * " / g, "" ) ; // Our DTD does not accept xmlns that the parser inserts
312- //console.log(xmlString);
313- this . editorService . rasControl$ . setValue (
314- parser . parseFromString ( xmlString , "text/xml" ) ,
315- ) ; // re-parse as XML
316- //console.log(this.editorService.rasControl$.value);
317344
318345 // Oh, there's an audio file, okay, try to load it
319346 const audio = element . getAttribute ( "audio" ) ;
320-
321347 if ( audio !== null ) {
322348 const reply = await fetch ( audio ) ;
323349 // Did that work? Great!
@@ -328,20 +354,11 @@ export class EditorComponent implements OnDestroy, OnInit, AfterViewInit {
328354 ) ;
329355 }
330356 }
331- // Is read-along linked (including data URI) or embedded?
332- const href = element . getAttribute ( "href" ) ;
333- if ( href === null ) {
334- if ( this . editorService . rasControl$ . value ) {
335- this . createSegments ( this . editorService . rasControl$ . value ) ;
336- }
337- } else {
338- const reply = await fetch ( href ) ;
339- if ( reply . ok ) {
340- const text2 = await reply . text ( ) ;
341- // FIXME: potential zip-bombing?
342- this . parseReadalong ( text2 ) ;
343- }
357+
358+ if ( this . editorService . rasControl$ . value ) {
359+ this . createSegments ( this . editorService . rasControl$ . value ) ;
344360 }
361+
345362 return readalong . querySelector ( "body" ) ?. innerHTML ;
346363 }
347364
0 commit comments