@@ -408,25 +408,6 @@ add_attrib(ImageSpec& spec, string_view xmlname, string_view xmlvalue,
408408
409409
410410
411- // Utility: Search str for the first substring in str (starting from
412- // position pos) that starts with startmarker and ends with endmarker.
413- // If not found, return false. If found, return true, store the
414- // beginning and ending indices in startpos and endpos.
415- static bool
416- extract_middle (string_view str, size_t pos, string_view startmarker,
417- string_view endmarker, size_t & startpos, size_t & endpos)
418- {
419- startpos = str.find (startmarker, pos);
420- if (startpos == std::string::npos)
421- return false ; // start marker not found
422- endpos = str.find (endmarker, startpos);
423- if (endpos == std::string::npos)
424- return false ; // end marker not found
425- endpos += endmarker.size ();
426- return true ;
427- }
428-
429-
430411// Decode one XMP node and its children.
431412// Return value is the size of the resulting attribute (can be used to
432413// catch runaway or corrupt XML).
@@ -536,35 +517,35 @@ decode_xmp(string_view xml, ImageSpec& spec)
536517#endif
537518 if (!xml.length ())
538519 return true ;
539- for (size_t startpos = 0 , endpos = 0 ;
540- extract_middle (xml, endpos, " <rdf:Description" , " </rdf:Description>" ,
541- startpos, endpos);) {
542- // Turn that middle section into an XML document
543- string_view rdf = xml.substr (startpos, endpos - startpos); // scooch in
544- #if DEBUG_XMP_READ
545- std::cerr << " RDF is:\n ---\n " << rdf.substr (0 , 4096 ) << " \n ---\n " ;
546- #endif
547- pugi::xml_document doc;
548- pugi::xml_parse_result parse_result
549- = doc.load_buffer (rdf.data (), rdf.size (),
550- pugi::parse_default | pugi::parse_fragment);
551- if (!parse_result) {
520+ // Some callers (e.g. JPEG APP1 markers) prepend a namespace URI before the
521+ // actual XML. Skip any leading non-XML content.
522+ auto xmlstart = xml.find (' <' );
523+ if (xmlstart == string_view::npos)
524+ return true ;
525+ xml = xml.substr (xmlstart);
526+ pugi::xml_document doc;
527+ pugi::xml_parse_result parse_result
528+ = doc.load_buffer (xml.data (), xml.size (),
529+ pugi::parse_default | pugi::parse_fragment);
530+ if (!parse_result) {
552531#if DEBUG_XMP_READ
553- std::cerr << " Error parsing XML @" << parse_result.offset << " : "
554- << parse_result.description () << " \n " ;
532+ std::cerr << " Error parsing XML @" << parse_result.offset << " : "
533+ << parse_result.description () << " \n " ;
555534#endif
556- // Instead of returning early here if there were errors parsing
557- // the XML -- I have noticed that very minor XML malformations
558- // are common in XMP found in files -- hope for the best and
559- // go ahead and assume that maybe it managed to put something
560- // useful in the resulting document.
561- #if 0
562- return true;
563- #endif
564- }
565- // Decode the contents of the XML document (it will recurse)
566- decode_xmp_node (doc.first_child (), spec);
535+ // Instead of returning early here if there were errors parsing
536+ // the XML -- I have noticed that very minor XML malformations
537+ // are common in XMP found in files -- hope for the best and
538+ // go ahead and assume that maybe it managed to put something
539+ // useful in the resulting document.
567540 }
541+ // Find the first rdf:Description node anywhere in the document.
542+ // decode_xmp_node() iterates siblings, so passing the first one processes
543+ // all rdf:Description siblings (i.e. all blocks in the rdf:RDF container).
544+ auto first_desc = doc.find_node ([](pugi::xml_node n) {
545+ return strcmp (n.name (), " rdf:Description" ) == 0 ;
546+ });
547+ if (first_desc)
548+ decode_xmp_node (first_desc, spec);
568549#if DEBUG_XMP_READ
569550 std::cerr << " XMP total parse time " << timer () << " \n " ;
570551#endif
0 commit comments