@@ -56,16 +56,20 @@ public static function xpath(
5656 /**
5757 * Sanitizes the values that are assigned to the tags of the XML.
5858 *
59- * @param string $xml Text to assign as value to the XML node.
59+ * @param string $value Text to assign as value to the XML node.
6060 * @return string Sanitized text.
6161 */
62- public static function sanitize (string $ xml ): string
62+ public static function sanitize (string $ value ): string
6363 {
6464 // If no text is passed or it is a number, do nothing.
65- if (!$ xml || is_numeric ($ xml )) {
66- return $ xml ;
65+ if (!$ value || is_numeric ($ value )) {
66+ return $ value ;
6767 }
6868
69+ // Remove control characters (ASCII 0x00-0x1F and 0x7F) that can cause
70+ // problems in XML-DSIG.
71+ $ value = preg_replace ('/[\x00-\x1F\x7F]/ ' , '' , $ value );
72+
6973 // Convert "predefined entities" of XML.
7074 $ replace = [
7175 '& ' => '& ' ,
@@ -79,26 +83,26 @@ public static function sanitize(string $xml): string
7983 '' ' => '\'' ,
8084 '' ' => '\'' ,
8185 ];
82- $ xml = str_replace (array_keys ($ replace ), array_values ($ replace ), $ xml );
86+ $ value = str_replace (array_keys ($ replace ), array_values ($ replace ), $ value );
8387
8488 // This is on purpose, the replacements must be done again.
85- $ xml = str_replace ('& ' , '& ' , $ xml );
89+ $ value = str_replace ('& ' , '& ' , $ value );
8690
87- /*$xml = str_replace(
91+ /*$value = str_replace(
8892 ['"', '\''],
8993 ['"', '''],
90- $xml
94+ $value
9195 );*/
9296
9397 // Return the sanitized text.
94- return $ xml ;
98+ return $ value ;
9599 }
96100
97101 /**
98102 * Fixes the entities '&apos;' and '&quot;' in the XML.
99103 *
100- * The correction is only done within the content of the XML tags, but not
101- * in the attributes of the tags.
104+ * The correction is applied only to the text content of the XML tags;
105+ * quoted attribute values are copied through unchanged.
102106 *
103107 * @param string $xml XML to fix.
104108 * @return string Fixed XML.
@@ -115,18 +119,49 @@ public static function fixEntities(string $xml): string
115119 $ newXml = '' ;
116120 $ n_chars = strlen ($ xml );
117121 $ convert = false ;
122+ $ inAttribute = false ;
123+ $ attributeDelimiter = null ;
118124
119125 for ($ i = 0 ; $ i < $ n_chars ; ++$ i ) {
120- if ($ xml [$ i ] === '> ' ) {
126+ $ char = $ xml [$ i ];
127+
128+ // Detect when we enter/exit attribute values.
129+ if (
130+ !$ convert
131+ && $ char === '= '
132+ && $ i + 1 < $ n_chars
133+ && ($ xml [$ i + 1 ] === '" ' || $ xml [$ i + 1 ] === "' " )
134+ ) {
135+ $ inAttribute = true ;
136+ $ attributeDelimiter = $ xml [$ i + 1 ];
137+ $ i ++; // Skip the delimiter.
138+ $ newXml .= $ char . $ attributeDelimiter ;
139+ continue ;
140+ }
141+
142+ // Detect when we exit attribute values.
143+ if ($ inAttribute && $ char === $ attributeDelimiter ) {
144+ $ inAttribute = false ;
145+ $ attributeDelimiter = null ;
146+ $ newXml .= $ char ;
147+ continue ;
148+ }
149+
150+ // Toggle convert mode for tag content.
151+ if ($ char === '> ' ) {
121152 $ convert = true ;
122153 }
123- if ($ xml [ $ i ] === '< ' ) {
154+ if ($ char === '< ' ) {
124155 $ convert = false ;
125156 }
126- $ newXml .= $ convert
127- ? str_replace ($ replaceFrom , $ replaceTo , $ xml [$ i ])
128- : $ xml [$ i ]
129- ;
157+
158+ // Only convert entities if we're in tag content and not in an
159+ // attribute.
160+ if ($ convert && !$ inAttribute ) {
161+ $ newXml .= str_replace ($ replaceFrom , $ replaceTo , $ char );
162+ } else {
163+ $ newXml .= $ char ;
164+ }
130165 }
131166
132167 return $ newXml ;
0 commit comments