-
Notifications
You must be signed in to change notification settings - Fork 331
Expand file tree
/
Copy pathXmlDomUtils.java
More file actions
348 lines (307 loc) · 11.8 KB
/
XmlDomUtils.java
File metadata and controls
348 lines (307 loc) · 11.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
package datadog.trace.bootstrap.instrumentation;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.InputSource;
/**
 * Utility class for converting W3C DOM XML structures to Map/List representations that are
 * compatible with WAF analysis and schema extraction.
 *
 * <p>Elements become {@code Map}s holding optional {@code "attributes"} and {@code "children"}
 * entries, text nodes become their trimmed {@code String} content, and every other node type is
 * dropped.
 *
 * <p>This centralized utility eliminates code duplication across multiple instrumentation modules
 * that need to process XML content for AppSec analysis.
 */
public final class XmlDomUtils {
  /** Default maximum recursion depth for XML DOM conversion to prevent stack overflow. */
  public static final int DEFAULT_MAX_CONVERSION_DEPTH = 15;

  /** Key under which an element's attribute map is stored in the converted structure. */
  private static final String ATTRIBUTES_KEY = "attributes";

  /** Key under which an element's converted child list is stored in the converted structure. */
  private static final String CHILDREN_KEY = "children";

  private XmlDomUtils() {
    // Utility class - prevent instantiation
  }

  /**
   * Convert a W3C DOM Document to a WAF-compatible Map/List structure using the default recursion
   * depth.
   *
   * @param document the XML document to convert
   * @return the converted root element (a {@code Map}), or null if the document is null or has no
   *     root element
   */
  public static Object convertDocument(Document document) {
    return convertDocument(document, DEFAULT_MAX_CONVERSION_DEPTH);
  }

  /**
   * Convert a W3C DOM Document to a WAF-compatible Map/List structure.
   *
   * @param document the XML document to convert
   * @param maxRecursion maximum recursion depth to prevent stack overflow
   * @return the converted root element (a {@code Map}), or null if the document is null, has no
   *     root element, or {@code maxRecursion <= 0}
   */
  public static Object convertDocument(Document document, int maxRecursion) {
    if (document == null) {
      return null;
    }
    // getDocumentElement() may be null for an empty document; convertW3cNode tolerates that.
    return convertW3cNode(document.getDocumentElement(), maxRecursion);
  }

  /**
   * Convert a W3C DOM Element to a WAF-compatible Map/List structure using the default recursion
   * depth.
   *
   * @param element the XML element to convert
   * @return the converted element (a {@code Map}), or null if the element is null
   */
  public static Object convertElement(Element element) {
    return convertElement(element, DEFAULT_MAX_CONVERSION_DEPTH);
  }

  /**
   * Convert a W3C DOM Element to a WAF-compatible Map/List structure.
   *
   * @param element the XML element to convert
   * @param maxRecursion maximum recursion depth to prevent stack overflow
   * @return the converted element (a {@code Map}), or null if the element is null or {@code
   *     maxRecursion <= 0}
   */
  public static Object convertElement(Element element, int maxRecursion) {
    // convertW3cNode already handles a null node.
    return convertW3cNode(element, maxRecursion);
  }

  /**
   * Convert a W3C DOM Node to a WAF-compatible Map/List structure.
   *
   * <p>This method recursively processes XML nodes, converting:
   *
   * <ul>
   *   <li>Elements to Maps with optional "attributes" and "children" keys
   *   <li>Text nodes to their trimmed string content (null when blank)
   *   <li>Other node types (comments, processing instructions, ...) to null
   * </ul>
   *
   * @param node the XML node to convert
   * @param maxRecursion maximum recursion depth to prevent stack overflow
   * @return Map for elements, String for text nodes, null for other types or when {@code
   *     maxRecursion <= 0}
   */
  public static Object convertW3cNode(Node node, int maxRecursion) {
    if (node == null || maxRecursion <= 0) {
      return null;
    }
    if (node instanceof Element) {
      return convertElementNode((Element) node, maxRecursion);
    }
    if (node instanceof Text) {
      return convertTextNode((Text) node);
    }
    // Ignore other node types (comments, processing instructions, etc.)
    return null;
  }

  /**
   * Convert an Element node to a Map with attributes and children. Keys are only present when the
   * corresponding collection is non-empty, so a bare element converts to an empty Map.
   */
  private static Map<String, Object> convertElementNode(Element element, int maxRecursion) {
    Map<String, Object> repr = new HashMap<>();
    Map<String, String> attributes = collectAttributes(element);
    if (!attributes.isEmpty()) {
      repr.put(ATTRIBUTES_KEY, attributes);
    }
    List<Object> children = collectChildren(element, maxRecursion - 1);
    if (!children.isEmpty()) {
      repr.put(CHILDREN_KEY, children);
    }
    return repr;
  }

  /** Copy the element's attributes into a name-to-value map (empty when there are none). */
  private static Map<String, String> collectAttributes(Element element) {
    if (!element.hasAttributes()) {
      return Collections.emptyMap();
    }
    NamedNodeMap attrMap = element.getAttributes();
    int count = attrMap.getLength();
    Map<String, String> attributes = new HashMap<>();
    for (int i = 0; i < count; i++) {
      Attr attr = (Attr) attrMap.item(i);
      attributes.put(attr.getName(), attr.getValue());
    }
    return attributes;
  }

  /**
   * Convert the element's child nodes with the given remaining depth, dropping children that
   * convert to null (blank text, unsupported node types).
   */
  private static List<Object> collectChildren(Element element, int childDepth) {
    // With no depth left every child would convert to null, so skip the walk entirely.
    if (childDepth <= 0 || !element.hasChildNodes()) {
      return Collections.emptyList();
    }
    NodeList childNodes = element.getChildNodes();
    int count = childNodes.getLength();
    List<Object> children = new ArrayList<>(count);
    for (int i = 0; i < count; i++) {
      Object converted = convertW3cNode(childNodes.item(i), childDepth);
      if (converted != null) {
        children.add(converted);
      }
    }
    return children;
  }

  /** Convert a Text node to its trimmed string content, or null when it is blank. */
  private static String convertTextNode(Text textNode) {
    String textContent = textNode.getTextContent();
    if (textContent == null) {
      return null;
    }
    String trimmed = textContent.trim();
    return trimmed.isEmpty() ? null : trimmed;
  }

  /**
   * Check whether an object represents XML content: either a W3C DOM node or a string that looks
   * like XML (see {@link #isXmlContent(String)}).
   *
   * @param obj the object to check
   * @return true if the object is a DOM node or an XML-looking string, false otherwise
   */
  public static boolean isXmlContent(Object obj) {
    // Document and Element are both Nodes, so a single check covers every DOM object.
    if (obj instanceof Node) {
      return true;
    }
    if (obj instanceof String) {
      return isXmlContent((String) obj);
    }
    return false;
  }

  /**
   * Check if a string contains XML content by looking for XML declaration or root element.
   *
   * <p>This is a cheap heuristic, not a parse: after trimming, the string must either start with
   * {@code <?xml}, or start with {@code <}, end with {@code >} and contain a closing or
   * self-closing tag. Content starting with <code>{</code> or {@code [} is treated as JSON.
   *
   * @param content the string content to check
   * @return true if the string contains XML content, false otherwise
   */
  public static boolean isXmlContent(String content) {
    if (content == null) {
      return false;
    }
    String trimmed = content.trim();
    if (trimmed.isEmpty()) {
      return false;
    }
    // Explicitly exclude JSON content
    if (trimmed.startsWith("{") || trimmed.startsWith("[")) {
      return false;
    }
    // Check for XML declaration
    if (trimmed.startsWith("<?xml")) {
      return true;
    }
    // Check for XML element (must start with < and end with >, and contain at least one closing
    // tag or self-closing tag)
    return trimmed.startsWith("<")
        && trimmed.endsWith(">")
        && (trimmed.contains("</") || trimmed.contains("/>"));
  }

  /**
   * Process XML content (strings or DOM objects) for WAF compatibility using the default recursion
   * depth. This ensures XML attack payloads are properly detected by the WAF.
   *
   * @param xmlObj the XML object to process (can be Document, Element, Node, or String)
   * @return processed XML structure compatible with WAF analysis, or null if processing fails
   */
  public static Object processXmlForWaf(Object xmlObj) {
    return processXmlForWaf(xmlObj, DEFAULT_MAX_CONVERSION_DEPTH);
  }

  /**
   * Process XML content (strings or DOM objects) for WAF compatibility. This ensures XML attack
   * payloads are properly detected by the WAF.
   *
   * @param xmlObj the XML object to process (can be Document, Element, Node, or String)
   * @param maxRecursion maximum recursion depth to prevent stack overflow
   * @return processed XML structure compatible with WAF analysis, or null if the input is null, of
   *     an unsupported type, or fails to parse
   */
  public static Object processXmlForWaf(Object xmlObj, int maxRecursion) {
    // A Document is not itself convertible; its root element is what gets converted.
    if (xmlObj instanceof Document) {
      return convertDocument((Document) xmlObj, maxRecursion);
    }
    // Covers Element as well as any other DOM node type.
    if (xmlObj instanceof Node) {
      return convertW3cNode((Node) xmlObj, maxRecursion);
    }
    // Handle XML strings by parsing them first; parse failures yield null.
    if (xmlObj instanceof String) {
      return handleXmlString((String) xmlObj, maxRecursion);
    }
    return null;
  }

  /**
   * Convert XML string to WAF-compatible format using the default recursion depth. This ensures XML
   * attack payloads are properly detected by the WAF.
   *
   * @param xmlContent the XML string content to parse
   * @return parsed XML structure compatible with WAF analysis, or null if the content is null or
   *     blank
   * @throws Exception if XML parsing fails
   */
  public static Object parseXmlStringToWafFormat(String xmlContent) throws Exception {
    return parseXmlStringToWafFormat(xmlContent, DEFAULT_MAX_CONVERSION_DEPTH);
  }

  /**
   * Convert XML string to WAF-compatible format following Spring framework pattern. This ensures
   * XML attack payloads are properly detected by the WAF.
   *
   * <p>The parser is hardened against XXE: DOCTYPE declarations are rejected and external entities
   * are disabled, so content carrying a DTD fails to parse.
   *
   * @param xmlContent the XML string content to parse
   * @param maxRecursion maximum recursion depth to prevent stack overflow
   * @return parsed XML structure compatible with WAF analysis, or null if the content is null or
   *     blank
   * @throws Exception if the parser cannot be configured or XML parsing fails
   */
  public static Object parseXmlStringToWafFormat(String xmlContent, int maxRecursion)
      throws Exception {
    if (xmlContent == null || xmlContent.trim().isEmpty()) {
      return null;
    }
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    // Security settings to prevent XXE attacks during parsing
    factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
    factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
    factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
    factory.setExpandEntityReferences(false);
    DocumentBuilder builder = factory.newDocumentBuilder();
    // Without an explicit handler the JDK parser prints "[Fatal Error] ..." to System.err for
    // every malformed payload. DefaultHandler ignores warnings/errors and rethrows fatal errors,
    // so failures still surface as exceptions but nothing is written to the host's stderr.
    builder.setErrorHandler(new org.xml.sax.helpers.DefaultHandler());
    Document document = builder.parse(new InputSource(new StringReader(xmlContent)));
    return convertDocument(document, maxRecursion);
  }

  /**
   * Convert XML string to WAF-compatible format using the default recursion depth. This is a
   * convenience method that wraps parseXmlStringToWafFormat and handles exceptions internally.
   *
   * @param xmlContent the XML string content to handle
   * @return parsed XML structure compatible with WAF analysis, or null if parsing fails
   */
  public static Object handleXmlString(String xmlContent) {
    return handleXmlString(xmlContent, DEFAULT_MAX_CONVERSION_DEPTH);
  }

  /**
   * Convert XML string to WAF-compatible format. This is a convenience method that wraps
   * parseXmlStringToWafFormat and handles exceptions internally.
   *
   * @param xmlContent the XML string content to handle
   * @param maxRecursion maximum recursion depth to prevent stack overflow
   * @return parsed XML structure compatible with WAF analysis, or null if parsing fails
   */
  public static Object handleXmlString(String xmlContent, int maxRecursion) {
    try {
      return parseXmlStringToWafFormat(xmlContent, maxRecursion);
    } catch (Exception ignored) {
      // Best effort: return null if parsing fails - let caller handle logging
      return null;
    }
  }
}