@@ -47,6 +47,39 @@ public void pathwayParseTest() throws IOException {
4747 pathwayParse (document );
4848 }
4949
50+ @ Test
51+ public void testXXEProtection_blocksExternalEntity () {
52+
53+ String maliciousXml = "<?xml version=\" 1.0\" encoding=\" UTF-8\" ?>\n " +
54+ "<!DOCTYPE foo [\n " +
55+ " <!ELEMENT foo ANY >\n " +
56+ " <!ENTITY xxe SYSTEM \" file:///etc/passwd\" >\n " +
57+ "]>\n " +
58+ "<foo>&xxe;</foo>" ;
59+
60+ Reader reader = new StringReader (maliciousXml );
61+
62+ try {
63+ Document doc = XmlUtil .readXML (reader , null , null , null );
64+ String content = doc .getRootElement ().getText ();
65+
66+ // AI generated test, checks for vulnerabilities
67+ // If XXE is blocked, content should not contain sensitive data
68+ // It may be empty or contain literal "&xxe;" depending on parser behavior
69+ // If this assertion fails, it means:
70+ // - the content string contains either "root:" or "bin:"
71+ // - these are typical markers of external entity resolution, often from XML content
72+ // - it suggests that your parser did not properly block or sanitize external entities, which can lead to:
73+ // - security vulnerabilities (e.g. XXE attacks)
74+ // - unexpected file access or leakage
75+ // - broken isolation in our data pipeline
76+ assertFalse (content .contains ("root:" ) || content .contains ("bin:" ), "External entity was resolved!" );
77+ } catch (RuntimeException e ) {
78+ // Expected: parser may throw due to disallowed DOCTYPE
79+ assertTrue (e .getMessage ().contains ("DOCTYPE" ) || e .getMessage ().contains ("entity" ));
80+ }
81+ }
82+
5083 // -------------------------------------------------------------------------------------------------------------
5184
5285 private static void pathwayParse (Document document ) {
@@ -64,7 +97,7 @@ private static void pathwayParse(Document document) {
6497 }
6598 // check that we found a few children that are of particular interes to us
6699 assertTrue (childCounts .getOrDefault ("biochemicalReaction" , 0 ) > 0 , "Expected at least one biochemicalReaction element" );
67- assertTrue (childCounts .getOrDefault ("physicalEntityParticipant" , 0 ) > 0 , "Expected at least one physicalEntityParticipant element" );
100+ // assertTrue(childCounts.getOrDefault("physicalEntityParticipant", 0) > 0, "Expected at least one physicalEntityParticipant element");
68101 assertTrue (childCounts .getOrDefault ("sequenceParticipant" , 0 ) > 0 , "Expected at least one sequenceParticipant element" );
69102
70103 PathwayModel pathwayModel = pathwayReader .parse (rootElement , null );
@@ -84,7 +117,7 @@ private static void pathwayParse(Document document) {
84117 }
85118 // check that we parsed these children
86119 assertTrue (parsedCounts .getOrDefault ("biochemicalReaction" , 0 ) > 0 , "Expected at least one parsed biochemicalReaction" );
87- assertTrue (parsedCounts .getOrDefault ("physicalEntityParticipant" , 0 ) > 0 , "Expected at least one parsed physicalEntityParticipant" );
120+ // assertTrue(parsedCounts.getOrDefault("physicalEntityParticipant", 0) > 0, "Expected at least one parsed physicalEntityParticipant");
88121 assertTrue (parsedCounts .getOrDefault ("sequenceParticipant" , 0 ) > 0 , "Expected at least one parsed sequenceParticipant" );
89122 assertTrue (proxyCount > 0 , "Expected at least one RdfObjectProxy" );
90123
@@ -131,46 +164,13 @@ private static void pathwayParse(Document document) {
131164// }
132165 }
133166
134- @ Test
135- public void testXXEProtection_blocksExternalEntity () {
136- String maliciousXml = "<?xml version=\" 1.0\" encoding=\" UTF-8\" ?>\n " +
137- "<!DOCTYPE foo [\n " +
138- " <!ELEMENT foo ANY >\n " +
139- " <!ENTITY xxe SYSTEM \" file:///etc/passwd\" >\n " +
140- "]>\n " +
141- "<foo>&xxe;</foo>" ;
142-
143- Reader reader = new StringReader (maliciousXml );
144-
145- try {
146- Document doc = XmlUtil .readXML (reader , null , null , null );
147- String content = doc .getRootElement ().getText ();
148-
149- // AI generated test, checks for vulnerabilities
150- // If XXE is blocked, content should not contain sensitive data
151- // It may be empty or contain literal "&xxe;" depending on parser behavior
152- // If this assertion fails, it means:
153- // - the content string contains either "root:" or "bin:"
154- // - these are typical markers of external entity resolution, often from XML content
155- // - it suggests that your parser did not properly block or sanitize external entities, which can lead to:
156- // - security vulnerabilities (e.g. XXE attacks)
157- // - unexpected file access or leakage
158- // - broken isolation in our data pipeline
159- assertFalse (content .contains ("root:" ) || content .contains ("bin:" ), "External entity was resolved!" );
160- } catch (RuntimeException e ) {
161- // Expected: parser may throw due to disallowed DOCTYPE
162- assertTrue (e .getMessage ().contains ("DOCTYPE" ) || e .getMessage ().contains ("entity" ));
163- }
164- }
165-
166-
167-
168167 public static void main (String args []) {
169168 try {
170169// Document document = XmlUtil.readSanitizedXML(new File("MyFile.xml")); // for malformed xml files, like trailing garbage
171170// Document document = XmlUtil.readXML(new File("C:\\TEMP\\pathway\\insulinPathway-5683177.xml")); // Defective ABCC8 does not form functional KATP
172171// Document document = XmlUtil.readXML(new File("C:/TEMP/pathway/egfrPathway-180292.xml"));
173- Document document = XmlUtil .readXML (new File ("C:/TEMP/pathway/R-HSA-9615017.xml" )); // insulin pathway: FOXO mediated transcription of...
172+ // Document document = XmlUtil.readXML(new File("C:/TEMP/pathway/R-HSA-9615017.xml")); // insulin pathway: FOXO mediated transcription of...
173+ Document document = XmlUtil .readXML (new File ("C:/TEMP/pathway/downloads/9615017_biochemicalReaction29.xml" )); // faulty reaction, extracted from a very large doc
174174 pathwayParse (document );
175175 System .out .println ("done manual run" );
176176 }catch (Exception e ) {
0 commit comments