Skip to content

Commit 10dcbbb

Browse files
committed
filtering rdf / xml string by target id; adding dna / rna parsing
1 parent bd2ac2d commit 10dcbbb

6 files changed

Lines changed: 140 additions & 207 deletions

File tree

vcell-core/src/main/java/org/vcell/pathway/InteractionImpl.java

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -120,18 +120,6 @@ public void replace(HashMap<String, BioPaxObject> resourceMap, HashSet<BioPaxObj
120120
System.out.println(this + ": Unable to resolve proxy participant to PhysicalEntity");
121121
}
122122
}
123-
124-
// InteractionParticipant thing = participants.get(i);
125-
// if(thing.getPhysicalEntity() instanceof RdfObjectProxy) {
126-
// RdfObjectProxy rdfObjectProxy = (RdfObjectProxy)thing.getPhysicalEntity();
127-
// if (rdfObjectProxy.getID() != null){
128-
// BioPaxObject concreteObject = resourceMap.get(rdfObjectProxy.getID());
129-
// if (concreteObject != null){
130-
// PhysicalEntity physicalEntity = (PhysicalEntity)concreteObject;
131-
// participants.set(i, new InteractionParticipant(this, physicalEntity, thing.getType()));
132-
// }
133-
// }
134-
// }
135123
}
136124
}
137125

vcell-core/src/main/java/org/vcell/pathway/PathwayModel.java

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -258,20 +258,15 @@ public void reconcileReferences(ClientTaskStatusSupport clientTaskStatusSupport)
258258
String id = proxy.getID();
259259
BioPaxObject realObject = resourceMap.get(id);
260260
if (realObject instanceof PhysicalEntityParticipant) {
261+
// this also covers the case where realObject is a SequenceParticipant,
262+
// since SequenceParticipant extends PhysicalEntityParticipant
261263
PhysicalEntityParticipant pep = (PhysicalEntityParticipant) realObject;
262264
PhysicalEntity pe = pep.getPhysicalEntity();
263265
// rebind so future .replace() calls see the PE, not the participant
264266
resourceMap.put(id, pe);
265267
objectsToDelete.add(pep);
266268
proxiesToDelete.add(proxy);
267269
}
268-
else if (realObject instanceof SequenceParticipant) {
269-
SequenceParticipant sp = (SequenceParticipant) realObject;
270-
PhysicalEntity pe = sp.getPhysicalEntity();
271-
resourceMap.put(id, pe);
272-
objectsToDelete.add(sp);
273-
proxiesToDelete.add(proxy);
274-
}
275270
} else if(o instanceof BiopaxProxy.CellularLocationVocabularyProxy) {
276271
BiopaxProxy.CellularLocationVocabularyProxy proxy = (BiopaxProxy.CellularLocationVocabularyProxy) o;
277272
String id = proxy.getID();
@@ -321,7 +316,6 @@ else if (realObject instanceof SequenceParticipant) {
321316
hideUtilityClassObjects();
322317
cleanupUnresolvedProxies();
323318
ConvertModulationToCatalysis();
324-
// System.out.println(show(false));
325319
ProcessKineticLaws();
326320
setDisableUpdate(false);
327321
firePathwayChanged(new PathwayEvent(this,PathwayEvent.CHANGED));

vcell-core/src/main/java/org/vcell/pathway/PathwayXMLHelper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ public static void showIgnored(Attribute attribute) {
124124
}
125125
/**
 * Reports an element that the reader did not expect to encounter.
 * Builds the text "Unexpected element " followed by the result of
 * getElementPathString(childElement) and writes it to System.err.
 *
 * @param childElement the element to report
 */
public static void showUnexpected(Element childElement) {
126126
String message = "Unexpected element " + getElementPathString(childElement);
127-
//System.out.println(message);
127+
// the message goes to standard error rather than standard output
System.err.println(message);
128128
}
129129
public static void showIgnored(Element childElement, String reason) {
130130
String message = "Ignoring element " + getElementPathString(childElement) + " " + reason;

vcell-core/src/main/java/org/vcell/pathway/persistence/PathwayReader.java

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,10 @@ public PathwayModel parse(Element rootElement, ClientTaskStatusSupport clientTas
9494
addObjectSmallMolecule(childElement);
9595
}else if (childElement.getName().equals("protein")){
9696
addObjectProtein(childElement);
97+
}else if (childElement.getName().equals("dna")){
98+
addObjectDna(childElement);
99+
}else if (childElement.getName().equals("rna")){
100+
addObjectRna(childElement);
97101
}else if (childElement.getName().equals("complex")){
98102
addObjectComplex(childElement);
99103
}else if (childElement.getName().equals("catalysis")){
@@ -273,7 +277,13 @@ private boolean addContentEntity(Entity entity, Element element, Element childEl
273277
insertAtStart(entity.getName(),childElement.getTextTrim());
274278
return true;
275279
}else if (childElement.getName().equals("NAME")){
276-
entity.getName().add(childElement.getTextTrim());
280+
String name = childElement.getTextTrim();
281+
entity.getName().add(name);
282+
String geneSymbol = extractGeneSymbol(name);
283+
if(geneSymbol != null && geneSymbol.length() > 0 && entity.getName().size() == 1) {
284+
// insert at start only if we have not already parsed a SHORT-NAME
285+
insertAtStart(entity.getName(), geneSymbol);
286+
}
277287
return true;
278288
}else if (childElement.getName().equals("XREF")){
279289
entity.getxRef().add(addObjectXref(childElement));
@@ -2651,6 +2661,15 @@ private UnificationXref addObjectUnificationXref(Element element) {
26512661
return unificationXref;
26522662
}
26532663

2654-
2655-
2664+
/**
 * Extracts the second token from a two-token name whose first token starts
 * with "ENSEMBL:ENS".
 * The input is trimmed and split on runs of whitespace; the second token is
 * returned only when there are exactly two tokens and the first one begins
 * with "ENSEMBL:ENS".
 *
 * @param input the raw name text; may be null
 * @return the second token when the pattern matches, otherwise null
 *         (also null when input is null)
 */
public static String extractGeneSymbol(String input) {
2665+
if (input == null) return null;
2666+
2667+
// trim first so leading/trailing whitespace does not produce an empty token
String[] tokens = input.trim().split("\\s+");
2668+
if (tokens.length == 2 && tokens[0].startsWith("ENSEMBL:ENS")) {
2669+
return tokens[1];
2670+
}
2671+
return null; // pattern not matched
2672+
}
2673+
2674+
26562675
}

vcell-core/src/test/java/cbit/vcell/pathway/PathwayParseTest.java

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,39 @@ public void pathwayParseTest() throws IOException {
4747
pathwayParse(document);
4848
}
4949

50+
/**
 * Checks that XmlUtil.readXML does not expand an external entity.
 * The input declares a DOCTYPE with an entity pointing at file:///etc/passwd
 * and references it from the root element. The test passes when either the
 * root element text contains neither "root:" nor "bin:", or readXML throws a
 * RuntimeException whose message mentions "DOCTYPE" or "entity".
 */
@Test
51+
public void testXXEProtection_blocksExternalEntity() {
52+
53+
String maliciousXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
54+
"<!DOCTYPE foo [\n" +
55+
" <!ELEMENT foo ANY >\n" +
56+
" <!ENTITY xxe SYSTEM \"file:///etc/passwd\" >\n" +
57+
"]>\n" +
58+
"<foo>&xxe;</foo>";
59+
60+
Reader reader = new StringReader(maliciousXml);
61+
62+
try {
63+
Document doc = XmlUtil.readXML(reader, null, null, null);
64+
String content = doc.getRootElement().getText();
65+
66+
// AI generated test, checks for vulnerabilities
67+
// If XXE is blocked, content should not contain sensitive data
68+
// It may be empty or contain literal "&xxe;" depending on parser behavior
69+
// If this assertion fails, it means:
70+
// - the content string contains either "root:" or "bin:"
71+
// - these are typical markers of external entity resolution, often from XML content
72+
// - it suggests that your parser did not properly block or sanitize external entities, which can lead to:
73+
// - security vulnerabilities (e.g. XXE attacks)
74+
// - unexpected file access or leakage
75+
// - broken isolation in our data pipeline
76+
assertFalse(content.contains("root:") || content.contains("bin:"), "External entity was resolved!");
77+
} catch (RuntimeException e) {
78+
// Expected: parser may throw due to disallowed DOCTYPE
// NOTE(review): Throwable.getMessage() may return null; in that case the
// line below throws a NullPointerException instead of reporting a clean
// assertion failure -- confirm readXML always supplies a message.
79+
assertTrue(e.getMessage().contains("DOCTYPE") || e.getMessage().contains("entity"));
80+
}
81+
}
82+
5083
// -------------------------------------------------------------------------------------------------------------
5184

5285
private static void pathwayParse(Document document) {
@@ -64,7 +97,7 @@ private static void pathwayParse(Document document) {
6497
}
6598
// check that we found a few children that are of particular interest to us
6699
assertTrue(childCounts.getOrDefault("biochemicalReaction", 0) > 0, "Expected at least one biochemicalReaction element");
67-
assertTrue(childCounts.getOrDefault("physicalEntityParticipant", 0) > 0, "Expected at least one physicalEntityParticipant element");
100+
// assertTrue(childCounts.getOrDefault("physicalEntityParticipant", 0) > 0, "Expected at least one physicalEntityParticipant element");
68101
assertTrue(childCounts.getOrDefault("sequenceParticipant", 0) > 0, "Expected at least one sequenceParticipant element");
69102

70103
PathwayModel pathwayModel = pathwayReader.parse(rootElement, null);
@@ -84,7 +117,7 @@ private static void pathwayParse(Document document) {
84117
}
85118
// check that we parsed these children
86119
assertTrue(parsedCounts.getOrDefault("biochemicalReaction", 0) > 0, "Expected at least one parsed biochemicalReaction");
87-
assertTrue(parsedCounts.getOrDefault("physicalEntityParticipant", 0) > 0, "Expected at least one parsed physicalEntityParticipant");
120+
// assertTrue(parsedCounts.getOrDefault("physicalEntityParticipant", 0) > 0, "Expected at least one parsed physicalEntityParticipant");
88121
assertTrue(parsedCounts.getOrDefault("sequenceParticipant", 0) > 0, "Expected at least one parsed sequenceParticipant");
89122
assertTrue(proxyCount > 0, "Expected at least one RdfObjectProxy");
90123

@@ -131,46 +164,13 @@ private static void pathwayParse(Document document) {
131164
// }
132165
}
133166

134-
@Test
135-
public void testXXEProtection_blocksExternalEntity() {
136-
String maliciousXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
137-
"<!DOCTYPE foo [\n" +
138-
" <!ELEMENT foo ANY >\n" +
139-
" <!ENTITY xxe SYSTEM \"file:///etc/passwd\" >\n" +
140-
"]>\n" +
141-
"<foo>&xxe;</foo>";
142-
143-
Reader reader = new StringReader(maliciousXml);
144-
145-
try {
146-
Document doc = XmlUtil.readXML(reader, null, null, null);
147-
String content = doc.getRootElement().getText();
148-
149-
// AI generated test, checks for vulnerabilities
150-
// If XXE is blocked, content should not contain sensitive data
151-
// It may be empty or contain literal "&xxe;" depending on parser behavior
152-
// If this assertion fails, it means:
153-
// - the content string contains either "root:" or "bin:"
154-
// - these are typical markers of external entity resolution, often from XML content
155-
// - it suggests that your parser did not properly block or sanitize external entities, which can lead to:
156-
// - security vulnerabilities (e.g. XXE attacks)
157-
// - unexpected file access or leakage
158-
// - broken isolation in our data pipeline
159-
assertFalse(content.contains("root:") || content.contains("bin:"), "External entity was resolved!");
160-
} catch (RuntimeException e) {
161-
// Expected: parser may throw due to disallowed DOCTYPE
162-
assertTrue(e.getMessage().contains("DOCTYPE") || e.getMessage().contains("entity"));
163-
}
164-
}
165-
166-
167-
168167
public static void main(String args[]) {
169168
try {
170169
// Document document = XmlUtil.readSanitizedXML(new File("MyFile.xml")); // for malformed xml files, like trailing garbage
171170
// Document document = XmlUtil.readXML(new File("C:\\TEMP\\pathway\\insulinPathway-5683177.xml")); // Defective ABCC8 does not form functional KATP
172171
// Document document = XmlUtil.readXML(new File("C:/TEMP/pathway/egfrPathway-180292.xml"));
173-
Document document = XmlUtil.readXML(new File("C:/TEMP/pathway/R-HSA-9615017.xml")); // insulin pathway: FOXO mediated transcription of...
172+
// Document document = XmlUtil.readXML(new File("C:/TEMP/pathway/R-HSA-9615017.xml")); // insulin pathway: FOXO mediated transcription of...
173+
Document document = XmlUtil.readXML(new File("C:/TEMP/pathway/downloads/9615017_biochemicalReaction29.xml")); // faulty reaction, extracted from a very large doc
174174
pathwayParse(document);
175175
System.out.println("done manual run");
176176
}catch (Exception e) {

0 commit comments

Comments
 (0)