Skip to content

Commit 027dd3b

Browse files
authored
Merge pull request #1574 from virtualcell/dan-pathway-commons-moved
Upgrade Pathway commons
2 parents eeb63de + 8f680c0 commit 027dd3b

24 files changed

Lines changed: 3201 additions & 284 deletions

File tree

vcell-client/src/main/java/cbit/vcell/client/desktop/biomodel/BioModelEditorPathwayCommonsPanel.java

Lines changed: 111 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,13 @@
2121
import java.awt.event.KeyListener;
2222
import java.awt.event.MouseAdapter;
2323
import java.awt.event.MouseEvent;
24+
import java.io.IOException;
25+
import java.io.StringReader;
26+
import java.io.InputStream;
27+
import java.net.HttpURLConnection;
2428
import java.net.URL;
2529
import java.net.URLEncoder;
30+
import java.nio.charset.StandardCharsets;
2631
import java.time.Duration;
2732
import java.util.ArrayList;
2833
import java.util.Collections;
@@ -41,9 +46,13 @@
4146
import javax.swing.tree.DefaultTreeCellRenderer;
4247
import javax.swing.tree.DefaultTreeModel;
4348
import javax.swing.tree.TreePath;
49+
import javax.xml.parsers.*;
50+
import org.w3c.dom.*;
51+
import java.io.File;
4452

4553
import cbit.vcell.client.server.DynamicClientProperties;
4654
import org.sbpax.util.StringUtil;
55+
import org.vcell.util.ClientTaskStatusSupport;
4756
import org.vcell.pathway.PathwayModel;
4857
import org.vcell.pathway.persistence.PathwayIOUtil;
4958
import org.vcell.pathway.persistence.RDFXMLContext;
@@ -66,6 +75,7 @@
6675
import cbit.vcell.client.task.ClientTaskDispatcher;
6776
import cbit.vcell.desktop.BioModelNode;
6877
import cbit.vcell.resource.PropertyLoader;
78+
import org.xml.sax.InputSource;
6979

7080
@SuppressWarnings("serial")
7181
public class BioModelEditorPathwayCommonsPanel extends DocumentEditorSubPanel {
@@ -182,17 +192,30 @@ void addSearchResponse(String searchText, Element searchResponse) {
182192
} else {
183193
// pathwaysList = new ArrayList<Pathway>();
184194
pathwaysList.clear();
185-
List<Element> hitElements = DOMUtil.childElements(searchResponse, "search_hit");
186-
int pathwayCount = 0;
187-
for(Element hitElement : hitElements) {
188-
Hit hit = new Hit(hitElement);
189-
List<Pathway> pL = hit.pathways(); // pathway
190-
int numPathways = pL.size();
191-
if (numPathways > 0) {
192-
for (Pathway pathway: pL) {
193-
pathway.setOrganism(hit.organism());
194-
if(!pathwaysList.contains(pathway)) {
195-
pathwaysList.add(pathway);
195+
196+
NodeList hitNodes = searchResponse.getElementsByTagName("searchHit");
197+
System.out.println("Found " + hitNodes.getLength() + " hits\n");
198+
199+
200+
for (int i = 0; i < hitNodes.getLength(); i++) {
201+
Element hitElement = (Element) hitNodes.item(i);
202+
String uri = Hit.getText(hitElement, "uri");
203+
if (uri.contains("reactome")) {
204+
205+
NodeList responses = hitElement.getElementsByTagName("pathway");
206+
if(responses.getLength() == 0) {
207+
continue; // we skip some malformed entries
208+
}
209+
Hit hit = new Hit(hitElement);
210+
211+
List<Pathway> pL = hit.pathways(); // pathway
212+
int numPathways = pL.size();
213+
if (numPathways > 0) {
214+
for (Pathway pathway : pL) {
215+
// pathway.setOrganism(hit.organism());
216+
if (!pathwaysList.contains(pathway)) {
217+
pathwaysList.add(pathway);
218+
}
196219
}
197220
}
198221
}
@@ -254,6 +277,9 @@ public Component getTreeCellRendererComponent(JTree tree, Object value,
254277
// <html><body><p style="color:red">This is some text!</p></body></html>
255278
// <u> = underlined, <b> = bold
256279
String dbName = pathway.dataSource().name();
280+
if(dbName.startsWith("pc14:")) {
281+
dbName = dbName.replace("pc14:", "");
282+
}
257283
if(dbName.contains("Interaction Database")) {
258284
dbName = dbName.replace("Interaction Database", "Db");
259285
}
@@ -332,33 +358,24 @@ public void showPathway() {
332358
AsynchClientTask task1 = new AsynchClientTask("Importing pathway '" + pathway.name() + "'", AsynchClientTask.TASKTYPE_NONSWING_BLOCKING) {
333359
@Override
334360
public void run(final Hashtable<String, Object> hashTable) throws Exception {
335-
final URL url = new URL(DynamicClientProperties.getDynamicClientProperties().getProperty(PropertyLoader.PATHWAY_WEB_DO_URL) + "?"
336-
+ PathwayCommonsKeyword.cmd + "=" + PathwayCommonsKeyword.get_record_by_cpath_id
337-
+ "&" + PathwayCommonsKeyword.version + "=" + PathwayCommonsVersion.v2.name
338-
+ "&" + PathwayCommonsKeyword.q + "=" + pathway.primaryId()
339-
+ "&" + PathwayCommonsKeyword.output + "=" + PathwayCommonsKeyword.biopax);
340-
341-
System.out.println(url.toString());
361+
362+
String id = extractReactomeId(pathway.primaryId());
363+
final URL url = new URL(" https://reactome.org/ReactomeRESTfulAPI/RESTfulWS/biopaxExporter/Level2/" + id);
364+
365+
System.out.println(url.toString());
342366
String ERROR_CODE_TAG = "error_code";
343-
// String ERROR_MSG_TAG = "error_msg";
344-
final String contentString = ClientDownloader.downloadBytes(url, Duration.ofSeconds(10));
367+
368+
String contentString = ClientDownloader.downloadBytes(url, Duration.ofSeconds(20));
369+
345370
org.jdom2.Document jdomDocument = XmlUtil.stringToXML(contentString, null);
346371
org.jdom2.Element rootElement = jdomDocument.getRootElement();
347372
String errorCode = rootElement.getChildText(ERROR_CODE_TAG);
348373
if (errorCode != null){
349374
throw new RuntimeException("Failed to access " + url + " \n\nPlease try again.");
350375
}
351-
352-
// String xmlText = StringUtil.textFromInputStream(connection.getInputStream());
353-
// PathwayReader pathwayReader = new PathwayReader();
354-
// org.jdom2.Document jdomDocument = XmlUtil.stringToXML(xmlText, null);
355-
356-
// String xmlText = StringUtil.textFromInputStream(connection.getInputStream(), "UTF-8");
357-
// PathwayReader pathwayReader = new PathwayReader();
358-
// org.jdom2.Document jdomDocument = XmlUtil.stringToXML(xmlText, "UTF-8");
359-
360-
PathwayModel pathwayModel = PathwayIOUtil.extractPathwayFromJDOM(jdomDocument, new RDFXMLContext(),
361-
getClientTaskStatusSupport());
376+
377+
ClientTaskStatusSupport ctss = getClientTaskStatusSupport();
378+
PathwayModel pathwayModel = PathwayIOUtil.extractPathwayFromJDOM(jdomDocument, new RDFXMLContext(), ctss);
362379
PathwayData pathwayData = new PathwayData(pathway.name(), pathwayModel);
363380
hashTable.put("pathwayData", pathwayData);
364381
}
@@ -410,27 +427,59 @@ public void search() {
410427

411428
@Override
412429
public void run(Hashtable<String, Object> hashTable) throws Exception {
413-
URL url = new URL(DynamicClientProperties.getDynamicClientProperties().getProperty(PropertyLoader.PATHWAY_WEB_DO_URL) + "?"
414-
+ PathwayCommonsKeyword.cmd + "=" + PathwayCommonsKeyword.search
415-
+ "&" + PathwayCommonsKeyword.version + "=" + PathwayCommonsVersion.v2.name
416-
+ "&" + PathwayCommonsKeyword.q + "=" + URLEncoder.encode(searchText, "UTF-8")
417-
+ "&" + PathwayCommonsKeyword.maxHits + "=" + 14
418-
+ "&" + PathwayCommonsKeyword.output + "=" + PathwayCommonsKeyword.xml);
419-
System.out.println(url);
420-
String responseContent = ClientDownloader.downloadBytes(url, Duration.ofSeconds(10));
421-
Document document = DOMUtil.parse(responseContent);
422-
423-
Element errorElement = DOMUtil.firstChildElement(document, "error");
424-
if (errorElement != null) {
425-
// String xml = DOMUtil.firstChildContent(document, "error");
426-
throw new RuntimeException(errorElement.getTextContent());
427-
}
428-
Element searchResponse = DOMUtil.firstChildElement(document, "search_response");
429-
if (searchResponse != null) {
430-
// String xml = DOMUtil.firstChildContent(document, "search_response");
431-
// System.out.println(xml);
432-
hashTable.put("searchResponse", searchResponse);
430+
431+
// Encode the quoted term: "%22insulin%22"
432+
String encodedQ = URLEncoder.encode('"' + searchText + '"', StandardCharsets.UTF_8.name());
433+
String uri = "https://www.pathwaycommons.org/pc2/search?"
434+
+ "q=" + encodedQ
435+
+ "&type=pathway";
436+
System.out.println(uri);
437+
438+
HttpURLConnection conn = (HttpURLConnection)new URL(uri).openConnection();
439+
conn.setRequestProperty("Accept", "application/xml");
440+
441+
Element searchResponse = null;
442+
try (InputStream in = conn.getInputStream()) {
443+
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
444+
// Prevent XXE attacks in XML parsing, suggested by GitHub Advanced Security bot
445+
dbf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
446+
dbf.setFeature("http://xml.org/sax/features/external-general-entities", false);
447+
dbf.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
448+
dbf.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
449+
DocumentBuilder db = dbf.newDocumentBuilder();
450+
Document doc = db.parse(in);
451+
doc.getDocumentElement().normalize();
452+
453+
searchResponse = doc.getDocumentElement();
454+
System.out.println("Root element: " + searchResponse.getNodeName());
433455
}
456+
457+
// let's make sure we have some searchHit elements
458+
NodeList hitNodes = searchResponse.getElementsByTagName("searchHit");
459+
System.out.println("Found " + hitNodes.getLength() + " hits\n");
460+
hashTable.put("searchResponse", searchResponse);
461+
462+
// for (int i = 0; i < hitNodes.getLength(); i++) {
463+
// Element hit = (Element) hitNodes.item(i);
464+
//
465+
// String uri = getText(hit, "uri");
466+
// String name = getText(hit, "name");
467+
// String source = getText(hit, "dataSource");
468+
// String organism = getText(hit, "organism");
469+
// String participants = getText(hit, "numParticipants");
470+
// String processes = getText(hit, "numProcesses");
471+
//
472+
// System.out.println("Pathway: " + name);
473+
// System.out.println(" URI: " + uri);
474+
// System.out.println(" Source: " + source);
475+
// System.out.println(" Organism: " + (organism != null ? organism : "[unspecified]"));
476+
// System.out.println(" Participants: " + participants + " | Processes: " + processes);
477+
// System.out.println();
478+
//
479+
// if(uri.contains("reactome")) {
480+
// hashTable.put("searchHit", hit);
481+
// }
482+
// }
434483
}
435484
};
436485
AsynchClientTask task2 = new AsynchClientTask("showing", AsynchClientTask.TASKTYPE_SWING_BLOCKING) {
@@ -509,8 +558,7 @@ private void initialize() {
509558
gbc.insets = new Insets(4,4,4,4);
510559
gbc.fill = GridBagConstraints.BOTH;
511560
add(new JScrollPane(responseTree), gbc);
512-
513-
561+
514562
gridy ++;
515563
CollapsiblePanel filterPanel = new CollapsiblePanel("Filter", true);
516564
filterPanel.getContentPanel().setLayout(new GridBagLayout());
@@ -574,7 +622,6 @@ private void initialize() {
574622
// optionsPanel.expand(false);
575623
// add(optionsPanel, gbc);
576624

577-
578625
gridy ++;
579626
gbc = new GridBagConstraints();
580627
gbc.gridx = 0;
@@ -636,4 +683,14 @@ public static boolean isPathwayObject(Object object) {
636683
@Override
637684
protected void onSelectedObjectsChange(Object[] selectedObjects) {
638685
}
686+
687+
public static String extractReactomeId(String primaryId) {
688+
final String PREFIX = "R-HSA-";
689+
int index = primaryId.indexOf(PREFIX);
690+
if (index == -1) {
691+
throw new IllegalArgumentException("Malformed primaryId: missing 'R-HSA-' prefix");
692+
}
693+
return primaryId.substring(index + PREFIX.length());
694+
}
695+
639696
}

vcell-core/src/main/java/cbit/util/xml/XmlUtil.java

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,9 @@
2929
import javax.xml.transform.stream.StreamResult;
3030
import javax.xml.transform.stream.StreamSource;
3131
import java.io.*;
32+
import java.nio.charset.StandardCharsets;
3233
import java.util.ArrayList;
34+
import java.util.Arrays;
3335
import java.util.List;
3436
import java.util.Objects;
3537

@@ -139,7 +141,8 @@ public static void vetXMLForMaliciousEntities(String xml) throws IOException, JD
139141

140142
builder.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); // https://semgrep.dev/docs/cheat-sheets/java-xxe
141143
builder.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
142-
144+
builder.setFeature("http://xml.org/sax/features/external-general-entities", false);
145+
builder.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
143146
builder.build(new StringReader(xml));
144147
}
145148

@@ -167,6 +170,8 @@ public static Document readXML(Reader reader, String schemaLocation, String pars
167170
}
168171
builder.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); // https://semgrep.dev/docs/cheat-sheets/java-xxe
169172
builder.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
173+
builder.setFeature("http://xml.org/sax/features/external-general-entities", false);
174+
builder.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
170175
sDoc = builder.build(reader);
171176
// ----- Element root = null;
172177
// ----- root = sDoc.getRootElement();
@@ -195,6 +200,8 @@ public static Document readXML(File file) throws RuntimeException {
195200
builder.setErrorHandler(errorHandler);
196201
builder.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); // https://semgrep.dev/docs/cheat-sheets/java-xxe
197202
builder.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
203+
builder.setFeature("http://xml.org/sax/features/external-general-entities", false);
204+
builder.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
198205
try {
199206
sDoc = builder.build(file);
200207
// Element root = null;
@@ -224,6 +231,8 @@ public static Document readXML(InputStream inputStream) throws RuntimeException
224231
builder.setErrorHandler(errorHandler);
225232
builder.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); // https://semgrep.dev/docs/cheat-sheets/java-xxe
226233
builder.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
234+
builder.setFeature("http://xml.org/sax/features/external-general-entities", false);
235+
builder.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
227236
try {
228237
sDoc = builder.build(inputStream);
229238
// Element root = null;
@@ -244,9 +253,31 @@ public static Document readXML(InputStream inputStream) throws RuntimeException
244253

245254
return sDoc;
246255
}
247-
248256

249-
/**
257+
// use this to repair malformed xml files, like trailing garbage
258+
public static Document readSanitizedXML(File file) throws IOException, JDOMException {
259+
try (InputStream rawInput = new FileInputStream(file)) {
260+
261+
byte[] rawBytes = rawInput.readAllBytes(); // read raw bytes
262+
263+
byte[] cleanedBytes = stripBOM(rawBytes); // strip BOM if present
264+
265+
String xmlText = new String(cleanedBytes, StandardCharsets.UTF_8); // trim leading junk before <?xml
266+
int xmlStart = xmlText.indexOf("<?xml");
267+
if (xmlStart > 0) {
268+
xmlText = xmlText.substring(xmlStart);
269+
}
270+
271+
// convert cleaned string back to InputStream
272+
InputStream cleanedInput = new ByteArrayInputStream(xmlText.getBytes(StandardCharsets.UTF_8));
273+
274+
return XmlUtil.readXML(cleanedInput);
275+
}
276+
}
277+
278+
279+
280+
/**
250281
* This method is used to set the Default Namespace to the XML document represented by 'rootNode'.
251282
* Creation date: (5/8/2003 12:51:03 PM)
252283
* @return Element
@@ -405,4 +436,16 @@ public static Geometry cloneGeometry(Geometry geometry) throws XmlParseException
405436
XmlReader xmlReader = new XmlReader(true);
406437
return xmlReader.getGeometry(xmlproducer.getXML(geometry));
407438
}
439+
440+
private static byte[] stripBOM(byte[] bytes) {
441+
// UTF-8 BOM: 0xEF,0xBB,0xBF
442+
if (bytes.length >= 3 &&
443+
bytes[0] == (byte)0xEF &&
444+
bytes[1] == (byte)0xBB &&
445+
bytes[2] == (byte)0xBF) {
446+
return Arrays.copyOfRange(bytes, 3, bytes.length);
447+
}
448+
return bytes;
449+
}
450+
408451
}

vcell-core/src/main/java/org/vcell/pathway/Complex.java

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -58,18 +58,26 @@ public void replace(RdfObjectProxy objectProxy, BioPaxObject concreteObject){
5858
public void replace(HashMap<String, BioPaxObject> resourceMap, HashSet<BioPaxObject> replacedBPObjects){
5959
super.replace(resourceMap, replacedBPObjects);
6060

61-
for (int i=0; i<component.size(); i++) {
61+
for (int i = 0; i < component.size(); i++) {
6262
PhysicalEntity thing = component.get(i);
63-
if(thing instanceof RdfObjectProxy) {
64-
RdfObjectProxy rdfObjectProxy = (RdfObjectProxy)thing;
65-
if (rdfObjectProxy.getID() != null){
66-
BioPaxObject concreteObject = resourceMap.get(rdfObjectProxy.getID());
67-
if (concreteObject != null){
68-
component.set(i, (PhysicalEntity)concreteObject);
69-
}
63+
64+
if (thing instanceof RdfObjectProxy) {
65+
RdfObjectProxy proxy = (RdfObjectProxy) thing;
66+
BioPaxObject candidate = resourceMap.get(proxy.getID());
67+
68+
// resolve one level deeper if still a proxy
69+
if (candidate instanceof RdfObjectProxy) {
70+
candidate = resourceMap.get(((RdfObjectProxy) candidate).getID());
71+
}
72+
73+
if (candidate instanceof PhysicalEntity) {
74+
component.set(i, (PhysicalEntity) candidate);
75+
} else { // resolution failed
76+
System.out.println(this + ": Unable to resolve proxy component to PhysicalEntity");
7077
}
7178
}
7279
}
80+
7381
for (int i=0; i<componentStoichiometry.size(); i++) {
7482
Stoichiometry thing = componentStoichiometry.get(i);
7583
if(thing instanceof RdfObjectProxy) {

0 commit comments

Comments
 (0)