Skip to content

Commit d9e398b

Browse files
committed
change semantics of rdkit.index.sanitize=false
if false, we first work with sanitize=true and fall back to sanitize=false in case of an error
1 parent 35fc1ff commit d9e398b

6 files changed

Lines changed: 141 additions & 29 deletions

File tree

src/main/java/org/rdkit/neo4j/handlers/RDKitEventHandler.java

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,26 +15,26 @@
1515
* #L%
1616
*/
1717

18-
import java.util.Arrays;
19-
import java.util.List;
20-
import java.util.Set;
21-
import java.util.stream.Collectors;
22-
import java.util.stream.StreamSupport;
23-
18+
import org.RDKit.MolSanitizeException;
2419
import org.neo4j.graphdb.GraphDatabaseService;
2520
import org.neo4j.graphdb.Label;
2621
import org.neo4j.graphdb.Node;
2722
import org.neo4j.graphdb.event.LabelEntry;
2823
import org.neo4j.graphdb.event.TransactionData;
2924
import org.neo4j.graphdb.event.TransactionEventHandler;
30-
3125
import org.rdkit.neo4j.models.Constants;
3226
import org.rdkit.neo4j.models.NodeFields;
3327
import org.rdkit.neo4j.models.NodeParameters;
3428
import org.rdkit.neo4j.utils.Converter;
3529
import org.slf4j.Logger;
3630
import org.slf4j.LoggerFactory;
3731

32+
import java.util.Arrays;
33+
import java.util.List;
34+
import java.util.Set;
35+
import java.util.stream.Collectors;
36+
import java.util.stream.StreamSupport;
37+
3838
/**
3939
* RDKit event handler
4040
* Handler tracks new nodes with property `smiles` or `mdlmol`
@@ -72,8 +72,17 @@ public Object beforeCommit(TransactionData data) throws Exception {
7272

7373
for (Node node: nodesMol) {
7474
final String mol = (String) node.getProperty(NodeFields.MdlMol.getValue());
75-
final NodeParameters block = converter.convertMolBlock(mol, sanitize);
7675

76+
NodeParameters block;
77+
try {
78+
block = converter.convertMolBlock(mol, true);
79+
} catch (MolSanitizeException e) {
80+
if (sanitize) {
81+
throw e;
82+
} else {
83+
block = converter.convertMolBlock(mol, false);
84+
}
85+
}
7786
addProperties(node, block);
7887
}
7988

@@ -83,8 +92,17 @@ public Object beforeCommit(TransactionData data) throws Exception {
8392

8493
for (Node node: nodesSmiles) {
8594
final String smiles = (String) node.getProperty(NodeFields.Smiles.getValue());
86-
final NodeParameters block = converter.convertSmiles(smiles, sanitize);
8795

96+
NodeParameters block;
97+
try {
98+
block = converter.convertSmiles(smiles, true);
99+
} catch (MolSanitizeException e) {
100+
if (sanitize) {
101+
throw e;
102+
} else {
103+
block = converter.convertSmiles(smiles, false);
104+
}
105+
}
88106
addProperties(node, block);
89107
}
90108

src/main/java/org/rdkit/neo4j/handlers/RDKitEventHandlerExtensionFactory.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222
import org.neo4j.kernel.impl.spi.KernelContext;
2323
import org.neo4j.kernel.lifecycle.Lifecycle;
2424
import org.neo4j.kernel.lifecycle.LifecycleAdapter;
25+
import org.neo4j.logging.Log;
2526
import org.neo4j.logging.internal.LogService;
26-
2727
import org.rdkit.neo4j.bin.LibraryLoader;
2828
import org.rdkit.neo4j.bin.LoaderException;
2929
import org.rdkit.neo4j.config.RDKitSettings;
@@ -53,13 +53,14 @@ public class RDKitEventHandlerExtensionFactory extends KernelExtensionFactory<De
5353
@Override
5454
public Lifecycle newInstance(KernelContext kernelContext, final Dependencies dependencies) {
5555
return new LifecycleAdapter() {
56-
// LogService log = dependencies.log();
56+
final Log log = dependencies.log().getUserLog(RDKitEventHandlerExtensionFactory.class);
5757

5858
private RDKitEventHandler handler;
5959

6060
@Override
6161
public void start() {
62-
logger.debug("Starting RDKit trigger watcher");
62+
63+
log.info("Starting RDKit trigger watcher");
6364
boolean sanitize = dependencies.config().get(RDKitSettings.indexSanitize);
6465
logger.debug("sanitize = %s", sanitize);
6566
handler = new RDKitEventHandler(dependencies.getGraphDatabaseService(), sanitize);
@@ -68,7 +69,7 @@ public void start() {
6869

6970
@Override
7071
public void shutdown() {
71-
logger.debug("Stopping RDKit trigger watcher");
72+
log.info("Stopping RDKit trigger watcher");
7273
if (handler != null)
7374
dependencies.getGraphDatabaseService().unregisterTransactionEventHandler(handler);
7475
}

src/main/java/org/rdkit/neo4j/procedures/ExactSearch.java

Lines changed: 34 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,26 @@
1515
* #L%
1616
*/
1717

18-
import java.util.List;
19-
import java.util.stream.Collectors;
20-
import java.util.stream.Stream;
21-
22-
import org.neo4j.graphdb.*;
23-
import org.neo4j.procedure.*;
24-
18+
import org.RDKit.MolSanitizeException;
19+
import org.neo4j.graphdb.Label;
20+
import org.neo4j.graphdb.Node;
21+
import org.neo4j.kernel.configuration.Config;
22+
import org.neo4j.kernel.internal.GraphDatabaseAPI;
23+
import org.neo4j.procedure.Description;
24+
import org.neo4j.procedure.Mode;
25+
import org.neo4j.procedure.Name;
26+
import org.neo4j.procedure.Procedure;
27+
import org.rdkit.neo4j.config.RDKitSettings;
2528
import org.rdkit.neo4j.handlers.RDKitEventHandler;
2629
import org.rdkit.neo4j.models.Constants;
27-
import org.rdkit.neo4j.models.NodeParameters;
2830
import org.rdkit.neo4j.models.NodeFields;
31+
import org.rdkit.neo4j.models.NodeParameters;
2932
import org.rdkit.neo4j.utils.Converter;
3033

34+
import java.util.List;
35+
import java.util.stream.Collectors;
36+
import java.util.stream.Stream;
37+
3138
/**
3239
* ExactSearch class
3340
* Implements functionality for org.rdkit.search.exact.* procedures
@@ -62,12 +69,28 @@ public Stream<NodeWrapper> exactSearchSmiles(@Name("label") List<String> labelNa
6269
*/
6370
@Procedure(name = "org.rdkit.search.exact.mol", mode = Mode.READ)
6471
@Description("RDKit exact search on `mdlmol` property")
65-
public Stream<NodeWrapper> exactSearchMol(@Name("labels") List<String> labelNames, @Name("mol") String molBlock,
66-
@Name(value = "sanitize", defaultValue = "true") boolean sanitize) {
72+
public Stream<NodeWrapper> exactSearchMol(@Name("labels") List<String> labelNames, @Name("mol") String molBlock) {
6773
log.info("Exact search mol :: label=%s, molBlock=%s", labelNames, molBlock);
6874

69-
final String rdkitSmiles = converter.convertMolBlock(molBlock, sanitize).getCanonicalSmiles();
75+
Config config = ((GraphDatabaseAPI) db).getDependencyResolver().resolveDependency(Config.class);
76+
boolean sanitize = config.get(RDKitSettings.indexSanitize);
77+
78+
NodeParameters nodeParameters;
79+
try {
80+
nodeParameters = converter.convertMolBlock(molBlock, true);
81+
82+
} catch (MolSanitizeException e) {
83+
if (sanitize) {
84+
throw e;
85+
} else {
86+
nodeParameters = converter.convertMolBlock(molBlock, false);
87+
}
88+
}
89+
90+
final String rdkitSmiles = nodeParameters.getCanonicalSmiles();
7091
return findLabeledNodes(labelNames, NodeFields.CanonicalSmiles.getValue(), rdkitSmiles);
92+
93+
7194
}
7295

7396
/**

src/main/java/org/rdkit/neo4j/utils/Converter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,8 +232,8 @@ private LuceneQuery getLuceneQuery(final BitSet fp, final String delimiter) {
232232
*/
233233
private NodeParameters createMolBlock(final RWMol rwmol, boolean sanitize) {
234234
logger.debug("Construct default molBlock fields");
235+
rwmol.updatePropertyCache(false);
235236
final String rdkitSmiles = RDKFuncs.MolToSmiles(rwmol);
236-
rwmol.updatePropertyCache(sanitize);
237237
final String formula = RDKFuncs.calcMolFormula(rwmol);
238238
final double molecularWeight = RDKFuncs.calcExactMW(rwmol);
239239
final String inchi = RDKFuncs.MolToInchiKey(rwmol);

src/test/java/org/rdkit/neo4j/handlers/SmilesEventHandlerTest.java

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,7 @@ public void insertMolBlockTest() {
113113
+ "M END\n";
114114

115115
final String query = String.format("CREATE (c:Chemical:Structure {mdlmol: '%s'}) RETURN id(c) as id", mol);
116-
// final String canonicalSmiles = "COc1ccccc1";
117-
final String canonicalSmiles = "COC1=CC=CC=C1";
116+
final String canonicalSmiles = "COc1ccccc1";
118117
final String formula = "C7H8O";
119118
final String inchi_key = "RDOXTESZEPMUJZ-UHFFFAOYSA-N";
120119
final double molecularWeight = 108.057514876;
@@ -159,4 +158,75 @@ public void testInvalidMdMol() {
159158
" CREATE (n:Entity:Chemical:Compound:Structure { luri: 'test5', tag:'<test5>', preferred_name: 'aabbcc5', mdlmol: $molBlock})",
160159
Collections.singletonMap("molBlock", invalidMolBlock));
161160
}
161+
162+
@Test
163+
public void testBuggyMolBlock() {
164+
// this is a molblock causing an error directly in RDKit if used with sanitize=false
165+
String buggyMolBlock="\n" +
166+
" RDKit 2D\n" +
167+
"\n" +
168+
" 27 30 0 0 0 0 0 0 0 0999 V2000\n" +
169+
" 8.2500 -3.8971 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
170+
" 7.5000 -2.5981 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" +
171+
" 8.2500 -1.2990 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
172+
" 6.0000 -2.5981 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
173+
" 5.2500 -1.2990 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
174+
" 3.7500 -1.2990 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" +
175+
" 3.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
176+
" 3.7500 1.2990 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n" +
177+
" 1.5000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
178+
" 0.7500 -1.2990 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
179+
" -0.7500 -1.2990 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
180+
" -1.5000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
181+
" -0.7500 1.2990 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
182+
" -1.5000 2.5981 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" +
183+
" -0.7500 3.8971 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
184+
" -1.5000 5.1962 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
185+
" -0.7500 6.4952 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
186+
" 0.7500 6.4952 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
187+
" 1.5000 7.7942 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
188+
" 3.0000 7.7942 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
189+
" 3.7500 9.0933 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" +
190+
" 3.7500 6.4952 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
191+
" 3.0000 5.1962 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
192+
" 1.5000 5.1962 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
193+
" 0.7500 3.8971 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
194+
" 1.5000 2.5981 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n" +
195+
" 0.7500 1.2990 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n" +
196+
" 1 2 1 0\n" +
197+
" 2 3 1 0\n" +
198+
" 2 4 1 0\n" +
199+
" 4 5 1 0\n" +
200+
" 5 6 1 0\n" +
201+
" 6 7 1 0\n" +
202+
" 7 8 2 0\n" +
203+
" 7 9 1 0\n" +
204+
" 9 10 2 0\n" +
205+
" 10 11 1 0\n" +
206+
" 11 12 2 0\n" +
207+
" 12 13 1 0\n" +
208+
" 13 14 2 0\n" +
209+
" 14 15 1 0\n" +
210+
" 15 16 2 0\n" +
211+
" 16 17 1 0\n" +
212+
" 17 18 2 0\n" +
213+
" 18 19 1 0\n" +
214+
" 19 20 2 0\n" +
215+
" 20 21 1 0\n" +
216+
" 20 22 1 0\n" +
217+
" 22 23 2 0\n" +
218+
" 23 24 1 0\n" +
219+
" 24 25 2 0\n" +
220+
" 25 26 1 0\n" +
221+
" 26 27 2 0\n" +
222+
" 27 9 1 0\n" +
223+
" 27 13 1 0\n" +
224+
" 25 15 1 0\n" +
225+
" 24 18 1 0\n" +
226+
"M END\n" +
227+
"\n" +
228+
"$$$$\n";
229+
graphDb.execute("CREATE (n:Entity:Chemical:Compound:Structure { luri: \"test2\", tag:\"~test2~\", preferred_name: \"aabbcc2\", mdlmol:$buggyMolBlock})",
230+
Collections.singletonMap("buggyMolBlock", buggyMolBlock));
231+
}
162232
}

src/test/java/org/rdkit/neo4j/procedures/ExactSearchTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,9 @@ public void callExactMolTest() {
8989

9090
graphDb.execute("CREATE (node:Chemical:Structure {mdlmol: $mol})", MapUtil.map("mol", mol));
9191

92-
final String expectedSmiles = "COC1=CC=CC=C1";
92+
final String expectedSmiles = "COc1ccccc1";
9393
try (Transaction tx = graphDb.beginTx()) {
94-
Result result = graphDb.execute("CALL org.rdkit.search.exact.mol($labels, $mol, false)", MapUtil.map("labels", defaultLabels, "mol", mol));
94+
Result result = graphDb.execute("CALL org.rdkit.search.exact.mol($labels, $mol)", MapUtil.map("labels", defaultLabels, "mol", mol));
9595
Map<String, Object> item = result.next();
9696
String smiles = (String) item.get("canonical_smiles");
9797
assertEquals(expectedSmiles, smiles);

0 commit comments

Comments
 (0)