Skip to content

Commit 3729816

Browse files
committed
Merge branch 'dev'
2 parents 6ebf2c2 + 0415db2 commit 3729816

51 files changed

Lines changed: 3364 additions & 460 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,5 @@ CHANGELOG.md
77
out/
88
neo4j-temp/
99
docker-compose.yml
10-
dependency-reduced-pom.xml
10+
dependency-reduced-pom.xml
11+
temp/

.travis.yml

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,32 @@ cache:
22
directories:
33
- $HOME/.m2
44

5+
#env:
6+
# - GRAVIS="https://raw.githubusercontent.com/DanySK/Gravis-CI/master/"
7+
58
matrix:
69
include:
7-
- os: windows
8-
language: bash
9-
before_install:
10-
- choco install jdk8 maven
11-
- export JAVA_HOME="C:\Program Files\Java\jdk1.8.0_211"
12-
- export PATH=$PATH:$JAVA_HOME\bin\:C:\ProgramData\chocolatey\lib\maven\
10+
# - os: windows
11+
# language: bash
12+
# before_install:
13+
# - curl "${GRAVIS}.install-jdk-travis.sh" --output .install-jdk-travis.sh
14+
# - source ~/.install-jdk-travis.sh
15+
# - curl "${GRAVIS}.install-maven-travis.sh" --output .install-maven-travis.sh
16+
# - source ~/.install-maven-travis.sh
17+
#
18+
## - choco install jdk8 maven
19+
## - export JAVA_HOME="C:\Program Files\Java\jdk1.8.0_211"
20+
## - export PATH=$PATH:"C:\Program Files\Java\jdk1.8.0_211\bin\":"C:\ProgramData\chocolatey\lib\maven\"
21+
# before_cache:
22+
# - curl "${GRAVIS}.clean_gradle_cache.sh" --output .clean_gradle_cache.sh
23+
# - bash .clean_gradle_cache.sh
24+
# cache:
25+
# directories:
26+
# # This avoids re-downloading the JDK every time, but Travis recommends not to do it
27+
# # - $HOME/.jabba/
28+
# # If you use Gradle, you may want to save some time with caching
29+
# - $HOME/.gradle/caches/
30+
# - $HOME/.gradle/wrapper/
1331
- os: linux
1432
language: java
1533
dist: trusty

README.md

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,43 @@ mvn org.apache.maven.plugins:maven-install-plugin:2.3.1:install-file \
2828
```
2929
2) Generate .jar file with all dependencies with `mvn package`
3030

31+
## Extension functionality
32+
33+
### Node labels: [`Chemical`, `Structure`] - strict rule (!)
34+
35+
* __Whenever a new node is added with these labels__, an `rdkit` event handler is applied and new node properties are constructed from the `mdlmol` property.
36+
1) `canonical_smiles`
37+
2) `inchi`
38+
3) `formula`
39+
4) `molecular_weight`
40+
5) `fp` - bit-vector fingerprint in form of indexes of positive bits (`"1 4 19 23"`)
41+
6) `fp_ones` - count of positive bits
42+
7) `mdlmol`
43+
44+
* If the graph was already populated with nodes before the extension was loaded, the following procedure can be applied:
45+
`CALL org.rdkit.update(['Chemical', 'Structure'])` - which iterates through the nodes with the specified labels and creates the properties described above.
46+
47+
* In order to speed up an exact search, create an index on the `canonical_smiles` property
48+
49+
### User-defined procedures
50+
51+
1) `CALL org.rdkit.search.exact.smiles(['Chemical', 'Structure'], 'CC(=O)Nc1nnc(S(N)(=O)=O)s1')`
52+
2) `CALL org.rdkit.search.exact.mol(['Chemical', 'Structure'], '<mdlmol block>')`
53+
* RDKit provides `exact search` functionality for both `smiles` and `mdlmol blocks`; it returns the node whose `canonical_smiles` matches the query
54+
3) `CALL org.rdkit.update(['Chemical', 'Structure'])`
55+
* Update procedure (manual properties initialization from `mdlmol` property)
56+
4) `CALL org.rdkit.search.createIndex(['Chemical', 'Structure'])`
57+
* Create fulltext index (called `rdkitIndex`) on property `fp`, which is required for substructure search
58+
* Create index for `:Chemical(canonical_smiles)` property
59+
5) `CALL org.rdkit.search.deleteIndex()`
60+
* Delete fulltext index (called `rdkitIndex`) on property `fp`, which is required for substructure search
61+
* Delete index for `:Chemical(canonical_smiles)` property
62+
6) `CALL org.rdkit.search.substructure.smiles(['Chemical', 'Structure'], 'CC(=O)Nc1nnc(S(N)(=O)=O)s1')`
63+
* Substructure search based on a SMILES substructure query; a valid SMILES string is expected
3164

3265

3366
## Useful links:
3467
- https://github.com/neo4j/neo4j
3568
- https://github.com/neo4j-contrib/neo4j-lucene5-index
69+
- https://github.com/rdkit/org.rdkit.lucene
3670

pom.xml

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
<groupId>org.neo4j.rdkit</groupId>
88
<artifactId>rdkit-index</artifactId>
9-
<version>0.0.1</version>
9+
<version>0.0.3</version>
1010
<name>RDKit-Neo4j</name>
1111
<packaging>jar</packaging>
1212

@@ -96,7 +96,6 @@
9696
<groupId>org.projectlombok</groupId>
9797
<artifactId>lombok</artifactId>
9898
<version>${lombok.version}</version>
99-
<scope>provided</scope>
10099
</dependency>
101100

102101

@@ -124,6 +123,9 @@
124123
</includes>
125124
<targetPath>native</targetPath>
126125
</resource>
126+
<resource>
127+
<directory>src/main/resources</directory>
128+
</resource>
127129
</resources>
128130

129131
<plugins>
@@ -145,6 +147,14 @@
145147
<goals>
146148
<goal>shade</goal>
147149
</goals>
150+
<configuration>
151+
<artifactSet>
152+
<excludes>
153+
<!--<exclude>org.slf4j:*</exclude>-->
154+
<exclude>org.projectlombok:lombok</exclude>
155+
</excludes>
156+
</artifactSet>
157+
</configuration>
148158
</execution>
149159
</executions>
150160
</plugin>

src/main/java/org/neo4j/kernel/api/impl/fulltext/analyzer/providers/RDKit.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,21 @@
33
import org.apache.lucene.analysis.Analyzer;
44
import org.neo4j.graphdb.index.fulltext.AnalyzerProvider;
55
import org.neo4j.helpers.Service;
6-
import org.rdkit.neo4j.analyzer.RDKitAnalyzer;
6+
import org.rdkit.lucene.DefaultAnalyzerFactory;
7+
import org.slf4j.Logger;
8+
import org.slf4j.LoggerFactory;
79

810
@Service.Implementation( AnalyzerProvider.class )
911
public class RDKit extends AnalyzerProvider {
10-
11-
// todo: * The {@code jar} that includes this implementation must also contain a {@code META-INF/services/org.neo4j.graphdb.index.fulltext.AnalyzerProvider} file,
12-
// todo: * that contains the fully-qualified class names of all of the {@code AnalyzerProvider} implementations it contains.
12+
// Logger for this analyzer provider; bound to RDKit.class so log records are attributed to this class.
private static final Logger logger = LoggerFactory.getLogger(RDKit.class);
1313

1414
public RDKit() {
1515
super("rdkit");
1616
}
1717

1818
@Override
1919
public Analyzer createAnalyzer() {
20-
return new RDKitAnalyzer();
20+
return new DefaultAnalyzerFactory().createAnalyzer();
2121
}
2222

2323
@Override
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
/*
2+
* Copyright (C)2014, Novartis Institutes for BioMedical Research Inc.
3+
* All rights reserved.
4+
*
5+
* Redistribution and use in source and binary forms, with or without
6+
* modification, are permitted provided that the following conditions are
7+
* met:
8+
*
9+
* - Redistributions of source code must retain the above copyright
10+
* notice, this list of conditions and the following disclaimer.
11+
*
12+
* - Redistributions in binary form must reproduce the above
13+
* copyright notice, this list of conditions and the following
14+
* disclaimer in the documentation and/or other materials provided
15+
* with the distribution.
16+
*
17+
* - Neither the name of Novartis Institutes for BioMedical Research Inc.
18+
* nor the names of its contributors may be used to endorse or promote
19+
* products derived from this software without specific prior written permission.
20+
*
21+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22+
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23+
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24+
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25+
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26+
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27+
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28+
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29+
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30+
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31+
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32+
*/
33+
package org.rdkit.fingerprint;
34+
35+
import java.util.BitSet;
36+
import lombok.AllArgsConstructor;
37+
import lombok.Data;
38+
import lombok.NonNull;
39+
import lombok.val;
40+
import org.RDKit.ExplicitBitVect;
41+
import org.RDKit.RDKFuncs;
42+
import org.RDKit.ROMol;
43+
import org.RDKit.RWMol;
44+
import org.rdkit.neo4j.utils.RWMolCloseable;
45+
import org.slf4j.Logger;
46+
import org.slf4j.LoggerFactory;
47+
48+
/**
49+
* A fingerprint factory is an object that knows how to produce fingerprints for SMILES. It is used to calculate fingerprints for the search index as
50+
* well as for query structures when the index is searched. As some fingerprints, e.g. Avalon, support different optimizations we have two different
51+
* methods for the two different purposes.
52+
*
53+
* @author Manuel Schwarze
54+
*/
55+
56+
@Data
57+
@AllArgsConstructor
58+
public class DefaultFingerprintFactory implements FingerprintFactory {
59+
60+
//
61+
// Constants
62+
//
63+
64+
/**
65+
* The logger instance.
66+
*/
67+
private static final Logger logger = LoggerFactory.getLogger(DefaultFingerprintFactory.class);
68+
69+
//
70+
// Members
71+
//
72+
73+
/**
74+
* The settings to be used for calculating structure fingerprints with this factory.
75+
*/
76+
private final FingerprintSettings settingsStructure;
77+
78+
/**
79+
* The settings to be used for calculating query fingerprints with this factory.
80+
*/
81+
private final FingerprintSettings settingsQuery;
82+
83+
//
84+
// Constructors
85+
//
86+
87+
/**
88+
* Creates a new fingerprint factory based on the past in settings. Structure and query fingerprints are handled the same way. There is distinction
89+
* between them. To handle them differently, use the other constructor.
90+
*
91+
* @param settings Fingerprint settings. Must not be null.
92+
*/
93+
public DefaultFingerprintFactory(@NonNull final FingerprintSettings settings) {
94+
settingsStructure = settingsQuery = settings;
95+
}
96+
97+
//
98+
// Public Methods
99+
//
100+
101+
/**
102+
* Creates a fingerprint based on the passed in SMILES.
103+
*
104+
* @param strSmiles SMILES structure, preferably canonicalized by RDKit before. Must not be null.
105+
* @return Fingerprint as BitSet.
106+
*/
107+
@Override
108+
public BitSet createStructureFingerprint(final String strSmiles) {
109+
return createFingerprint(strSmiles, settingsStructure);
110+
}
111+
112+
/**
113+
* Creates a fingerprint based on the passed in SMILES.
114+
*
115+
* @param strSmiles SMILES structure, preferably canonicalized by RDKit before. Must not be null.
116+
* @return Fingerprint as BitSet.
117+
*/
118+
@Override
119+
public BitSet createQueryFingerprint(final String strSmiles) {
120+
return createFingerprint(strSmiles, settingsQuery);
121+
}
122+
123+
/**
124+
* Method for already opened RWMol to build fingerprint from Query settings.
125+
*
126+
* @param mol already opened RWMol object
127+
* @return Fingerprint as BitSet.
128+
*/
129+
public BitSet createQueryFingerprint(final RWMol mol) {
130+
return createFingerprint(mol, settingsQuery);
131+
}
132+
133+
/**
134+
* Method for already opened RWMol to build fingerprint from Structure settings.
135+
*
136+
* @param mol already opened RWMol object
137+
* @return Fingerprint as BitSet.
138+
*/
139+
public BitSet createStructureFingerprint(final RWMol mol) {
140+
return createFingerprint(mol, settingsStructure);
141+
}
142+
143+
//
144+
// Private Methods
145+
//
146+
147+
/**
148+
* Creates a fingerprint based on the passed in SMILES.
149+
*
150+
* @param strSmiles SMILES structure, preferably canonicalized by RDKit before. Must not be null. ! EXPECTED CANONICALIZED SMILES !
151+
* @param settings Fingerprint settings to be used.
152+
* @return Fingerprint as BitSet.
153+
*/
154+
private BitSet createFingerprint(@NonNull final String strSmiles, final FingerprintSettings settings) {
155+
156+
// todo: update code if other types are used
157+
158+
// Normally: ROMol objects are needed to calculate fingerprints
159+
// Create an ROMol object
160+
161+
// Performance trick, if SMILES is already canonicalized
162+
try (val mol = RWMolCloseable.from(RWMol.MolFromSmiles(strSmiles, 0, false))) {
163+
return createFingerprint(mol, settings);
164+
}
165+
}
166+
167+
/**
168+
* Method for already opened RWMol
169+
* @param mol - canonicalized
170+
* @param settings to build fingerprint from
171+
* @return BitSet from rwmol (fingerprint of `settings` type)
172+
*/
173+
private BitSet createFingerprint(final RWMol mol, final FingerprintSettings settings) {
174+
mol.updatePropertyCache();
175+
176+
// Calculate fingerprint
177+
return convert(settings.getRdkitFingerprintType().calculate(mol, settings));
178+
}
179+
180+
/**
181+
* Converts an RDKit bit vector into a Java BitSet object.
182+
*
183+
* @param rdkitBitVector RDKit (C++ based) bit vector. Can be null.
184+
* @return BitSet or null, if null was passed in.
185+
*/
186+
private BitSet convert(final ExplicitBitVect rdkitBitVector) {
187+
BitSet fingerprint = null;
188+
189+
if (rdkitBitVector != null) {
190+
final int iLength = (int) rdkitBitVector.getNumBits();
191+
fingerprint = new BitSet(iLength);
192+
for (int i = 0; i < iLength; i++) {
193+
if (rdkitBitVector.getBit(i)) {
194+
fingerprint.set(i);
195+
}
196+
}
197+
}
198+
199+
return fingerprint;
200+
}
201+
}

0 commit comments

Comments
 (0)