Skip to content

Commit 5ee1153

Browse files
committed
java-jsi-clus-rm initial commit
1 parent c565329 commit 5ee1153

42 files changed

Lines changed: 2727 additions & 9 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.circleci/config.yml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -150,14 +150,14 @@ jobs:
150150
name: "Test java-jsi-clus-fr"
151151
command: |
152152
PATH=$PATH:/$HOME/.captain/bin/ ./java-jsi-clus-fr/tests/test.sh
153-
# - run:
154-
# name: "Build java-jsi-clus-rm"
155-
# command: |
156-
# PATH=$PATH:/$HOME/.captain/bin/ ./java-jsi-clus-rm/build.sh
157-
# - run:
158-
# name: "Test java-jsi-clus-rm"
159-
# command: |
160-
# PATH=$PATH:/$HOME/.captain/bin/ ./java-jsi-clus-rm/tests/test.sh
153+
- run:
154+
name: "Build java-jsi-clus-rm"
155+
command: |
156+
PATH=$PATH:/$HOME/.captain/bin/ ./java-jsi-clus-rm/build.sh
157+
- run:
158+
name: "Test java-jsi-clus-rm"
159+
command: |
160+
PATH=$PATH:/$HOME/.captain/bin/ ./java-jsi-clus-rm/tests/test.sh
161161
- run:
162162
name: "Build python-jsi-hedwig"
163163
command: |

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,5 @@ algorithm-repository.Rproj
99
**/.Rhistory
1010
*.orig
1111
.Rproj.user
12+
/.metadata/
13+
/.recommenders/

build.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ else
2222
DOCKER="sudo docker"
2323
fi
2424

25-
IMAGES="r-summary-stats r-linear-regression java-rapidminer java-jsi-clus-pct java-jsi-clus-pct-ts java-jsi-clus-fire java-jsi-clus-fr python-jsi-hedwig python-jsi-hinmine"
25+
IMAGES="r-summary-stats r-linear-regression java-rapidminer java-jsi-clus-pct java-jsi-clus-rm java-jsi-clus-pct-ts java-jsi-clus-fire java-jsi-clus-fr python-jsi-hedwig python-jsi-hinmine"
2626

2727
commit_id="$(git rev-parse --short HEAD)"
2828

java-jsi-clus-rm/.bumpversion.cfg

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
[bumpversion]
2+
current_version = 0.0.1
3+
commit = True
4+
tag = True
5+
tag_name = java-jsi-clus-rm-{new_version}
6+
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)
7+
serialize = {major}.{minor}.{patch}
8+
9+
[bumpversion:file:pom.xml]
10+
search = <version>{current_version}</version><!-- bumpversion -->
11+
replace = <version>{new_version}</version><!-- bumpversion -->
12+

java-jsi-clus-rm/.dockerignore

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
.git/
2+
.idea/
3+
target/
4+
*~
5+
.settings
6+
.project
7+
deps/rm/
8+
deps/decompiled/

java-jsi-clus-rm/.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
target/
2+
.classpath
3+
.settings
4+
.project

java-jsi-clus-rm/Dockerfile

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Build stage: compiles the project with Maven inside the java-base-build image.
# Later stages refer to this stage by the name "build-java-env".
FROM hbpmip/java-base-build:3.5.2-jdk-8-0 as build-java-env
2+
3+
# Copy the Maven descriptor, the sources and the deps/ directory into /project.
COPY pom.xml /project/pom.xml
4+
COPY src/ /project/src
5+
COPY deps/ /project/deps
6+
7+
# Install the Docker-specific Maven settings, then pre-fetch dependencies.
RUN cp /usr/share/maven/ref/settings-docker.xml /root/.m2/settings.xml \
8+
&& mvn dependency:go-offline
9+
10+
# Repeating copy of the settings works better. I dunno why.
11+
# Build the package and the Maven site; both outputs under /project/target
# are copied out of this stage later in the file.
RUN cp /usr/share/maven/ref/settings-docker.xml /root/.m2/settings.xml \
12+
&& mvn package site
13+
14+
# Runtime stage: the final image, based on java-mip, receiving the artifacts
# produced by the build-java-env stage.
FROM hbpmip/java-mip:0.5.7
15+
16+
# NOTE(review): MAINTAINER is deprecated in the Dockerfile reference in favour
# of a LABEL (e.g. org.opencontainers.image.authors) - consider migrating.
MAINTAINER martin.breskvar@ijs.si
17+
18+
# Runtime configuration: image/function name, Java classpath and main class,
# and the Weka home directory.
# NOTE(review): the classpath names /usr/share/jars/lib/* and two specific jars;
# presumably these arrive via the deps/ copy below - verify against deps/ contents.
ENV DOCKER_IMAGE=java-jsi-clus-rm \
19+
FUNCTION=java-jsi-clus-rm \
20+
JAVA_CLASSPATH=${JAVA_CLASSPATH}:/opt/weka/props/:/usr/share/jars/jsi-clus-rm-deps.jar:/usr/share/jars/lib/*:/usr/share/jars/Redescription_mining_MW_ConstrainedGen1.jar:/usr/share/jars/CLUSNHMC.jar \
21+
JAVA_MAINCLASS=eu.humanbrainproject.mip.algorithms.jsi.clus.rm.Entrypoint \
22+
WEKA_HOME=/opt/weka
23+
24+
# Database property files from the build context go under /opt/weka.
COPY docker/databases-props/ /opt/weka/databases-props/
25+
# Artifacts taken from the build stage: the deps/ tree, the packaged jar and
# the generated Maven site.
COPY --from=build-java-env /project/deps/ /usr/share/jars/
26+
COPY --from=build-java-env /project/target/jsi-clus-rm-deps.jar /usr/share/jars/jsi-clus-rm-deps.jar
27+
COPY --from=build-java-env /project/target/site/ /var/www/html/
28+
# Sources and documentation are also shipped inside the image under /src.
COPY src/ /src/
29+
COPY doc/ /src/doc/
30+
31+
# Create the Weka props directory and hand the working trees to the compute user.
RUN mkdir -p /opt/weka/props/weka/experiment \
32+
&& chown -R compute:compute /mip/ /opt/weka/ /usr/share/jars/
33+
34+
# Sources belong to compute; the published site belongs to root:www-data.
RUN chown -R compute:compute /src/ \
35+
&& chown -R root:www-data /var/www/html/
36+
37+
WORKDIR /usr/share/jars
38+
39+
# Image metadata following the label-schema 1.0 keys.
# NOTE(review): $BUILD_DATE, $VCS_REF and $VERSION are referenced below, but no
# ARG (or ENV) instruction for them is visible in this Dockerfile; without
# "ARG BUILD_DATE", "ARG VCS_REF" and "ARG VERSION" in this stage these labels
# would presumably expand to empty strings - confirm and add the ARG lines.
LABEL org.label-schema.build-date=$BUILD_DATE \
40+
org.label-schema.name="hbpmip/java-jsi-clus-rm" \
41+
org.label-schema.description="PCT-based algorithm for Redescription Mining" \
42+
org.label-schema.url="https://github.com/LREN-CHUV/algorithm-repository" \
43+
org.label-schema.vcs-type="git" \
44+
org.label-schema.vcs-url="https://github.com/LREN-CHUV/algorithm-repository.git" \
45+
org.label-schema.vcs-ref=$VCS_REF \
46+
org.label-schema.version="$VERSION" \
47+
org.label-schema.vendor="JSI KT and IRB ZEL" \
48+
org.label-schema.license="GPLv3" \
49+
org.label-schema.docker.dockerfile="Dockerfile" \
50+
org.label-schema.schema-version="1.0"

java-jsi-clus-rm/README.md

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
[![JSI](https://img.shields.io/badge/JSI-KT-AF4C64.svg)](http://kt.ijs.si/)
2+
[![DockerHub](https://img.shields.io/badge/docker-hbpmip%2Fjava--jsi--clus--rm-008bb8.svg)](https://hub.docker.com/r/hbpmip/java-jsi-clus-rm/)
3+
[![ImageVersion](https://images.microbadger.com/badges/version/hbpmip/java-jsi-clus-rm.svg)](https://hub.docker.com/r/hbpmip/java-jsi-clus-rm/tags "hbpmip/java-jsi-clus-rm image tags")
4+
[![ImageLayers](https://images.microbadger.com/badges/image/hbpmip/java-jsi-clus-rm.svg)](https://microbadger.com/#/images/hbpmip/java-jsi-clus-rm "hbpmip/java-jsi-clus-rm on microbadger")
5+
6+
# hbpmip/java-jsi-clus-rm: Redescription Mining using Predictive Clustering from JSI and IRB
7+
8+
Implementation of the Redescription mining algorithm based on Predictive Clustering Trees.
9+
For more details see https://github.com/matmih/CLUS-RM-library.
10+
11+
## Usage
12+
13+
```sh
14+
docker run --rm --env [list of environment variables] hbpmip/java-jsi-clus-rm compute
15+
```
16+
17+
where the environment variables are:
18+
19+
* NODE: name of the node (machine) used for execution
20+
* JOB_ID: ID of the job.
21+
* IN_JDBC_DRIVER: org.postgresql.Driver
22+
* IN_JDBC_URL: URL to the input database, e.g. jdbc:postgresql://db:5432/features
23+
* IN_JDBC_USER: User for the input database
24+
* IN_JDBC_PASSWORD: Password for the input database
25+
* OUT_JDBC_DRIVER: org.postgresql.Driver
26+
* OUT_JDBC_URL: URL to the output database, e.g. jdbc:postgresql://db:5432/woken
27+
* OUT_JDBC_USER: User for the output database
28+
* OUT_JDBC_PASSWORD: Password for the output database
29+
* PARAM_covariables: Attributes contained in the first data view.
30+
* PARAM_variables: Attributes contained in the second data view.
31+
* PARAM_query: Query selecting the data to feed into the algorithm for training
32+
* MODEL_PARAM_minJS: Specify minimal redescription accuracy (measured with Jaccard index) required to return it to the user. Parameter values are contained in [0,1]. (default is MODEL_PARAM_minJS=0.5)
33+
* MODEL_PARAM_maxPval: Specify maximal redescription p-value required to return it to the user. Parameter values are contained in [0,1]. (default is MODEL_PARAM_maxPval=0.01)
34+
* MODEL_PARAM_MinSupport: Specify minimal redescription support required to return it to the user. Parameter values are contained in [1,|E|], where |E| denotes number of entities in the dataset. (This parameter MUST be defined by the user and is domain and data dependent).
35+
* MODEL_PARAM_MaxSupport: Specify maximal redescription support allowed. Parameter values are contained in [1,|E|], where |E| denotes number of entities in the dataset. (default is MODEL_PARAM_MaxSupport = |E|).
36+
* MODEL_PARAM_numRandomRestarts: Specify the number of random initialization steps performed by the CLUS-RM (the default is MODEL_PARAM_numRandomRestarts = 1).
37+
* MODEL_PARAM_numIterations: Specify the number of iterations (also called alternations) performed by the CLUS-RM (the default is MODEL_PARAM_numIterations = 10).
38+
* MODEL_PARAM_numRetRed: Specify the number of redescriptions to be returned by the CLUS-RM (the default is MODEL_PARAM_numRetRed = 50).
39+
* MODEL_PARAM_attributeImportanceW1: Specify the attribute importance, for attributes contained in view 1, used in constraint-based redescription mining (the default is MODEL_PARAM_attributeImportanceW1 = "none"). Possible values are: "none" - allow redescriptions with any attributes from view1, "suggested" - allow defining combinations of attributes that increase redescription score (redescriptions containing specified attributes are preferred), "soft" - only return redescriptions satisfying at least part of specified constraints to the user (redescriptions satisfying larger portion of constraint set are preferred), "hard" - only return redescriptions satisfying all constraints defined in one constraint set.
40+
* MODEL_PARAM_attributeImportanceW2: Specify the attribute importance, for attributes contained in view 2, used in constraint-based redescription mining (the default is MODEL_PARAM_attributeImportanceW2 = "none"). Possible values are: "none" - allow redescriptions with any attributes from view2, "suggested" - allow defining combinations of attributes that increase redescription score (redescriptions containing specified attributes are preferred), "soft" - only return redescriptions satisfying at least part of specified constraints to the user (redescriptions satisfying larger portion of constraint set are preferred), "hard" - only return redescriptions satisfying all constraints defined in one constraint set.
41+
* MODEL_PARAM_importantAttributesW1: defines constraint sets, for attributes contained in view 1, to be used in constraint-based redescription mining (default is MODEL_PARAM_importantAttributesW1=""). Constraints are specified in the format "{a;b;c},{a;d}", where a,b,c,d are some attributes contained in the first view (view1) of the data.
42+
* MODEL_PARAM_importantAttributesW2: defines constraint sets, for attributes contained in view 2, to be used in constraint-based redescription mining (default is MODEL_PARAM_importantAttributesW2=""). Constraints are specified in the format "{e;f;g},{h;i}", where e,f,g,h,i are some attributes contained in the second view (view2) of the data.

java-jsi-clus-rm/build.sh

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#!/usr/bin/env bash
2+
3+
set -o pipefail # trace ERR through pipes
4+
set -o errtrace # trace ERR through 'time command' and other functions
5+
set -o errexit ## set -e : exit the script if any statement returns a non-true return value
6+
7+
#######################################
# Print the physical directory that contains this script, following symlinks.
# Globals:   SOURCE, DIR (both assigned here without `local`)
# Arguments: none
# Outputs:   the resolved directory on stdout (from `pwd`)
# Note:      the function itself calls `cd`; the caller below invokes it
#            inside $(...), so that cd happens in a subshell.
#######################################
get_script_dir () {
8+
# Start from the path of the file currently being executed/sourced.
SOURCE="${BASH_SOURCE[0]}"
9+
10+
# Keep resolving for as long as SOURCE is itself a symbolic link.
while [ -h "$SOURCE" ]; do
11+
# Physical (-P) directory holding the current link.
DIR="$( cd -P "$( dirname "$SOURCE" )" && pwd )"
12+
SOURCE="$( readlink "$SOURCE" )"
13+
# A link target not starting with "/" is relative to the link's directory.
[[ $SOURCE != /* ]] && SOURCE="$DIR/$SOURCE"
14+
done
15+
# Enter the directory of the fully resolved file and print it.
cd -P "$( dirname "$SOURCE" )"
16+
pwd
17+
}
18+
19+
cd "$(get_script_dir)"
20+
21+
# Decide how to invoke captain:
#  - directly when NO_SUDO is non-empty or CIRCLECI is set,
#  - directly when the output of `groups $USER` contains the word "docker",
#  - through sudo otherwise.
if [[ $NO_SUDO || -n "$CIRCLECI" ]]; then
22+
CAPTAIN="captain"
23+
elif groups $USER | grep &>/dev/null '\bdocker\b'; then
24+
CAPTAIN="captain"
25+
else
26+
CAPTAIN="sudo captain"
27+
fi
28+
29+
# Run `captain build` with BUILD_DATE, VCS_REF and VERSION supplied as
# per-command environment variables; VCS_REF and VERSION both come from
# `git describe --tags --dirty`. $CAPTAIN is intentionally unquoted so that
# "sudo captain" splits into two words.
# NOTE(review): when CAPTAIN is "sudo captain", sudo's default environment
# reset may drop these variables before captain sees them - confirm, and
# consider passing them after sudo (sudo VAR=value captain build).
BUILD_DATE=$(date -Iseconds) \
30+
VCS_REF=$(git describe --tags --dirty) \
31+
VERSION=$(git describe --tags --dirty) \
32+
$CAPTAIN build

java-jsi-clus-rm/captain.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
target_image:
2+
build: Dockerfile
3+
image: hbpmip/java-jsi-clus-rm
4+
pre:
5+
- echo "Preparing java-jsi-clus-rm"
6+
post:
7+
- echo "Finished java-jsi-clus-rm"

0 commit comments

Comments
 (0)