diff --git a/.env b/.env new file mode 100644 index 0000000..a602ba9 --- /dev/null +++ b/.env @@ -0,0 +1,2 @@ +CONF_REPO=https://github.com/VirtualFlyBrain/vfb-pipeline-config.git +CONF_BRANCH=dev diff --git a/config/collectdata/config.env b/config/collectdata/config.env new file mode 100644 index 0000000..981c676 --- /dev/null +++ b/config/collectdata/config.env @@ -0,0 +1,3 @@ +KBserver=http://192.168.0.1:7474 +KBuser=neo4j +KBpassword=password diff --git a/config/collectdata/shacl/kb.shacl b/config/collectdata/shacl/kb.shacl new file mode 100644 index 0000000..e04628f --- /dev/null +++ b/config/collectdata/shacl/kb.shacl @@ -0,0 +1,15 @@ +@prefix dash: . +@prefix rdf: . +@prefix rdfs: . +@prefix vfb: . +@prefix sh: . +@prefix xsd: . +@prefix FBbt: . + +vfb:DataSetCountShape + a sh:NodeShape ; + sh:targetNode FBbt:00050095 ; + sh:property [ + sh:path [ sh:inversePath rdf:type ] ; + sh:minCount 1 ; + ] . diff --git a/config/collectdata/shex/kb.shex b/config/collectdata/shex/kb.shex new file mode 100644 index 0000000..fb83ab9 --- /dev/null +++ b/config/collectdata/shex/kb.shex @@ -0,0 +1,9 @@ +PREFIX ex: +PREFIX xsd: +PREFIX vfb: +PREFIX foaf: +PREFIX dct: + +vfb:ImageShape { + dct:source IRI +} \ No newline at end of file diff --git a/config/collectdata/sparql/delete_blocked_entities.ru b/config/collectdata/sparql/delete_blocked_entities.ru new file mode 100644 index 0000000..392d9a3 --- /dev/null +++ b/config/collectdata/sparql/delete_blocked_entities.ru @@ -0,0 +1,18 @@ +PREFIX rdfs: +PREFIX owl: +PREFIX n2o: +PREFIX n2oc: +PREFIX dct: + +DELETE { + ?s ?blocked . + ?s ?p ?o . +} +WHERE { + ?s ?blocked . + ?s ?p ?o . + FILTER(?blocked=true) . + FILTER(isIRI(?s)) +} + +### EDIT: this was obsoleted in the end in favour of a cypher solution, see process.sh. 
\ No newline at end of file diff --git a/config/collectdata/sparql/delete_blocked_relations.ru b/config/collectdata/sparql/delete_blocked_relations.ru new file mode 100644 index 0000000..d97b42e --- /dev/null +++ b/config/collectdata/sparql/delete_blocked_relations.ru @@ -0,0 +1,30 @@ +PREFIX rdfs: +PREFIX owl: +PREFIX n2o: +PREFIX n2oc: +PREFIX dct: +PREFIX rdf: + + +DELETE { + ?s ?p ?o . + ?r rdf:type owl:Axiom ; + owl:annotatedSource ?s ; + owl:annotatedProperty ?p ; + owl:annotatedTarget ?o ; + ?blocked; + ?bp ?bo; + +} WHERE { + ?s ?p ?o . + ?r rdf:type owl:Axiom ; + owl:annotatedSource ?s ; + owl:annotatedProperty ?p ; + owl:annotatedTarget ?o ; + ?blocked; + ?bp ?bo; + + FILTER(?blocked=true) . +} + +### EDIT: this was obsoleted in the end in favour of a cypher solution, see process.sh. \ No newline at end of file diff --git a/config/collectdata/sparql/delete_embargoed_channels.ru b/config/collectdata/sparql/delete_embargoed_channels.ru new file mode 100644 index 0000000..dc41164 --- /dev/null +++ b/config/collectdata/sparql/delete_embargoed_channels.ru @@ -0,0 +1,31 @@ +PREFIX rdfs: +PREFIX owl: +PREFIX n2o: +PREFIX n2oc: +PREFIX dct: + +#Delete all ds:DataSet where ds.production is False +#Delete all i:Individual where (ds)-[:has_source]-(i:Individual)<-[:depicts]-(ch:Individual) WHERE ds.production is False + +DELETE { + ?channel ?channelrel ?channelval . +} + +WHERE { + + ?dataset n2o:nodeLabel ?nodelabel . # This selects all datasets + + OPTIONAL { + ?dataset n2oc:production ?production . + # n2oc:production is a bit brittle because IRI might be changed (risk!) + } + + ?image dct:source ?dataset . + ?channel ?image . # There does not always seem to be a channel + ?channel ?channelrel ?channelval . + + FILTER(?production=false || !bound(?production)) . + FILTER(?nodelabel="DataSet") +} + +### EDIT: this was obsoleted in the end in favour of a ROBOT solution, see process.sh. Using SPARQL this way is too memory consuming. 
\ No newline at end of file diff --git a/config/collectdata/sparql/delete_embargoed_datasets.ru b/config/collectdata/sparql/delete_embargoed_datasets.ru new file mode 100644 index 0000000..e25eee4 --- /dev/null +++ b/config/collectdata/sparql/delete_embargoed_datasets.ru @@ -0,0 +1,28 @@ +PREFIX rdfs: +PREFIX owl: +PREFIX n2o: +PREFIX n2oc: +PREFIX dct: + +#Delete all ds:DataSet where ds.production is False +#Delete all i:Individual where (ds)-[:has_source]-(i:Individual)<-[:depicts]-(ch:Individual) WHERE ds.production is False + +DELETE { + ?dataset ?dsrel ?dsval . +} + +WHERE { + + ?dataset n2o:nodeLabel ?nodelabel . # This selects all datasets + OPTIONAL { + ?dataset n2oc:production ?production . + # n2oc:production is a bit brittle because IRI might be changed (risk!) + } + + ?dataset ?dsrel ?dsval . + + FILTER(?production=false || !bound(?production)) . + FILTER(?nodelabel="DataSet") +} + +### EDIT: this was obsoleted in the end in favour of a ROBOT solution, see process.sh. Using SPARQL this way is too memory consuming. \ No newline at end of file diff --git a/config/collectdata/sparql/delete_embargoed_images.ru b/config/collectdata/sparql/delete_embargoed_images.ru new file mode 100644 index 0000000..9da80a4 --- /dev/null +++ b/config/collectdata/sparql/delete_embargoed_images.ru @@ -0,0 +1,35 @@ +PREFIX rdfs: +PREFIX owl: +PREFIX n2o: +PREFIX n2oc: +PREFIX dct: + +#Delete all ds:DataSet where ds.production is False +#Delete all i:Individual where (ds)-[:has_source]-(i:Individual)<-[:depicts]-(ch:Individual) WHERE ds.production is False + +DELETE { + ?image ?imgrel ?imgval . + ?imgval ?p1 ?o1 . +} + +WHERE { + + ?dataset n2o:nodeLabel ?nodelabel . # This selects all datasets + + OPTIONAL { + ?dataset n2oc:production ?production . + # n2oc:production is a bit brittle because IRI might be changed (risk!) + } + + ?image dct:source ?dataset . #in case a dataset does not have images yet this is an optional clause + ?image ?imgrel ?imgval . 
+ OPTIONAL { + ?imgval ?p1 ?o1 . + FILTER (isBlank(?imgval)) + } + + FILTER(?production=false || !bound(?production)) . + FILTER(?nodelabel="DataSet") +} + +### EDIT: this was obsoleted in the end in favour of a ROBOT solution, see process.sh. Using SPARQL this way is too memory consuming. \ No newline at end of file diff --git a/config/collectdata/sparql/embargoed_datasets_dev.sparql b/config/collectdata/sparql/embargoed_datasets_dev.sparql new file mode 100644 index 0000000..185452c --- /dev/null +++ b/config/collectdata/sparql/embargoed_datasets_dev.sparql @@ -0,0 +1,25 @@ +PREFIX rdfs: +PREFIX owl: +PREFIX n2o: +PREFIX n2oc: +PREFIX dct: + +SELECT DISTINCT ?dataset + +WHERE { + + ?dataset n2o:nodeLabel ?nodelabel . # This selects all datasets + OPTIONAL { + ?dataset n2oc:production ?production . + # n2oc:production is a bit brittle because IRI might be changed (risk!) + } + + OPTIONAL { + ?dataset n2oc:staging ?staged . + } + + # Embargo rule implemented by the FILTER below: IF((staging=false || unbound(staging)) && (prod = false || unbound(prod)) ) -----> EMBARGO + + FILTER( (?production=false || !bound(?production)) && (?staged=false || !bound(?staged)) ) . + FILTER(?nodelabel="DataSet") +} \ No newline at end of file diff --git a/config/collectdata/sparql/embargoed_datasets_prod.sparql b/config/collectdata/sparql/embargoed_datasets_prod.sparql new file mode 100644 index 0000000..f6a6a9f --- /dev/null +++ b/config/collectdata/sparql/embargoed_datasets_prod.sparql @@ -0,0 +1,19 @@ +PREFIX rdfs: +PREFIX owl: +PREFIX n2o: +PREFIX n2oc: +PREFIX dct: + +SELECT DISTINCT ?dataset + +WHERE { + + ?dataset n2o:nodeLabel ?nodelabel . # This selects all datasets + OPTIONAL { + ?dataset n2oc:production ?production . + # n2oc:production is a bit brittle because IRI might be changed (risk!) + } + + FILTER(?production=false || !bound(?production)) . 
+ FILTER(?nodelabel="DataSet") +} \ No newline at end of file diff --git a/config/collectdata/sparql/select_blocked_entities.sparql b/config/collectdata/sparql/select_blocked_entities.sparql new file mode 100644 index 0000000..e75b184 --- /dev/null +++ b/config/collectdata/sparql/select_blocked_entities.sparql @@ -0,0 +1,15 @@ +PREFIX rdfs: +PREFIX owl: +PREFIX n2o: +PREFIX n2oc: +PREFIX dct: + +SELECT ?s ?p ?o +WHERE { + ?s ?blocked . + ?s ?p ?o . + FILTER(?blocked=true) . + FILTER(isIRI(?s)) +} + +### EDIT: this was obsoleted in the end in favour of a cypher solution, see process.sh. \ No newline at end of file diff --git a/config/collectdata/sparql/select_blocked_relations.sparql b/config/collectdata/sparql/select_blocked_relations.sparql new file mode 100644 index 0000000..65bc4fa --- /dev/null +++ b/config/collectdata/sparql/select_blocked_relations.sparql @@ -0,0 +1,22 @@ +PREFIX rdfs: +PREFIX owl: +PREFIX n2o: +PREFIX n2oc: +PREFIX dct: +PREFIX rdf: + + +SELECT ?s ?p ?o +WHERE { + ?s ?p ?o . + ?r rdf:type owl:Axiom ; + owl:annotatedSource ?s ; + owl:annotatedProperty ?p ; + owl:annotatedTarget ?o ; + ?blocked; + ?bp ?bo; + + FILTER(?blocked=true) . +} + +### EDIT: this was obsoleted in the end in favour of a cypher solution, see process.sh. \ No newline at end of file diff --git a/config/collectdata/sparql/select_embargoed_channels_dev.sparql b/config/collectdata/sparql/select_embargoed_channels_dev.sparql new file mode 100644 index 0000000..c426525 --- /dev/null +++ b/config/collectdata/sparql/select_embargoed_channels_dev.sparql @@ -0,0 +1,29 @@ +PREFIX rdfs: +PREFIX owl: +PREFIX n2o: +PREFIX n2oc: +PREFIX dct: + +#Delete all ds:DataSet where ds.production is False +#Delete all i:Individual where (ds)-[:has_source]-(i:Individual)<-[:depicts]-(ch:Individual) WHERE ds.production is False + +SELECT DISTINCT ?channel +WHERE { + + ?dataset n2o:nodeLabel ?nodelabel . # This selects all datasets + + OPTIONAL { + ?dataset n2oc:production ?production . 
+ # n2oc:production is a bit brittle because IRI might be changed (risk!) + } + + OPTIONAL { + ?dataset n2oc:staging ?staged . + } + + ?image dct:source ?dataset . + ?channel ?image . # There does not always seem to be a channel + + FILTER( (?production=false || !bound(?production)) && (?staged=false || !bound(?staged)) ) . + FILTER(?nodelabel="DataSet") +} \ No newline at end of file diff --git a/config/collectdata/sparql/select_embargoed_channels_prod.sparql b/config/collectdata/sparql/select_embargoed_channels_prod.sparql new file mode 100644 index 0000000..57a7165 --- /dev/null +++ b/config/collectdata/sparql/select_embargoed_channels_prod.sparql @@ -0,0 +1,25 @@ +PREFIX rdfs: +PREFIX owl: +PREFIX n2o: +PREFIX n2oc: +PREFIX dct: + +#Delete all ds:DataSet where ds.production is False +#Delete all i:Individual where (ds)-[:has_source]-(i:Individual)<-[:depicts]-(ch:Individual) WHERE ds.production is False + +SELECT DISTINCT ?channel +WHERE { + + ?dataset n2o:nodeLabel ?nodelabel . # This selects all datasets + + OPTIONAL { + ?dataset n2oc:production ?production . + # n2oc:production is a bit brittle because IRI might be changed (risk!) + } + + ?image dct:source ?dataset . + ?channel ?image . # There does not always seem to be a channel + + FILTER(?production=false || !bound(?production)) . + FILTER(?nodelabel="DataSet") +} \ No newline at end of file diff --git a/config/collectdata/sparql/select_embargoed_datasets_dev.sparql b/config/collectdata/sparql/select_embargoed_datasets_dev.sparql new file mode 100644 index 0000000..cc63ca3 --- /dev/null +++ b/config/collectdata/sparql/select_embargoed_datasets_dev.sparql @@ -0,0 +1,26 @@ +PREFIX rdfs: +PREFIX owl: +PREFIX n2o: +PREFIX n2oc: +PREFIX dct: + +#Delete all ds:DataSet where ds.production is False +#Delete all i:Individual where (ds)-[:has_source]-(i:Individual)<-[:depicts]-(ch:Individual) WHERE ds.production is False + +SELECT DISTINCT ?dataset +WHERE { + + ?dataset n2o:nodeLabel ?nodelabel . 
# This selects all datasets + + OPTIONAL { + ?dataset n2oc:production ?production . + # n2oc:production is a bit brittle because IRI might be changed (risk!) + } + + OPTIONAL { + ?dataset n2oc:staging ?staged . + } + + FILTER( (?production=false || !bound(?production)) && (?staged=false || !bound(?staged)) ) . + FILTER(?nodelabel="DataSet") +} \ No newline at end of file diff --git a/config/collectdata/sparql/select_embargoed_datasets_prod.sparql b/config/collectdata/sparql/select_embargoed_datasets_prod.sparql new file mode 100644 index 0000000..f5e23e1 --- /dev/null +++ b/config/collectdata/sparql/select_embargoed_datasets_prod.sparql @@ -0,0 +1,22 @@ +PREFIX rdfs: +PREFIX owl: +PREFIX n2o: +PREFIX n2oc: +PREFIX dct: + +#Delete all ds:DataSet where ds.production is False +#Delete all i:Individual where (ds)-[:has_source]-(i:Individual)<-[:depicts]-(ch:Individual) WHERE ds.production is False + +SELECT DISTINCT ?dataset +WHERE { + + ?dataset n2o:nodeLabel ?nodelabel . # This selects all datasets + + OPTIONAL { + ?dataset n2oc:production ?production . + # n2oc:production is a bit brittle because IRI might be changed (risk!) + } + + FILTER(?production=false || !bound(?production)) . + FILTER(?nodelabel="DataSet") +} \ No newline at end of file diff --git a/config/collectdata/sparql/select_embargoed_images_dev.sparql b/config/collectdata/sparql/select_embargoed_images_dev.sparql new file mode 100644 index 0000000..5a6e134 --- /dev/null +++ b/config/collectdata/sparql/select_embargoed_images_dev.sparql @@ -0,0 +1,28 @@ +PREFIX rdfs: +PREFIX owl: +PREFIX n2o: +PREFIX n2oc: +PREFIX dct: + +#Delete all ds:DataSet where ds.production is False +#Delete all i:Individual where (ds)-[:has_source]-(i:Individual)<-[:depicts]-(ch:Individual) WHERE ds.production is False + +SELECT DISTINCT ?image +WHERE { + + ?dataset n2o:nodeLabel ?nodelabel . # This selects all datasets + + OPTIONAL { + ?dataset n2oc:production ?production . 
+ # n2oc:production is a bit brittle because IRI might be changed (risk!) + } + + OPTIONAL { + ?dataset n2oc:staging ?staged . + } + + ?image dct:source ?dataset . + + FILTER( (?production=false || !bound(?production)) && (?staged=false || !bound(?staged)) ) . + FILTER(?nodelabel="DataSet") +} \ No newline at end of file diff --git a/config/collectdata/sparql/select_embargoed_images_prod.sparql b/config/collectdata/sparql/select_embargoed_images_prod.sparql new file mode 100644 index 0000000..705d17e --- /dev/null +++ b/config/collectdata/sparql/select_embargoed_images_prod.sparql @@ -0,0 +1,24 @@ +PREFIX rdfs: +PREFIX owl: +PREFIX n2o: +PREFIX n2oc: +PREFIX dct: + +#Delete all ds:DataSet where ds.production is False +#Delete all i:Individual where (ds)-[:has_source]-(i:Individual)<-[:depicts]-(ch:Individual) WHERE ds.production is False + +SELECT DISTINCT ?image +WHERE { + + ?dataset n2o:nodeLabel ?nodelabel . # This selects all datasets + + OPTIONAL { + ?dataset n2oc:production ?production . + # n2oc:production is a bit brittle because IRI might be changed (risk!) + } + + ?image dct:source ?dataset . + + FILTER(?production=false || !bound(?production)) . + FILTER(?nodelabel="DataSet") +} \ No newline at end of file diff --git a/config/collectdata/sparql/terms.sparql b/config/collectdata/sparql/terms.sparql new file mode 100644 index 0000000..e65a105 --- /dev/null +++ b/config/collectdata/sparql/terms.sparql @@ -0,0 +1,7 @@ +SELECT DISTINCT ?term +WHERE { + { ?s1 ?p1 ?term . } + UNION + { ?term ?p2 ?o2 . 
} + FILTER(isIRI(?term)) +} \ No newline at end of file diff --git a/config/collectdata/vfb_fullontologies.txt b/config/collectdata/vfb_fullontologies.txt new file mode 100644 index 0000000..9c7f1a7 --- /dev/null +++ b/config/collectdata/vfb_fullontologies.txt @@ -0,0 +1,8 @@ +http://purl.obolibrary.org/obo/dpo.owl +http://purl.obolibrary.org/obo/fbbt.owl +http://purl.obolibrary.org/obo/fbdv.owl +http://purl.obolibrary.org/obo/fbbt/vfb/vfb_ext.owl +http://purl.obolibrary.org/obo/ro.owl +http://purl.obolibrary.org/obo/geno.owl +http://virtualflybrain.org/data/VFB/OWL/neuprint_JRC_Hemibrain_1point1_connectomics.owl.gz +https://raw.githubusercontent.com/VirtualFlyBrain/VFB_connectomics_import/main/FAFB_CATMAID_import.owl diff --git a/config/collectdata/vfb_slices.txt b/config/collectdata/vfb_slices.txt new file mode 100644 index 0000000..c06baf6 --- /dev/null +++ b/config/collectdata/vfb_slices.txt @@ -0,0 +1,3 @@ +http://purl.obolibrary.org/obo/so.owl +http://purl.obolibrary.org/obo/go.owl +http://purl.obolibrary.org/obo/fbbi.owl \ No newline at end of file diff --git a/config/dumps/config.env b/config/dumps/config.env new file mode 100644 index 0000000..8c51ee1 --- /dev/null +++ b/config/dumps/config.env @@ -0,0 +1,5 @@ +SPARQL_ENDPOINT=http://ts.p2.virtualflybrain.org/rdf4j-server/repositories/vfb +VFB_CONFIG=https://raw.githubusercontent.com/VirtualFlyBrain/vfb-prod/master/neo4j2owl-config.yaml +DUMPS_SOLR="all preferred_roots deprecation_label image_names has_image" +DUMPS_PDB="all preferred_roots deprecation_label has_image has_neuron_connectivity has_region_connectivity subclass_of_intersection" +DUMPS_OWLERY="all" diff --git a/config/dumps/sparql/_dump_all.sparql b/config/dumps/sparql/_dump_all.sparql new file mode 100644 index 0000000..b92b0bc --- /dev/null +++ b/config/dumps/sparql/_dump_all.sparql @@ -0,0 +1,6 @@ +PREFIX : + +CONSTRUCT { ?x ?p ?y . 
} + +WHERE {?x ?p ?y .} + diff --git a/config/dumps/sparql/construct_all.sparql b/config/dumps/sparql/construct_all.sparql new file mode 100644 index 0000000..b92b0bc --- /dev/null +++ b/config/dumps/sparql/construct_all.sparql @@ -0,0 +1,6 @@ +PREFIX : + +CONSTRUCT { ?x ?p ?y . } + +WHERE {?x ?p ?y .} + diff --git a/config/dumps/sparql/construct_deprecation_label.sparql b/config/dumps/sparql/construct_deprecation_label.sparql new file mode 100644 index 0000000..2fff814 --- /dev/null +++ b/config/dumps/sparql/construct_deprecation_label.sparql @@ -0,0 +1,7 @@ +CONSTRUCT { + ?x "Deprecated" . +} +WHERE { + ?x true . + FILTER(isIRI(?x)) +} diff --git a/config/dumps/sparql/construct_has_image.sparql b/config/dumps/sparql/construct_has_image.sparql new file mode 100644 index 0000000..5352c28 --- /dev/null +++ b/config/dumps/sparql/construct_has_image.sparql @@ -0,0 +1,14 @@ +PREFIX owl: +PREFIX rdf: +PREFIX foaf: + +CONSTRUCT { + ?x "has_image" . +} +WHERE { + ?y foaf:depicts ?x . + ?y ?z . + ?x rdf:type owl:NamedIndividual . + ?y rdf:type owl:NamedIndividual . + ?z rdf:type owl:NamedIndividual . +} diff --git a/config/dumps/sparql/construct_has_neuron_connectivity.sparql b/config/dumps/sparql/construct_has_neuron_connectivity.sparql new file mode 100644 index 0000000..b41c48b --- /dev/null +++ b/config/dumps/sparql/construct_has_neuron_connectivity.sparql @@ -0,0 +1,9 @@ +PREFIX owl: +PREFIX rdf: + +CONSTRUCT { + ?x "has_neuron_connectivity" . +} +WHERE { + ?x | ?y . +} diff --git a/config/dumps/sparql/construct_has_region_connectivity.sparql b/config/dumps/sparql/construct_has_region_connectivity.sparql new file mode 100644 index 0000000..488f2dd --- /dev/null +++ b/config/dumps/sparql/construct_has_region_connectivity.sparql @@ -0,0 +1,10 @@ +PREFIX owl: +PREFIX rdf: + +CONSTRUCT { + ?x "has_region_connectivity" . +} +WHERE { + ?x | ?y . + ?x rdf:type owl:NamedIndividual . 
+} diff --git a/config/dumps/sparql/construct_image_names.sparql b/config/dumps/sparql/construct_image_names.sparql new file mode 100644 index 0000000..0a48992 --- /dev/null +++ b/config/dumps/sparql/construct_image_names.sparql @@ -0,0 +1,20 @@ +PREFIX rdfs: +PREFIX owl: +PREFIX n2o: +PREFIX n2oc: +PREFIX dct: + +CONSTRUCT { +?anatomical_indiv ?filename . +?anatomical_indiv ?thumbnail . +} +WHERE { + ?channel ?anatomical_indiv . + ?anno owl:annotatedSource ?channel . + ?anno owl:annotatedProperty . + ?anno n2oc:folder ?folder . + OPTIONAL { + ?anno n2oc:filename ?filename . + } + BIND(CONCAT(STR( ?folder ),"thumbnail.png") AS ?thumbnail ) . +} \ No newline at end of file diff --git a/config/dumps/sparql/construct_non_literal.sparql b/config/dumps/sparql/construct_non_literal.sparql new file mode 100644 index 0000000..248917e --- /dev/null +++ b/config/dumps/sparql/construct_non_literal.sparql @@ -0,0 +1,11 @@ +PREFIX : +PREFIX rdfs: + +CONSTRUCT { ?x ?p ?y . } + +WHERE { + ?x ?p ?y . + FILTER(!isLiteral(?y)) +} + +# LIMIT 5000 \ No newline at end of file diff --git a/config/dumps/sparql/construct_preferred_roots.sparql b/config/dumps/sparql/construct_preferred_roots.sparql new file mode 100644 index 0000000..68b58cd --- /dev/null +++ b/config/dumps/sparql/construct_preferred_roots.sparql @@ -0,0 +1,7 @@ +CONSTRUCT { + # ?y true . + ?y "preferred_root" . +} +WHERE { + ?x ?y . +} \ No newline at end of file diff --git a/config/dumps/sparql/construct_test.sparql b/config/dumps/sparql/construct_test.sparql new file mode 100644 index 0000000..e54e758 --- /dev/null +++ b/config/dumps/sparql/construct_test.sparql @@ -0,0 +1,23 @@ +PREFIX : +PREFIX rdfs: +PREFIX owl: + +CONSTRUCT { + ?x a owl:Class . + ?x owl:equivalentClass ?y . + ?x rdfs:label ?l . + ?x rdfs:comment ?c . + ?y rdfs:label ?l2 . +} + +WHERE { + ?x a owl:Class . + ?x rdfs:subClassOf ?y . + ?x rdfs:label ?l . + ?x rdfs:comment ?c . + ?y rdfs:label ?l2 . 
+ FILTER(isIRI(?x)) + FILTER(isIRI(?y)) +} + +LIMIT 10000 \ No newline at end of file diff --git a/config/dumps/sparql/construct_test2.sparql b/config/dumps/sparql/construct_test2.sparql new file mode 100644 index 0000000..dc48f88 --- /dev/null +++ b/config/dumps/sparql/construct_test2.sparql @@ -0,0 +1,16 @@ +PREFIX : +PREFIX rdfs: +PREFIX owl: + +CONSTRUCT { + ?x ?p ?y . +} + +WHERE { + ?x ?p ?y . + FILTER(isIRI(?x)) + FILTER(isIRI(?p)) + FILTER(isIRI(?y)) +} + +LIMIT 10000 \ No newline at end of file diff --git a/config/knowledgebase/config.env b/config/knowledgebase/config.env new file mode 100644 index 0000000..711f80e --- /dev/null +++ b/config/knowledgebase/config.env @@ -0,0 +1 @@ +KB_DATA=http://data.virtualflybrain.org/archive/VFB-KB.tar.gz diff --git a/config/owlery/application.conf b/config/owlery/application.conf new file mode 100644 index 0000000..cfb2f6a --- /dev/null +++ b/config/owlery/application.conf @@ -0,0 +1,20 @@ +akka { + loglevel = INFO +} + +akka.http.server { + request-timeout = infinite + idle-timeout = infinite +} + +owlery { + port = 8080 + host = localhost + kbs = [ + { + name = vfb + location = "http://www.virtualflybrain.org/owl/vfb.owl" + reasoner = elk + } + ] +} diff --git a/config/owlery/config.env b/config/owlery/config.env new file mode 100644 index 0000000..e2e2214 --- /dev/null +++ b/config/owlery/config.env @@ -0,0 +1 @@ +OWLURL=https://github.com/VirtualFlyBrain/VFB_owl/blob/Current/src/owl/vfb.owl.gz?raw=true diff --git a/config/update-prod/config.env b/config/update-prod/config.env new file mode 100644 index 0000000..d70a575 --- /dev/null +++ b/config/update-prod/config.env @@ -0,0 +1,5 @@ +server=http://192.168.0.1:7473 +user=neo4j +password=password 
+IMPORT=http://192.168.0.1:8080/rdf4j-server/repositories/vfb?query=PREFIX+rdfs%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2000%2F01%2Frdf-schema%23%3E%0APREFIX+owl%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F2002%2F07%2Fowl%23%3E%0APREFIX+rdf%3A+%3Chttp%3A%2F%2Fwww.w3.org%2F1999%2F02%2F22-rdf-syntax-ns%23%3E%0APREFIX+obo%3A+%3Chttp%3A%2F%2Fpurl.obolibrary.org%2Fobo%2F%3E%0ACONSTRUCT+%7B%3Fx+%3Fy+%3Fz%7D%0AWHERE+%7B%0A%09%3Fx+%3Fy+%3Fz+.%0A%7D%0A +IMPORT_CONFIG=https://raw.githubusercontent.com/VirtualFlyBrain/vfb-prod/master/neo4j2owl-config.yaml diff --git a/config/updatetriplestore/config.env b/config/updatetriplestore/config.env new file mode 100644 index 0000000..6c5188b --- /dev/null +++ b/config/updatetriplestore/config.env @@ -0,0 +1 @@ +SERVER=http://192.168.0.1:8080 diff --git a/config/validatekb/config.env b/config/validatekb/config.env new file mode 100644 index 0000000..981c676 --- /dev/null +++ b/config/validatekb/config.env @@ -0,0 +1,3 @@ +KBserver=http://192.168.0.1:7474 +KBuser=neo4j +KBpassword=password diff --git a/docker-compose-local.yml b/docker-compose-local.yml new file mode 100644 index 0000000..85f0dc4 --- /dev/null +++ b/docker-compose-local.yml @@ -0,0 +1,206 @@ +version: '3.3' +# networks: +# dockernet: +# external: +# name: dockernet +#networks: +# internal: +# driver: bridge +# Dependencies: + +# vfb-kb +## vfb-kb2kb +### vfb-validatekb +#### vfb-collectdata + +# vfb-triplestore +## vfb-updatetriplestore + +# vfb-dumps +# vfb-owlery + +# vfb-prod +## vfb-updateprod + + +services: + vfb-kb: + # image: virtualflybrain/docker-neo4j-knowledgebase:neo2owl + # image: virtualflybrain/docker-vfb-neo4j:enterprise + image: virtualflybrain/vfb-prod:kb + build: + context: ../docker-neo4j-knowledgebase + dockerfile: Dockerfile + args: + - CONF_REPO=${CONF_REPO} + - CONF_BRANCH=${CONF_BRANCH} + # deploy: + # replicas: 1 + # resources: + # limits: + # cpus: "1" + # memory: 6G + # restart_policy: + # condition: on-failure + environment: + - NEO4J_AUTH=neo4j/neo + - 
NEO4J_dbms_read__only=true + - NEO4J_dbms_memory_heap_maxSize=15G + - NEO4J_dbms_memory_heap_initial__size=1G + #- NEO4J_dbms_memory_pagecache_size=20G + ports: + - 7474:7474 + - 7687:7687 + deploy: + resources: + limits: + memory: 25G + reservations: + memory: 1G + vfb-triplestore: + image: matentzn/vfb-pipeline-triplestore + environment: + - JVM_PARAMS=-Xms1G -Xmx5G + - RDF4J_DATA=/data + ports: + - 8080:8080 + vfb-prod: + image: matentzn/vfb-prod:latest + build: + context: ../vfb-prod + dockerfile: Dockerfile + environment: + - NEO4J_AUTH=neo4j/neo + - NEO4J_dbms_read__only=false + - NEO4J_dbms_memory_heap_maxSize=3G + - NEO4J_dbms_memory_heap_initial__size=1G + ports: + - 7473:7474 + - 7686:7687 + # networks: + # - dockernet + healthcheck: + test: [ "CMD", "wget", "-O", "-", "http://vfb-prod:7474" ] # runs inside the container: 7474 is the container-side HTTP port, 7473 is only the host mapping + interval: 18s + timeout: 12s + retries: 3 + # vfb-kb2kb: + # image: matentzn/vfb-pipeline-kb2kb:latest + # depends_on: + # - vfb-kb + # links: + # - vfb-kb + # environment: + # - KBpassword=neo4j/neo + # - KBserver=http://vfb-kb:7474 +# networks: +# - dockernet + vfb-validatekb: + image: matentzn/vfb-pipeline-validatekb:latest + build: + context: ../vfb-pipeline-validatekb + dockerfile: Dockerfile + args: + - CONF_REPO=${CONF_REPO} + - CONF_BRANCH=${CONF_BRANCH} + depends_on: + - vfb-kb + links: + - vfb-kb + environment: + - KBpassword=neo4j/neo + - KBserver=http://vfb-kb:7474 +# networks: +# - dockernet + vfb-collectdata: + image: matentzn/vfb-pipeline-collectdata:latest + build: + context: ../vfb-pipeline-collectdata + dockerfile: Dockerfile + args: + - CONF_REPO=${CONF_REPO} + - CONF_BRANCH=${CONF_BRANCH} + depends_on: + - vfb-kb + - vfb-validatekb + links: + - vfb-kb + environment: + - KBserver=http://vfb-kb:7474 + volumes: + - vfb_data:/out +# networks: +# - dockernet + vfb-updatetriplestore: + image: matentzn/vfb-pipeline-updatetriplestore:latest + build: + context: ../vfb-pipeline-updatetriplestore + dockerfile: Dockerfile + args: + - CONF_REPO=${CONF_REPO} 
+ - CONF_BRANCH=${CONF_BRANCH} + depends_on: + - vfb-collectdata + - vfb-triplestore + links: + - vfb-triplestore + environment: + - SERVER=http://vfb-triplestore:8080 + volumes: + - vfb_data:/data +# networks: +# - dockernet + vfb-updateprod: + image: matentzn/vfb-pipeline-update-prod:latest + build: + context: ../vfb-pipeline-update-prod + dockerfile: Dockerfile + args: + - CONF_REPO=${CONF_REPO} + - CONF_BRANCH=${CONF_BRANCH} + depends_on: + - vfb-dumps + - vfb-prod + links: + - vfb-dumps + - vfb-prod + environment: + - password=neo4j/neo + - server=http://vfb-prod:7474 +# networks: +# - dockernet + vfb-owlery: + image: phenoscape/owlery + build: + context: ../owlery-vfb + dockerfile: Dockerfile + args: + - CONF_REPO=${CONF_REPO} + - CONF_BRANCH=${CONF_BRANCH} + depends_on: + - vfb-dumps + links: + - vfb-dumps + ports: + - 80:8080 +# networks: +# - dockernet + vfb-dumps: + image: matentzn/vfb-pipeline-dumps:latest + build: + context: ../vfb-pipeline-dumps + dockerfile: Dockerfile + args: + - CONF_REPO=${CONF_REPO} + - CONF_BRANCH=${CONF_BRANCH} + depends_on: + - vfb-updatetriplestore + - vfb-triplestore + links: + - vfb-triplestore + volumes: + - vfb_data:/out + # networks: + # - dockernet +volumes: + vfb_data: diff --git a/docker-compose.yml b/docker-compose.yml index 35f8a63..e4c1c2e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,7 +16,7 @@ version: '3.3' # vfb-triplestore ## vfb-updatetriplestore -# vfb-integration-api +# vfb-dumps # vfb-owlery # vfb-prod @@ -25,7 +25,15 @@ version: '3.3' services: vfb-kb: - image: virtualflybrain/docker-neo4j-knowledgebase:neo2owl + # image: virtualflybrain/docker-neo4j-knowledgebase:neo2owl + # image: virtualflybrain/docker-vfb-neo4j:enterprise + image: virtualflybrain/vfb-prod:kb + build: + context: docker-neo4j-knowledgebase + dockerfile: Dockerfile + args: + - CONF_REPO=${CONF_REPO} + - CONF_BRANCH=${CONF_BRANCH} # deploy: # replicas: 1 # resources: @@ -36,26 +44,36 @@ services: # condition: on-failure 
environment: - NEO4J_AUTH=neo4j/neo - - NEO4J_dbms_read__only=false - - NEO4J_dbms_memory_heap_maxSize=6G - - NEO4J_dbms_memory_heap_initial__size=1G + - NEO4J_dbms_read__only=true + - NEO4J_dbms_memory_heap_maxSize=15G + - NEO4J_dbms_memory_heap_initial__size=1G + #- NEO4J_dbms_memory_pagecache_size=20G ports: - 7474:7474 - 7687:7687 + deploy: + resources: + limits: + memory: 25G + reservations: + memory: 1G vfb-triplestore: image: yyz1989/rdf4j:latest environment: - JVM_PARAMS=-Xms1G -Xmx5G - - RDF4J_DATA=/data + - RDF4J_DATA=/data ports: - 8080:8080 vfb-prod: image: matentzn/vfb-prod:latest + build: + context: vfb-prod + dockerfile: Dockerfile environment: - NEO4J_AUTH=neo4j/neo - NEO4J_dbms_read__only=false - NEO4J_dbms_memory_heap_maxSize=3G - - NEO4J_dbms_memory_heap_initial__size=1G + - NEO4J_dbms_memory_heap_initial__size=1G ports: - 7473:7474 - 7686:7687 @@ -66,21 +84,27 @@ services: interval: 18s timeout: 12s retries: 3 - vfb-kb2kb: - image: matentzn/vfb-pipeline-kb2kb:latest - depends_on: - - vfb-kb - links: - - vfb-kb - environment: - - KBpassword=neo4j/neo - - KBserver=http://vfb-kb:7474 + # vfb-kb2kb: + # image: matentzn/vfb-pipeline-kb2kb:latest + # depends_on: + # - vfb-kb + # links: + # - vfb-kb + # environment: + # - KBpassword=neo4j/neo + # - KBserver=http://vfb-kb:7474 # networks: # - dockernet vfb-validatekb: image: matentzn/vfb-pipeline-validatekb:latest + build: + context: vfb-pipeline-validatekb + dockerfile: Dockerfile + args: + - CONF_REPO=${CONF_REPO} + - CONF_BRANCH=${CONF_BRANCH} depends_on: - - vfb-kb2kb + - vfb-kb links: - vfb-kb environment: @@ -90,12 +114,18 @@ services: # - dockernet vfb-collectdata: image: matentzn/vfb-pipeline-collectdata:latest + build: + context: vfb-pipeline-collectdata + dockerfile: Dockerfile + args: + - CONF_REPO=${CONF_REPO} + - CONF_BRANCH=${CONF_BRANCH} depends_on: + - vfb-kb - vfb-validatekb links: - vfb-kb environment: - - KBpassword=neo4j/neo - KBserver=http://vfb-kb:7474 volumes: - vfb_data:/out @@ 
-103,6 +133,12 @@ services: # - dockernet vfb-updatetriplestore: image: matentzn/vfb-pipeline-updatetriplestore:latest + build: + context: vfb-pipeline-updatetriplestore + dockerfile: Dockerfile + args: + - CONF_REPO=${CONF_REPO} + - CONF_BRANCH=${CONF_BRANCH} depends_on: - vfb-collectdata - vfb-triplestore @@ -116,42 +152,55 @@ services: # - dockernet vfb-updateprod: image: matentzn/vfb-pipeline-update-prod:latest + build: + context: vfb-pipeline-update-prod + dockerfile: Dockerfile + args: + - CONF_REPO=${CONF_REPO} + - CONF_BRANCH=${CONF_BRANCH} depends_on: - - vfb-integration-api + - vfb-dumps - vfb-prod links: - - vfb-integration-api + - vfb-dumps - vfb-prod environment: - password=neo4j/neo - server=http://vfb-prod:7474 - - IMPORT=http://vfb-integration-api:5000/prod # networks: # - dockernet vfb-owlery: - image: virtualflybrain/owlery-vfb:latest + image: phenoscape/owlery + build: + context: owlery-vfb + dockerfile: Dockerfile + args: + - CONF_REPO=${CONF_REPO} + - CONF_BRANCH=${CONF_BRANCH} depends_on: - - vfb-integration-api + - vfb-dumps links: - - vfb-integration-api + - vfb-dumps ports: - 80:8080 - environment: - - OWLURL=http://vfb-integration-api:5000/owlery # networks: # - dockernet - vfb-integration-api: - image: matentzn/vfb-integration-api:latest + vfb-dumps: + image: matentzn/vfb-pipeline-dumps:latest + build: + context: vfb-pipeline-dumps + dockerfile: Dockerfile + args: + - CONF_REPO=${CONF_REPO} + - CONF_BRANCH=${CONF_BRANCH} depends_on: - vfb-updatetriplestore - vfb-triplestore links: - vfb-triplestore - ports: - - 5000:5000 - environment: - - SPARQLENDPOINT=http://vfb-triplestore:8080/rdf4j-server/repositories/vfb?query= + volumes: + - vfb_data:/out # networks: # - dockernet volumes: - vfb_data: \ No newline at end of file + vfb_data: