Skip to content

Commit 52ec0fc

Browse files
committed
ZOO-Project-DRU Helm chart and Application version bump
Add dask-gateway as optional subchart dependency (dask.enabled) Handle CloudEvents following current pubsub spec relying on the configured sink (k_sink) if set Add stageout-file.yaml CWL wrapper to stage files Use the ZOO-Project-DRU image tagged dru-b567b1dff96b9f720215aaa4da69dbd3a7ffe031 (using custom versions of Python modules) Upgrade calrissian Docker image from terradue/calrissian:0.12.0 to upstream calrissian/calrissian:latest
1 parent 47a9c47 commit 52ec0fc

7 files changed

Lines changed: 393 additions & 37 deletions

File tree

skaffold.yaml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,28 @@ profiles:
165165
port: 9001
166166
localPort: 9001
167167

168+
# Profile: dask
169+
# Activates dask-gateway deployment as a subchart of ZOO-Project-DRU.
170+
# The dask-gateway Traefik proxy will be available at:
171+
# http://traefik-zoo-project-dru-dask-gateway.zoo.svc.cluster.local:80
172+
# Combine with other profiles, e.g.: skaffold dev -p dask
173+
- name: dask
174+
patches:
175+
- op: add
176+
path: /deploy/helm/releases/0/setValues
177+
value:
178+
dask.enabled: "true"
179+
dask-gateway.traefik.service.type: "ClusterIP"
180+
# Use setValueTemplates to allow Skaffold image tag substitution for a custom worker image.
181+
# Override gateway.backend.image.name and gateway.backend.image.tag here if needed.
182+
# Example for eoap custom worker:
183+
# - op: add
184+
# path: /deploy/helm/releases/0/setValueTemplates
185+
# value:
186+
# dask-gateway.gateway.backend.image.name: ghcr.io/eoap/dev-platform-eoap/dask-gateway-worker:0.1.0
187+
# dask-gateway.gateway.backend.image.tag: "{{.IMAGE_TAG_ghcr_io_eoap_dev_platform_eoap_dask_gateway_worker}}"
188+
# dask-gateway.gateway.backend.imagePullSecrets[0].name: kaniko-secret
189+
168190
portForward:
169191

170192
# ZOO-Project services

zoo-project-dru/Chart.yaml

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,13 @@ type: application
2323
# This is the chart version. This version number should be incremented each time you make changes
2424
# to the chart and its templates, including the app version.
2525
# Versions are expected to follow Semantic Versioning (https://semver.org/)
26-
version: 0.9.2
26+
version: 0.9.3
2727

2828
# This is the version number of the application being deployed. This version number should be
2929
# incremented each time you make changes to the application. Versions are not expected to
3030
# follow Semantic Versioning. They should reflect the version the application is using.
3131
# It is recommended to use it with quotes.
32-
appVersion: "0.2.40"
32+
appVersion: "0.2.41"
3333

3434
keywords:
3535
- workflows
@@ -70,13 +70,23 @@ dependencies:
7070
repository: https://prometheus-community.github.io/helm-charts
7171
condition: monitoring.enabled
7272

73+
- name: dask-gateway
74+
version: "2024.1.0"
75+
repository: https://helm.dask.org/
76+
condition: dask.enabled
77+
7378
annotations:
7479
artifacthub.io/changes: |
75-
- kind: fixed
76-
description: "Fix issue with the WES support using zoo-wes-runner < 0.2.1"
77-
links:
78-
- name: GitHub Commit
79-
url: https://github.com/ZOO-Project/zoo-wes-runner/commit/c3298222a85e184d953e25b51e2c9e9a633e920d
80+
- kind: added
81+
description: "Add dask-gateway as optional subchart dependency (dask.enabled)"
82+
- kind: added
83+
description: "Handle CloudEvents following current pubsub spec relying on the configured sink (k_sink) if set"
84+
- kind: added
85+
description: "Add stageout-file.yaml CWL wrapper to stage files"
86+
- kind: changed
87+
description: "Use the ZOO-Project-DRU image tagged dru-b567b1dff96b9f720215aaa4da69dbd3a7ffe031 (using custom versions of Python modules)"
88+
- kind: added
89+
description: "Upgrade calrissian Docker image from terradue/calrissian:0.12.0 to upstream calrissian/calrissian:latest"
8090
8191
artifacthub.io/license: Apache-2.0
8292

zoo-project-dru/files/bin/publish.py

Lines changed: 45 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,35 +20,36 @@
2020
#print(data)
2121

2222
from urllib import parse
23+
params=parse.parse_qs(os.environ["QUERY_STRING"])
2324

24-
# Ensure that only the zoofpm pod is allowed to invoke the publish CGI script
25-
config = configparser.ConfigParser()
26-
config.read("/usr/lib/cgi-bin/main.cfg")
27-
driver = ogr.GetDriverByName("PostgreSQL")
28-
connection_string=""
29-
for key in config["database"]:
30-
if key != "type" and key != "schema":
31-
connection_string += f"{key}={config['database'][key]} "
32-
ds=driver.Open(f"PG:{connection_string}")
33-
layer=ds.ExecuteSQL("SELECT host from servers")
34-
if layer is not None and layer.GetFeatureCount() > 0:
35-
hasAddress = False
36-
for i in range(layer.GetFeatureCount()):
37-
host = layer.GetFeature(i).GetFieldAsString("host")
38-
if os.environ["REMOTE_ADDR"] == host:
39-
print("You are allowed to publish this job", file=sys.stderr)
40-
hasAddress = True
41-
break
42-
if not(hasAddress):
25+
if "jobid" in params:
26+
# Ensure that only the zoofpm pod is allowed to invoke the publish CGI script
27+
config = configparser.ConfigParser()
28+
config.read("/usr/lib/cgi-bin/main.cfg")
29+
driver = ogr.GetDriverByName("PostgreSQL")
30+
connection_string=""
31+
for key in config["database"]:
32+
if key != "type" and key != "schema":
33+
connection_string += f"{key}={config['database'][key]} "
34+
ds=driver.Open(f"PG:{connection_string}")
35+
layer=ds.ExecuteSQL("SELECT host from servers")
36+
if layer is not None and layer.GetFeatureCount() > 0:
37+
hasAddress = False
38+
for i in range(layer.GetFeatureCount()):
39+
host = layer.GetFeature(i).GetFieldAsString("host")
40+
if os.environ["REMOTE_ADDR"] == host:
41+
print("You are allowed to publish this job", file=sys.stderr)
42+
hasAddress = True
43+
break
44+
if not(hasAddress):
45+
print("You are not allowed to publish this job", file=sys.stderr)
46+
sys.exit(0)
47+
else:
4348
print("You are not allowed to publish this job", file=sys.stderr)
4449
sys.exit(0)
45-
else:
46-
print("You are not allowed to publish this job", file=sys.stderr)
47-
sys.exit(0)
4850

4951
{{- if .Values.redis.enabled }}
5052
try:
51-
params=parse.parse_qs(os.environ["QUERY_STRING"])
5253
r=None
5354
if "ZOO_REDIS_HOST" in os.environ:
5455
r = redis.Redis(host=os.environ["ZOO_REDIS_HOST"], port=6379, db=0)
@@ -63,23 +64,41 @@
6364
{{- end }}
6465

6566
{{- if .Values.notifications.enabled }}
67+
ce_name_prefix="org.ogc.api.process."
68+
ce_name_job_prefix=ce_name_prefix + "job."
6669
k_sink = os.environ.get("K_SINK", None)
6770
k_ce_overrides_str = os.environ.get("K_CE_OVERRIDES", None)
6871
configp = configparser.ConfigParser()
6972
configp.read("/usr/lib/cgi-bin/oas.cfg")
7073
try:
7174
if k_sink is not None:
72-
params=parse.parse_qs(os.environ["QUERY_STRING"])
7375
#print(params,file=sys.stderr)
7476
#print(f"k_sink: {k_sink}",file=sys.stderr)
7577
# TODO: detect the userid and add it to the URLs
7678
data = json.loads(data)
7779
host_name = configp["openapi"]["rootUrl"]
7880
root_path = configp["openapi"]["rootPath"]
81+
82+
if "operation" in params:
83+
operation = params["operation"][0]
84+
attributes = {
85+
"type": ce_name_job_prefix + operation,
86+
"source": f"{host_name}/{root_path}/job/{job_id}",
87+
}
88+
if operation != "undeploy":
89+
attributes["dataschema"] = "https://schemas.opengis.net/ogcapi/processes/part1/1.0/openapi/schemas/link.yaml"
90+
attributes["datacontenttype"] = "application/json"
91+
else:
92+
attributes["datacontenttype"] = "text/plain"
93+
event = CloudEvent(attributes, data)
94+
headers, body = to_binary(event)
95+
requests.post(k_sink, data=body, headers=headers)
96+
sys.exit(0)
97+
7998
try:
8099
job_id = data["id"]
81100
attributes = {
82-
"type": "org.eoepca.ogc-api-processes-notification.job.update",
101+
"type": ce_name_job_prefix + "update",
83102
"source": f"{host_name}/{root_path}/job/{job_id}",
84103
"datacontenttype": "application/json",
85104
"dataschema": "https://schemas.opengis.net/ogcapi/processes/part1/1.0/openapi/schemas/statusInfo.yaml",
@@ -89,7 +108,7 @@
89108
data = data["stac"]["value"]
90109
job_id = data["id"]
91110
attributes = {
92-
"type": "org.eoepca.ogc-api-processes-notification.job.result",
111+
"type": ce_name_job_prefix + "result",
93112
"source": f"{host_name}/{root_path}/job/{job_id}/results",
94113
"datacontenttype": "application/json",
95114
#"dataschema": "https://schemas.opengis.net/ogcapi/processes/part1/1.0/openapi/schemas/results.yaml",
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
cwlVersion: v1.2
2+
3+
class: CommandLineTool
4+
id: stageout-file
5+
6+
doc: "Stage-out a file to S3 storage"
7+
8+
inputs:
9+
file:
10+
doc: "The file to stage out"
11+
label: "File to stage out"
12+
type: File
13+
14+
STAGEOUT_OUTPUT:
15+
doc: "The S3 output path (e.g., s3://bucket-name)"
16+
label: "S3 output path"
17+
type: string
18+
19+
process:
20+
doc: "The process ID"
21+
label: "Process ID"
22+
type: string
23+
24+
STAGEOUT_AWS_ACCESS_KEY_ID:
25+
doc: "AWS access key ID"
26+
label: "AWS access key ID"
27+
type: string
28+
29+
STAGEOUT_AWS_SECRET_ACCESS_KEY:
30+
doc: "AWS secret access key"
31+
label: "AWS secret access key"
32+
type: string
33+
34+
STAGEOUT_AWS_REGION:
35+
doc: "AWS region"
36+
label: "AWS region"
37+
type: string
38+
39+
STAGEOUT_AWS_SERVICEURL:
40+
doc: "AWS service URL (for S3-compatible storage)"
41+
label: "AWS service URL"
42+
type: string
43+
44+
outputs:
45+
s3_file_output:
46+
doc: "The S3 URI of the staged-out file"
47+
label: "S3 file URI"
48+
type: https://raw.githubusercontent.com/eoap/schemas/main/string_format.yaml#URI
49+
outputBinding:
50+
glob: file-uri.txt
51+
loadContents: true
52+
outputEval: |
53+
${
54+
return { "value": self[0].contents.trim(), "type": "https://raw.githubusercontent.com/eoap/schemas/main/string_format.yaml#URI" };
55+
}
56+
57+
stdout: file-uri.txt
58+
59+
baseCommand:
60+
- python
61+
- stage.py
62+
63+
arguments:
64+
- $(inputs.file.path)
65+
- $(inputs.STAGEOUT_OUTPUT)
66+
- ${
67+
var firstPart = (Math.random() * 46656) | 0;
68+
var secondPart = (Math.random() * 46656) | 0;
69+
firstPart = ("000" + firstPart.toString(36)).slice(-3);
70+
secondPart = ("000" + secondPart.toString(36)).slice(-3);
71+
return inputs.process + "-" + firstPart + secondPart;
72+
}
73+
74+
requirements:
75+
NetworkAccess:
76+
networkAccess: true
77+
78+
SchemaDefRequirement:
79+
types:
80+
- $import: https://raw.githubusercontent.com/eoap/schemas/main/string_format.yaml
81+
82+
DockerRequirement:
83+
dockerPull: ghcr.io/eoap/mastering-app-package/stage:1.0.0
84+
85+
InlineJavascriptRequirement: {}
86+
87+
EnvVarRequirement:
88+
envDef:
89+
STAGEOUT_AWS_ACCESS_KEY_ID: $(inputs.STAGEOUT_AWS_ACCESS_KEY_ID)
90+
STAGEOUT_AWS_SECRET_ACCESS_KEY: $(inputs.STAGEOUT_AWS_SECRET_ACCESS_KEY)
91+
STAGEOUT_AWS_REGION: $(inputs.STAGEOUT_AWS_REGION)
92+
STAGEOUT_AWS_SERVICEURL: $(inputs.STAGEOUT_AWS_SERVICEURL)
93+
94+
ResourceRequirement: {}
95+
96+
InitialWorkDirRequirement:
97+
listing:
98+
- entryname: stage.py
99+
entry: |-
100+
import os
101+
import sys
102+
import boto3
103+
import botocore
104+
from urllib.parse import urlparse
105+
106+
file_path = sys.argv[1]
107+
bucket_uri = sys.argv[2]
108+
subfolder = sys.argv[3]
109+
if not bucket_uri.startswith("s3://"):
110+
bucket_uri = "s3://" + bucket_uri
111+
112+
print(f"file_path: {file_path}", file=sys.stderr)
113+
print(f"bucket_uri: {bucket_uri}", file=sys.stderr)
114+
print(f"subfolder: {subfolder}", file=sys.stderr)
115+
116+
aws_access_key_id = os.environ["STAGEOUT_AWS_ACCESS_KEY_ID"]
117+
aws_secret_access_key = os.environ["STAGEOUT_AWS_SECRET_ACCESS_KEY"]
118+
region_name = os.environ["STAGEOUT_AWS_REGION"]
119+
endpoint_url = os.environ["STAGEOUT_AWS_SERVICEURL"]
120+
121+
# Parse bucket URI (e.g., s3://bucket-name or s3://bucket-name/path)
122+
parsed = urlparse(bucket_uri)
123+
bucket = parsed.netloc
124+
base_path = parsed.path.lstrip("/") if parsed.path else ""
125+
126+
# Create S3 client
127+
client = boto3.client(
128+
"s3",
129+
aws_access_key_id=aws_access_key_id,
130+
aws_secret_access_key=aws_secret_access_key,
131+
endpoint_url=endpoint_url,
132+
region_name=region_name,
133+
)
134+
135+
# Get file name from path
136+
file_name = os.path.basename(file_path)
137+
138+
print(f"Base path {base_path}", file=sys.stderr)
139+
140+
# Construct S3 key
141+
if base_path:
142+
s3_key = os.path.normpath(f"{base_path}/{subfolder}/{file_name}")
143+
else:
144+
s3_key = os.path.normpath(f"{subfolder}/{file_name}")
145+
146+
# Remove leading slashes from s3_key
147+
s3_key = s3_key.lstrip("/")
148+
bucket = bucket.lstrip("/")
149+
print(f"Target s3://{bucket}/", file=sys.stderr)
150+
print(f"Target s3://{bucket}/{s3_key}", file=sys.stderr)
151+
152+
153+
# Upload file to S3
154+
print(f"Uploading {file_path} to s3://{bucket}/{s3_key}", file=sys.stderr)
155+
client.upload_file(file_path, bucket, s3_key)
156+
157+
# Construct and return the S3 URI
158+
s3_uri = f"s3://{bucket}/{s3_key}"
159+
print(f"File staged out to: {s3_uri}", file=sys.stderr)
160+
print(s3_uri)

zoo-project-dru/templates/dp-zoofpm.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,11 @@ spec:
201201
value: {{ $v }}
202202
{{- end }}
203203

204+
{{- if .Values.dask.enabled }}
205+
- name: DASK_GATEWAY_URL
206+
value: {{ if .Values.dask.gatewayUrl }}{{ .Values.dask.gatewayUrl }}{{ else }}{{ printf "http://traefik-%s-dask-gateway.%s.svc.cluster.local:80" .Release.Name .Release.Namespace }}{{ end }}
207+
{{- end }}
208+
204209
volumeMounts:
205210
- name: zoo-project-config
206211
mountPath: /usr/lib/cgi-bin/main.cfg
@@ -250,6 +255,10 @@ spec:
250255
- name: cwlwrapper-config
251256
mountPath: /assets/stagein-file.yaml
252257
subPath: stagein-file.yaml
258+
259+
- name: cwlwrapper-config
260+
mountPath: /assets/stageout-file.yaml
261+
subPath: stageout-file.yaml
253262
{{- end }}
254263

255264
- name: ades-processing-services

0 commit comments

Comments
 (0)