Skip to content

Commit 3282f63

Browse files
authored
Merge pull request #99 from roocs/cli
Added features for using `daops` with EOEPCA: - command-line interface and tests: `daops/cli.py` and `tests/test_cli.py` - Docker file: `Dockerfile` - CWL file: `app-package.cwl`
2 parents 0271f08 + a4d3585 commit 3282f63

17 files changed

Lines changed: 1029 additions & 28 deletions

File tree

.github/workflows/main.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ jobs:
2525
# pip install flake8 black pytest
2626
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
2727
if [ -f requirements_dev.txt ]; then pip install -r requirements_dev.txt; fi
28+
# pip install the package so that the command-line unit tests work
29+
pip install --no-deps -e .
2830
# - name: Lint with flake8
2931
# run: flake8 daops tests
3032
# if: matrix.python-version == 3.8

Dockerfile

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
##=================================================================================
2+
##
3+
## EXAMPLE USAGE
4+
##
5+
## $ docker build -t daops .
6+
## $ mkdir ~/container-outputs
7+
## $ docker run -it \
8+
## --mount type=bind,source=$HOME/container-outputs,target=/outputs \
9+
## daops
10+
##
11+
## # id=cmip5.output1.INM.inmcm4.rcp45.mon.ocean.Omon.r1i1p1.latest.zostoga
12+
## # path=/root/.mini-esgf-data/test_data/badc/cmip5/data/$(echo $id | tr . /)
13+
## # ncdump -h $path/*.nc | grep UNLIMITED
14+
## time = UNLIMITED ; // (1140 currently)
15+
## # rm /outputs/*.nc
16+
## # daops subset --output-dir /outputs --time=2010-1-1/2015-1-1 $id
17+
## # ncdump -h /outputs/*.nc | grep UNLIMITED
18+
## time = UNLIMITED ; // (60 currently)
19+
## # exit
20+
##
21+
## $ ls ~/container-outputs/
22+
## zostoga_mon_inmcm4_rcp45_r1i1p1_20100116-20141216.nc
23+
##
24+
##=================================================================================
25+
26+
FROM ubuntu:20.04

# Run every RUN instruction under bash.  "-o pipefail" makes a pipeline fail
# when ANY of its stages fails (for example a failed download piped into
# tar), instead of reporting only the exit status of the last command.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# BASH_ENV          - start-up file that bash reads before running a
#                     non-interactive command (see the bash manual).
#                     NOTE(review): Docker does not expand "~" in ENV, so this
#                     relies on bash resolving it at run time - confirm.
# MAMBA_ROOT_PREFIX - root prefix used by micromamba.
# PATH              - appends the bin directory of the "daops" environment.
ENV BASH_ENV=~/.bashrc \
    MAMBA_ROOT_PREFIX=/srv/conda \
    PATH=$PATH:/srv/conda/envs/daops/bin
33+
34+
35+
# ==== Install apt-packages and micromamba ====
#
# Everything is done in ONE layer so that the apt caches and package lists
# removed at the end never reach the image.  "autoremove" and "clean" are
# separate apt-get invocations because apt-get accepts a single sub-command
# per call.  The micromamba binary is copied (not moved) to /usr/bin, leaving
# the originally extracted ./micromamba in place as before.

RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        bash \
        bzip2 \
        ca-certificates \
        file \
        git \
        ttf-dejavu \
        wget && \
    wget -qO- https://micromamba.snakepit.net/api/micromamba/linux-64/latest | tar -xvj bin/micromamba --strip-components=1 && \
    ./micromamba shell init -s bash -p ~/micromamba && \
    cp ./micromamba /usr/bin && \
    apt-get autoremove --yes && \
    apt-get clean && \
    rm -fr /var/lib/apt/lists/* /srv/conda/pkgs
44+
45+
46+
# ==== Set up conda environment from yml file ====
#
# COPY (rather than ADD) is used because environment.yml is a plain local
# file: none of ADD's extra behaviours (archive extraction, URL fetching) is
# wanted.  "--yes" makes the non-interactive build independent of any
# confirmation prompt.  The package cache is deleted in the same layer that
# creates it so that it does not bloat the image.

ARG tmp_env=/tmp/environment.yml
COPY environment.yml $tmp_env
RUN micromamba create --yes -f $tmp_env && \
    rm -fr $tmp_env /srv/conda/pkgs
52+
53+
54+
# ==== Clone the data repo ====
#
# Fetch the test-data repository, switch to the requested branch and then
# drop the git metadata, which is not needed inside the image.
# All three values can be overridden with --build-arg.

ARG data_dir=/root/.mini-esgf-data
ARG data_repo_url=https://github.com/roocs/mini-esgf-data
ARG data_repo_branch=master
RUN git clone $data_repo_url $data_dir && \
    git -C $data_dir checkout $data_repo_branch && \
    rm -fr $data_dir/.git
63+
64+
65+
# ==== Set up the roocs.ini file with paths pointing to the data repo ====
# ==== and ensure that ROOCS_CONFIG environment variable points to it ====
#
# The template's DATA_DIR placeholder is replaced by the data directory.
# ROOCS_CONFIG is set in two ways:
#   * the export appended to /root/.bashrc (kept for existing shell users);
#   * an ENV instruction, so that the variable is also present when the
#     container runs a command directly (e.g. "docker run daops daops ..."
#     or a workflow runner) without going through a bash start-up file.

ARG config_file=/root/roocs.ini
ARG config_tmpl=/tmp/roocs.ini.tmpl
COPY roocs.ini.tmpl $config_tmpl
RUN sed "s,DATA_DIR,$data_dir,g" $config_tmpl > $config_file && \
    rm $config_tmpl && \
    echo "export ROOCS_CONFIG=$config_file" >> /root/.bashrc
ENV ROOCS_CONFIG=$config_file
74+
75+
76+
# ==== Install the daops app ====
#
# COPY creates the destination directory itself, so no separate
# "RUN mkdir" layer is needed.  USE_PYGEOS=0 is set both in /root/.bashrc
# (kept for existing shell users) and via ENV, so that it also applies when
# the container runs a command directly without a bash start-up file.

ARG tmp_install_dir=/tmp/daops-install
COPY . $tmp_install_dir
RUN cd $tmp_install_dir && \
    /srv/conda/envs/daops/bin/python setup.py install && \
    rm -fr $tmp_install_dir && \
    echo "export USE_PYGEOS=0" >> /root/.bashrc
ENV USE_PYGEOS=0
85+
86+
# ==== Mount point for results (bind-mount a host directory here) ====
RUN mkdir /outputs


# ==== Final tidy-up ====
# NB: this deletes the apt and dpkg state, so no further apt-install is
# possible in this image (or in images derived from it) after this point.

RUN rm -fr /var/lib/apt /var/lib/dpkg /var/lib/cache /var/lib/log

app-package.cwl

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
$graph:
2+
3+
- class: Workflow
  id: daops
  label: data-aware operations (daops)
  doc: Runs daops subsetting process

  requirements:
    - class: ScatterFeatureRequirement

  # Every workflow input is an array of strings; step_1 scatters over all of
  # them (flat cross product), running the '#clt' tool once per combination.
  inputs:
    area:
      label: Area
      doc: Area
      type: string[]
    time:
      label: Time
      doc: Time
      type: string[]
    time_components:
      label: Time Components
      doc: Time Components
      type: string[]
    level:
      label: Level
      doc: Level
      type: string[]
    output_format:
      label: Output Format
      doc: Output Format
      type: string[]
    file_namer:
      label: File Namer
      doc: File Namer
      type: string[]
    output_dir:
      label: Output dir
      doc: Output dir
      type: string[]
    collection:
      label: Collection
      doc: Collection
      type: string[]

  outputs:
    - id: wf_outputs
      type: Directory[]
      outputSource: [step_1/results]

  steps:
    step_1:
      run: '#clt'
      scatterMethod: flat_crossproduct
      scatter:
        - area
        - time
        - time_components
        - level
        - output_format
        - file_namer
        - output_dir
        - collection
      in:
        area: area
        time: time
        time_components: time_components
        level: level
        output_format: output_format
        file_namer: file_namer
        output_dir: output_dir
        collection: collection
      out: [results]
65+
66+
- class: CommandLineTool
  id: clt

  # The command-line interface exposes its options under the "subset"
  # sub-command ("daops subset [options] collection ...").
  baseCommand: [daops, subset]

  # Notes on the argument list:
  #   * "$(...)" is a CWL parameter reference.  "${...}" would be the body of
  #     a JavaScript function and yields nothing without a "return".
  #   * Options are passed as "--name=value" so that a value starting with
  #     "-" (e.g. a negative western bound in --area) is not mistaken for
  #     another option.
  #   * The CLI option is "--levels" but the tool input is named "level".
  #   * "collection" is a positional argument of the CLI, so it is passed
  #     last and without an option name.
  arguments:
    - "--area=$(inputs.area)"
    - "--time=$(inputs.time)"
    - "--time-components=$(inputs.time_components)"
    - "--levels=$(inputs.level)"
    - "--output-format=$(inputs.output_format)"
    - "--file-namer=$(inputs.file_namer)"
    - "--output-dir=$(inputs.output_dir)"
    - "$(inputs.collection)"

  inputs:
    area:
      type: string
    time:
      type: string
    time_components:
      type: string
    level:
      type: string
    output_format:
      type: string
    file_namer:
      type: string
    output_dir:
      type: string
    collection:
      type: string

  outputs:
    results:
      type: Directory
      outputBinding:
        glob: .

  requirements:
    EnvVarRequirement:
      envDef:
        # /srv/conda/envs/daops/bin is where the image installs the daops
        # console script; it must be listed because this PATH replaces the
        # one defined in the image.
        PATH: /srv/conda/envs/daops/bin:/bin:/srv/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
        # The CLI exits unless ROOCS_CONFIG names a readable ini file.
        # NOTE(review): assumes the image keeps its config at
        # /root/roocs.ini, as the accompanying Dockerfile does - confirm for
        # the published image.
        ROOCS_CONFIG: /root/roocs.ini
    ResourceRequirement: {}
    InlineJavascriptRequirement: {}
    DockerRequirement:
      dockerPull: iwi/daops:0.0.1

  #stderr: std.err
  #stdout: std.out
122+
123+
cwlVersion: v1.0
124+
125+
$namespaces:
126+
s: https://schema.org/
127+
s:softwareVersion: 0.3.0
128+
schemas:
129+
- http://schema.org/version/9.0/schemaorg-current-http.rdf

daops/cli.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
"""Console script for daops."""
2+
3+
__author__ = """Alan Iwi"""
4+
__contact__ = 'alan.iwi@stfc.ac.uk'
5+
__copyright__ = "Copyright 2023 United Kingdom Research and Innovation"
6+
__license__ = "BSD - see LICENSE file in top-level package directory"
7+
8+
import os
9+
import sys
10+
import argparse
11+
import dateutil.parser
12+
import configparser
13+
14+
from daops.ops.subset import subset
15+
from roocs_utils.utils.file_utils import FileMapper
16+
17+
def parse_args():
    """Parse the daops command line, taken from ``sys.argv``.

    A sub-command is mandatory; the only one defined is ``subset``.

    Returns:
        argparse.Namespace: with attributes ``command`` (name of the chosen
        sub-command), ``area``, ``time``, ``time_components``, ``levels``,
        ``output_format``, ``file_namer``, ``output_dir`` and ``collection``
        (a non-empty list of strings).

    Raises:
        SystemExit: raised by argparse on invalid usage (status 2) and after
            printing ``--help``.
    """
    parser = argparse.ArgumentParser()

    # The sub-parser action is given a ``dest`` so that argparse has a name
    # to report when the sub-command is missing.  Without one, some Python
    # versions fail with a TypeError while building the "required arguments"
    # error message instead of printing a normal usage error.
    sub_parsers = parser.add_subparsers(dest='command')
    sub_parsers.required = True

    parser_subset = sub_parsers.add_parser('subset', help='subset data')
    parser_subset.add_argument(
        '--area', '-a', type=str,
        help=('area in format w,s,e,n. Hint: if w is negative, include an "=" sign '
              'e.g. --area=-10,...'))
    parser_subset.add_argument(
        '--time', '-t', type=str, metavar='time_window',
        help='time window e.g. 1999-01-01T00:00:00/2100-12-30T00:00:00')
    parser_subset.add_argument(
        '--time-components', '-c', type=str,
        help="time components e.g. month:dec,jan,feb or 'year:1970,1980|month:01,02,03'")
    parser_subset.add_argument(
        '--levels', '-l', type=str,
        help=('comma-separated list of levels (e.g. 500,1000,2000) '
              'or slash-separated range (e.g. 50/2000 for 50 to 2000)'))
    parser_subset.add_argument(
        '--output-format', '-f', type=str, metavar='format',
        choices=('netcdf', 'nc', 'zarr'), default='netcdf')
    parser_subset.add_argument(
        '--file-namer', '-F', type=str,
        choices=('simple', 'standard'), default='standard')
    parser_subset.add_argument(
        '--output-dir', '-d', type=str, metavar='output_directory', required=True)
    # nargs='+' makes at least one collection mandatory, so no default applies.
    parser_subset.add_argument('collection', type=str, nargs='+')

    return parser.parse_args()
42+
43+
44+
def get_params(args):
    """Translate parsed command-line arguments into keyword arguments.

    The returned dictionary is what ``main`` passes to ``subset``.  A single
    collection is passed through as the one-element list produced by the
    argument parser; several collections are wrapped in a ``FileMapper``.
    ``apply_fixes`` is always ``False``.

    Args:
        args: namespace with the attributes ``collection``, ``time``,
            ``time_components``, ``area``, ``levels``, ``output_format``,
            ``output_dir`` and ``file_namer``.

    Returns:
        dict: keyword arguments for ``subset``.
    """
    datasets = args.collection
    if len(datasets) != 1:
        datasets = FileMapper(datasets)

    params = dict(
        collection=datasets,
        time=args.time,
        time_components=args.time_components,
        area=args.area,
        level=args.levels,
        output_type=args.output_format,
        output_dir=args.output_dir,
        file_namer=args.file_namer,
        apply_fixes=False,
    )
    return params
58+
59+
60+
def check_env():
    """Exit unless ROOCS_CONFIG names a readable ini-format config file.

    ``main`` calls this only after the command line has been parsed, so that
    ``--help`` still works when the variable is not set.  (As the original
    note says, for certain kinds of invalid file ``main`` may never be
    reached, so the exit is not guaranteed to be graceful in every case.)

    The check fails, printing a message and exiting with status 1, when:

    * the environment variable is not set;
    * the file cannot be read (``ConfigParser.read`` then returns an empty
      list);
    * the file is not valid ini syntax; or
    * the file cannot be decoded as text (e.g. a binary file).

    Raises:
        SystemExit: with status 1 if any of the checks above fails.
    """
    config_env_var = 'ROOCS_CONFIG'
    config = configparser.ConfigParser()
    try:
        files_read = config.read(os.environ[config_env_var])
    except (KeyError, configparser.Error, UnicodeDecodeError):
        # KeyError: variable unset; configparser.Error: malformed ini;
        # UnicodeDecodeError: the file is not text at all.
        files_read = None
    if not files_read:
        print(f'Environment variable {config_env_var} must contain the path name of a config file in ini format')
        sys.exit(1)
76+
77+
78+
def main():
    """Entry point of the console script.

    Parses the command line, converts it into keyword arguments, verifies
    the ROOCS_CONFIG environment, runs ``subset`` and prints each URI in the
    result's ``file_uris`` on a line of its own.
    """
    params = get_params(parse_args())
    check_env()
    result = subset(**params)
    for file_uri in result.file_uris:
        print(file_uri)
85+
86+
87+
if __name__ == "__main__":
88+
sys.exit(main()) # pragma: no cover

daops/fix_utils/decadal_utils.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ def get_time_calendar(ds_id, ds):
3535

3636

3737
def get_lead_times(ds_id, ds):
38-
3938
start_date = datetime.fromisoformat(get_start_date(ds_id, ds))
4039

4140
cal = get_time_calendar(ds_id, ds)

daops/utils/consolidate.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,6 @@ def get_files_matching_time_range(time_param, file_paths):
8686

8787
# Handle times differently depending on the type of time parameter
8888
if time_param.type == "interval":
89-
9089
tp_start, tp_end = time_param.get_bounds()
9190
req_start_year = get_year(tp_start, default=-99999999)
9291
req_end_year = get_year(tp_end, default=999999999)
@@ -98,7 +97,6 @@ def get_files_matching_time_range(time_param, file_paths):
9897
files_in_time_range.append(fpath)
9998

10099
elif time_param.type == "series":
101-
102100
# Get requested years and match to files whose years intersect
103101
req_years = {to_year(tm) for tm in time_param.asdict().get("time_values", [])}
104102

@@ -135,7 +133,6 @@ def consolidate(collection, **kwargs):
135133
time_param = kwargs.get("time")
136134

137135
for dset in collection:
138-
139136
if not catalog:
140137
file_paths = dset_to_filepaths(dset, force=True)
141138

daops/utils/fixer.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ def _gather_fixes(self, content):
3636
"""Gathers pre and post processing fixes together"""
3737
if content["_source"]["fixes"]:
3838
for fix in content["_source"]["fixes"]:
39-
4039
ref_implementation = fix["reference_implementation"]
4140
func = locate(ref_implementation)
4241

daops/utils/normalise.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ def normalise(collection, apply_fixes=True):
1919
norm_collection = collections.OrderedDict()
2020

2121
for dset, file_paths in collection.items():
22-
2322
ds = open_dataset(dset, file_paths, apply_fixes)
2423
norm_collection[dset] = ds
2524

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ cftime
55
netcdf4
66
elasticsearch>=8.0.1
77
clisops>=0.9.1
8-
# clisops @ git+https://github.com/roocs/clisops.git@master#egg=clisops
8+
## clisops @ git+https://github.com/roocs/clisops.git@master#egg=clisops
99
roocs-utils>=0.6.2
1010
# logging
1111
loguru>=0.5.3

roocs.ini.tmpl

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
[project:cmip5]
2+
base_dir = DATA_DIR/test_data/badc/cmip5/data/cmip5
3+
4+
[project:cmip6]
5+
base_dir = DATA_DIR/test_data/badc/cmip6/data/CMIP6
6+
7+
[project:cordex]
8+
base_dir = DATA_DIR/test_data/badc/cordex/data/cordex
9+
10+
[project:c3s-cmip5]
11+
base_dir = DATA_DIR/test_data/gws/nopw/j04/cp4cds1_vol1/data/c3s-cmip5
12+
13+
[project:c3s-cmip6]
14+
base_dir = DATA_DIR/test_data/badc/cmip6/data/CMIP6
15+
16+
[project:c3s-cordex]
17+
base_dir = DATA_DIR/test_data/gws/nopw/j04/cp4cds1_vol1/data/c3s-cordex

0 commit comments

Comments
 (0)