Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@ prune docs

# Exclude project config files
exclude mkdocs.yml
exclude .pre-commit.yml
exclude .pre-commit-config.yaml
exclude .gitignore
11 changes: 6 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ Results from both file- and dataset-level checks are aggregated, summarized, and
### Currently supported checkers

While `esgf-qa` has been primarily developed for workflows assessing compliance with WCRP project data specifications
(e.g., CMIP, CORDEX), it can also be used for general CF-compliance testing and easily extended to support any
`cc-plugin` and projects following CORDEX- or CMIP-style CMOR table conventions.
(e.g., CMIP, CORDEX), it can also be used for general CF-compliance testing and generally supports any
`cc-plugin`. It can be easily extended to support any project following CORDEX- or CMIP-style CMOR table conventions.

| Standard | Checker Name |
| ---------------------------------------------------------------------------------------------------- | ------------ |
Expand Down Expand Up @@ -79,15 +79,16 @@ esgvoc status
## Usage

```shell
$ esgqa [-h] [-o <OUTPUT_DIR>] [-t <TEST>] [-O OPTION] [-i <INFO>] [-r] [-C] <parent_dir>
$ esgqa [-h] [-P <parallel_processes>] [-o <OUTPUT_DIR>] [-t <TEST>] [-O OPTION] [-i <INFO>] [-r] [-C] <parent_dir>
```

- positional arguments:
- `parent_dir`: Parent directory to scan for netCDF-files to check
- options:
- `-h, --help`: show this help message and exit
- `-P, --parallel_processes`: Specify the maximum number of parallel processes. Default: 0 (= number of cores).
- `-o, --output_dir OUTPUT_DIR`: Directory to store QA results. Must either not exist yet, be empty, or stem from a previous QA run. If not specified, results are stored in `./cc-qa-check-results/YYYYMMDD-HHmm_<hash>`.
- `-t, --test TEST`: The test to run (`'wcrp_cmip6:latest'`, `'wcrp_cordex_cmip6:latest'` or `'cf:<version>'`, can be specified multiple times, eg.: `'-t wcrp_cmip6:latest -t cf:1.7'`) - default: running latest CF checks `'cf:latest'`.
- `-t, --test TEST`: The test to run (`'wcrp_cmip6:latest'`, `'wcrp_cordex_cmip6:latest'` or `'cf:<version>'`, can be specified multiple times, e.g. `'-t wcrp_cmip6:latest -t cf:1.7'`) - default: running latest CF checks `'cf:latest'`. If the version is omitted, `latest` will be used.
- `-O, --option OPTION`: Additional options to be passed to the checkers. Format: `'<checker>:<option_name>[:<option_value>]'`. Multiple invocations possible.
- `-i, --info INFO`: Information used to tag the QA results, e.g. the simulation id to identify the checked run. Using the original experiment-id you gave the run is suggested.
- `-r, --resume`: Specify to continue a previous QA run. Requires the `<output_dir>` argument to be set.
Expand All @@ -96,7 +97,7 @@ $ esgqa [-h] [-o <OUTPUT_DIR>] [-t <TEST>] [-O OPTION] [-i <INFO>] [-r] [-C] <pa
### Example Usage

```shell
$ esgqa -t wcrp_cordex_cmip6:latest -t cf:1.11 -o QA_results/IAEVALL02_2025-10-20 -i "IAEVALL02" ESGF_Buff/IAEVALL02/CORDEX-CMIP6
$ esgqa -P 8 -t wcrp_cordex_cmip6:latest -t cf:1.11 -o QA_results/IAEVALL02_2025-10-20 -i "IAEVALL02" ESGF_Buff/IAEVALL02/CORDEX-CMIP6
```

To resume at a later date, e.g. if the QA run did not finish in time or more files have been added to the `<parent_dir>`
Expand Down
51 changes: 36 additions & 15 deletions esgf_qa/_constants.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
from datetime import timedelta

# Collection of supported checkers
# Mapping of checker names to project names for better readability
checker_dict = {
"cc6": "CORDEX-CMIP6",
"cf": "CF-Conventions",
"mip": "MIP",
"plugin_cmip6": "CMIP6",
# "wcrp-cmip5": "CMIP5",
"wcrp_cmip6": "CMIP6",
# "wcrp_cmip7": "CMIP7-AFT",
# "wcrp_cmip7": "CMIP7",
# "wcrp_cmip7aft": "CMIP7-AFT",
"wcrp_cmip7": "CMIP7",
# "wcrp_cordex": "CORDEX",
"wcrp_cordex_cmip6": "CORDEX-CMIP6",
# "obs4mips": "Obs4MIPs",
Expand All @@ -24,17 +23,39 @@
}
checker_release_versions = {}

# DRS parent directory names
DRS_path_parent = {
"CMIP5": "CMIP5",
"CMIP6": "CMIP6",
"CMIP7": "CMIP7",
"CMIP7-AFT": "CMIP7",
"CORDEX": "CORDEX",
"CORDEX-CMIP6": "CORDEX-CMIP6",
"Obs4MIPs": "Obs4MIPs",
"Input4MIPs": "Input4MIPs",
}
# Checkers for which consistency checks should be run.
# Each entry is a checker name that also appears as a key of ``checker_dict``.
checker_supporting_consistency_checks = [
"wcrp_cmip7",
"wcrp_cmip6",
"wcrp_cordex_cmip6",
"cc6",
"mip",
]

# DRS parent directory names (for identifying project root and building dataset id)
# NOTE(review): all entries are lowercase - presumably they are compared against
# lowercased path components; confirm against the code that consumes this list.
supported_project_ids = [
"cmip7",
"cmip6plus",
"cmip6",
"cmip5",
"cordex",
"cordex-cmip6",
"cordex-fpsconv",
"obs4mips",
"input4mips",
"c3scordex",
"c3scmip5",
"c3scmip6",
"c3s-ipcc-ar6-atlas",
"c3satlas",
"c3s-cica-atlas",
"c3satlas_v1",
"c3s-atlas-dataset",
"c3satlas_v2",
"eerie",
"happi",
"cosmo-rea",
]

# Definition of maximum permitted deviations from the given frequency
deltdic = {}
Expand Down
9 changes: 5 additions & 4 deletions esgf_qa/cluster_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def update(self, result_dict, dsid, file_name):
"errors"
].items():
self.summary["error"][
f"[{checker_dict[checker]}] " + function_name
f"[{checker_dict.get(checker, checker)}] " + function_name
][error_msg][dsid].append(file_name)
else:
score, max_score = result_dict[checker][test]["value"]
Expand All @@ -110,7 +110,7 @@ def update(self, result_dict, dsid, file_name):
if score < max_score: # test outcome: fail
for msg in msgs:
self.summary["fail"][weight][
f"[{checker_dict[checker]}] " + test
f"[{checker_dict.get(checker, checker)}] " + test
][msg][dsid].append(file_name)

def update_ds(self, result_dict, dsid):
Expand All @@ -132,15 +132,16 @@ def update_ds(self, result_dict, dsid):
].items():
for file_name in errdict["files"]:
self.summary["error"][
f"[{checker_dict_ext[checker]}] " + function_name
f"[{checker_dict_ext.get(checker, checker)}] "
+ function_name
][errdict["msg"]][dsid].append(file_name)
else:
weight = result_dict[checker][test].get("weight", 3)
fails = result_dict[checker][test].get("msgs", {})
for msg, file_names in fails.items():
for file_name in file_names:
self.summary["fail"][weight][
f"[{checker_dict_ext[checker]}] " + test
f"[{checker_dict_ext.get(checker, checker)}] " + test
][msg][dsid].append(file_name)

def sort(self):
Expand Down
Loading
Loading