Skip to content

Commit e55aec1

Browse files
committed
implement "cue.verified" field as a check that a yaml file has been
verified by the cue schema
1 parent 471f5e8 commit e55aec1

File tree

7 files changed

+198
-4
lines changed

7 files changed

+198
-4
lines changed

R/guide.R

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,22 @@
88
#' type, as well as instructions how to create and test the validity, please
99
#' see the vignettes.
1010
#'
11+
#' @param verify_hash If \code{TRUE}, checks that the guide file contains a
12+
#' \code{cue.verified} field with a valid SHA256 hash, confirming it was
13+
#' signed by \code{validate_and_sign.sh} after successful CUE validation.
14+
#' Issues a warning when the field is absent; aborts when the field is
15+
#' present but malformed. Does not recompute the hash (use
16+
#' \code{verify_guide.sh} in \code{data-raw/} for full hash verification).
17+
#' Defaults to \code{FALSE}.
1118
#' @export
1219
#'
13-
read_guide <- function(path) {
20+
read_guide <- function(path, verify_hash = FALSE) {
1421
guide <- yaml::read_yaml(path)
22+
23+
if (verify_hash) {
24+
check_cue_hash(path, guide)
25+
}
26+
1527
check_guide(guide)
1628

1729
if ("translations" %in% names(guide)) {

R/utils.R

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,45 @@
1+
#' Verify the CUE validation hash embedded in a guide file
#'
#' Checks that the \code{cue.verified} field is present and correctly formatted.
#' This confirms the guide was processed by \code{validate_and_sign.sh} after
#' passing CUE schema validation. The hash itself is not recomputed from R;
#' use \code{verify_guide.sh} in \code{data-raw/} for full hash verification.
#'
#' Behaviour by case:
#' \itemize{
#'   \item field absent (or \code{NULL}): warning, returns early;
#'   \item field present but not a single string of the form
#'     \code{sha256:<64 lowercase hex characters>}: error;
#'   \item field present and well-formed: informational message.
#' }
#'
#' @param path Path to the guide YAML file (used only in messages)
#' @param guide Already-parsed guide list (from \code{yaml::read_yaml})
#' @return Invisibly \code{NULL}; called for its side effects (warnings/errors)
#' @noRd
#'
check_cue_hash <- function(path, guide) {
  stored_hash <- guide[["cue.verified"]]

  if (is.null(stored_hash)) {
    rlang::warn(
      c(
        "!" = glue::glue("Guide file has no 'cue.verified' field: {path}"),
        "i" = "Run validate_and_sign.sh to validate with CUE and embed a verification hash."
      ),
      use_cli_format = TRUE
    )
    return(invisible(NULL))
  }

  # A YAML sequence or mapping under 'cue.verified' parses to a list / longer
  # vector. Guard before grepl() so `if` always receives a single TRUE/FALSE
  # and such values are reported as malformed rather than failing obscurely.
  is_scalar_string <- is.character(stored_hash) &&
    length(stored_hash) == 1L &&
    !is.na(stored_hash)

  if (!is_scalar_string || !grepl("^sha256:[a-f0-9]{64}$", stored_hash)) {
    # Collapse to one string so the message stays a single glue result even
    # when the offending value is a list or a vector.
    found <- paste(as.character(unlist(stored_hash)), collapse = ", ")
    rlang::abort(glue::glue(
      "The 'cue.verified' field has an invalid format in {path}.\n",
      "  Found: '{found}'\n",
      "  Expected: 'sha256:<64 lowercase hex characters>'"
    ))
  }

  rlang::inform(
    c("v" = glue::glue("Guide CUE-verified: {stored_hash}")),
    use_cli_format = TRUE
  )

  invisible(NULL)
}
42+
143
#' Create a table from a list of key-value pairs
244
#' @param kvlist A list of key-value pairs
345
#' @param guide A data guide

data-raw/excelguide_schema.cue

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// usage: cue vet -c schema.cue file.yml
1+
// usage: cue vet -c excelguide_schema.cue file.yml
22

33
// Excel Data Guide Schema
44
// This schema validates Excel template guide files that describe how to extract
@@ -13,6 +13,21 @@
1313
"locations"!: [...#Location]
1414
"translations": [...#Translation]
1515

16+
// Optional field added by validate_and_sign.sh after successful CUE validation.
17+
// Format: sha256:<64 lowercase hex characters>
18+
"cue.verified"?: =~"^sha256:[a-f0-9]{64}$"
19+
20+
// Validate that exactly one location uses the reserved varname ".template"
21+
// (list comprehension filters locations; [_] asserts exactly one match exists)
22+
// _templateLocations: [for loc in locations if loc.varname == ".template" {loc}]
23+
// _templateLocations: [_]
24+
// NOTE: the constraint "at least one location must have varname == '.template'" cannot
25+
// be expressed here. CUE comprehensions require a closed, concrete list to iterate
26+
// over, but "locations" is an open list type ([...#Location]). Additionally,
27+
// string-labelled fields ("locations"!) are not resolvable as identifier references
28+
// inside a for expression, which causes a "reference not found" error at vet time.
29+
// This constraint is enforced at runtime by check_guide() in R/guide.R instead.
30+
1631
// Version constraint: must be in major.minor format (e.g., "1.0", "2.3")
1732
#Version: =~"^\\d+\\.\\d+$"
1833

data-raw/shell.nix

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,6 @@ in
1111
pkgs.mkShellNoCC {
1212
packages = with pkgs; [
1313
cue
14+
yq-go # YAML processor used by validate_and_sign.sh and verify_guide.sh
1415
];
1516
}

data-raw/validate_and_sign.sh

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
#!/usr/bin/env bash
# validate_and_sign.sh
#
# Validates a guide YAML file against the CUE schema and, on success, embeds a
# SHA256 hash of the validated content into the file as the 'cue.verified' field.
# Running this script a second time on an already-signed file is safe: the
# existing 'cue.verified' field is stripped before hashing so the hash is stable.
#
# Usage:
#   ./validate_and_sign.sh <guide.yml>
#
# Example:
#   ./validate_and_sign.sh guide_competition_1_0_source.yml
#
# Exit code 0: validation passed and the hash was embedded
# Exit code 1: missing file, schema or dependency, or CUE validation failed
#
# Requirements: cue, yq (go-yq), sha256sum
# Install via the accompanying shell.nix: nix-shell

set -euo pipefail

FILE="${1:?Usage: validate_and_sign.sh <guide.yml>}"
SCHEMA="$(dirname "$0")/excelguide_schema.cue"

# --------------------------------------------------------------------------- #
# Sanity checks
# --------------------------------------------------------------------------- #

if [ ! -f "$FILE" ]; then
  echo "Error: file not found: $FILE" >&2
  exit 1
fi

if [ ! -f "$SCHEMA" ]; then
  echo "Error: schema not found: $SCHEMA" >&2
  exit 1
fi

for cmd in cue yq sha256sum; do
  if ! command -v "$cmd" &>/dev/null; then
    echo "Error: required command not found: $cmd" >&2
    echo "       Start a nix-shell in this directory to get all dependencies." >&2
    exit 1
  fi
done

# --------------------------------------------------------------------------- #
# Prepare a stripped temporary copy (without any existing cue.verified field)
# --------------------------------------------------------------------------- #

# A template with text after the X's (guide_XXXXXX.yml) is only randomised by
# GNU mktemp. Create a private directory from a trailing-X template instead and
# place a fixed-name .yml file inside it; the file keeps a .yml name for the
# `cue vet` call below.
WORKDIR=$(mktemp -d "${TMPDIR:-/tmp}/guide_XXXXXX")
trap 'rm -rf "$WORKDIR"' EXIT
TMPFILE="$WORKDIR/guide.yml"

yq 'del(.["cue.verified"])' "$FILE" > "$TMPFILE"

# --------------------------------------------------------------------------- #
# CUE validation
# --------------------------------------------------------------------------- #

echo "Validating ${FILE} ..."

if ! cue vet -c "$SCHEMA" "$TMPFILE"; then
  # Keep the whole failure report on stderr, including the separator line.
  echo "" >&2
  echo "CUE validation FAILED — 'cue.verified' was NOT embedded." >&2
  exit 1
fi

# --------------------------------------------------------------------------- #
# Hash the yq-processed content and embed it in the original file
#
# The hash is computed over yq's re-serialised output (without cue.verified),
# not over the raw bytes of the original file. verify_guide.sh must therefore
# reproduce exactly the same yq invocation to obtain a matching hash.
# NOTE(review): how much cosmetic variation (whitespace, quoting style) yq
# absorbs when re-serialising depends on the yq version — confirm before
# relying on the hash being stable across cosmetic edits.
# --------------------------------------------------------------------------- #

HASH=$(sha256sum "$TMPFILE" | cut -d' ' -f1)

# HASH is plain hex produced by sha256sum, so interpolating it into the yq
# expression is safe.
yq -i ".\"cue.verified\" = \"sha256:${HASH}\"" "$FILE"

echo "Validation passed."
echo "Embedded: sha256:${HASH}"

data-raw/verify_guide.sh

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#!/usr/bin/env bash
# verify_guide.sh
# Verifies that a guide YAML has been CUE-validated and has not changed since
# it was signed by validate_and_sign.sh.
#
# Usage: ./verify_guide.sh <guide.yml>
# Exit code 0: valid and unmodified
# Exit code 1: missing file or dependency, missing signature, or hash mismatch
#
# Requirements: yq (go-yq), sha256sum

set -euo pipefail

FILE="${1:?Usage: verify_guide.sh <guide.yml>}"

if [ ! -f "$FILE" ]; then
  echo "Error: file not found: $FILE" >&2
  exit 1
fi

# Fail with a clear message (and the documented exit code 1) when a tool is
# missing, instead of letting `set -e` abort with "command not found".
for cmd in yq sha256sum; do
  if ! command -v "$cmd" &>/dev/null; then
    echo "Error: required command not found: $cmd" >&2
    echo "       Start a nix-shell in this directory to get all dependencies." >&2
    exit 1
  fi
done

# Extract the stored hash value; `// ""` yields an empty string when absent
STORED_HASH=$(yq '."cue.verified" // ""' "$FILE")

if [ -z "$STORED_HASH" ] || [ "$STORED_HASH" = "null" ]; then
  echo "FAIL: no 'cue.verified' field found in $FILE" >&2
  echo "      Run validate_and_sign.sh to validate with CUE and embed a hash." >&2
  exit 1
fi

# Strip the cue.verified field and recompute the hash, identical to validate_and_sign.sh.
# The template ends in X's because a suffix after the X's is only randomised
# by GNU mktemp; no file extension is needed here since only the content is hashed.
TMPFILE=$(mktemp "${TMPDIR:-/tmp}/guide_XXXXXX")
trap 'rm -f "$TMPFILE"' EXIT

yq 'del(.["cue.verified"])' "$FILE" > "$TMPFILE"
CURRENT_HASH="sha256:$(sha256sum "$TMPFILE" | cut -d' ' -f1)"

# Compare stored vs recomputed
if [ "$STORED_HASH" = "$CURRENT_HASH" ]; then
  echo "OK: $FILE"
  echo "    Signed with: $STORED_HASH"
else
  echo "FAIL: $FILE — hash mismatch, file was modified after signing." >&2
  echo "      stored:  $STORED_HASH" >&2
  echo "      current: $CURRENT_HASH" >&2
  exit 1
fi

tests/testthat/fixtures/guide_competition_1_0.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
guide.version: '1.0'
1+
guide.version: "1.0"
22
template.name: competition
3-
template.min.version: '9.3'
3+
template.min.version: "9.3"
44
template.max.version: ~
55
plate.format: 96
66
locations:
@@ -142,3 +142,4 @@ translations:
142142
short: itm2Mw
143143
- long: Run identifier 2
144144
short: run2ID
145+
cue.verified: sha256:872121d48d04cb6d17c29f3ffff7af432fa619f641757a423e2bb0dc64deab00

0 commit comments

Comments
 (0)