Skip to content

Commit a7ca935

Browse files
committed
Add Harbor eval environment for deterministic-forecast skill
- Add Dockerfile with uv + Python 3.13 + eccodes - Add bootstrap.sh for pre_agent_setup with uv sync - Add config.yml with Harbor execution policy
1 parent 2c1406f commit a7ca935

3 files changed

Lines changed: 100 additions & 0 deletions

File tree

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Harbor execution policy for earth2studio-deterministic-forecast live evals.
#
# Default (no --copy-repo): ACES stages the skill under /workspace/skills/ and
# copies only repo files explicitly linked from SKILL.md outside the skill tree.
# Agents typically deliver under /workspace/output/. Grading is trajectory +
# LLM-as-judge (accuracy, behavior_check) — no live pytest against the repo.
#
# Optional --copy-repo: also copies the full git tree to /workspace/repo so
# bootstrap can uv sync and agents can run pytest/make lint in a real checkout.
# Use this when validating end-to-end dev workflow, not for routine pass/fail.
#
# Local run:
#   nv-base agent-eval skills/earth2studio-deterministic-forecast \
#     -a claude-code,codex -o ./eval-results/
#
# Validate before committing:
#   astra-skill-eval validate ./skills/earth2studio-deterministic-forecast

# NOTE(review): nesting below is reconstructed from key order and blank-line
# grouping; confirm against the schema (e.g. with the validate command above).
schema_version: 1

harbor:
  custom_dockerfile_mode: preserve
  base_image_mode: disabled
  n_attempts: 2
  pass_threshold: 0.60
  stop_on_pass: false
  n_concurrent: 4
  timeout_multiplier: 4.0
  # Commands listed here are the pre-agent setup step; this one is the
  # bootstrap script the Dockerfile installs under /usr/local/bin.
  pre_agent_setup:
    - /usr/local/bin/e2s-eval-bootstrap

skill_workspace:
  mode: isolated

grading:
  mode: aces_default
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim

# Lightweight Earth2Studio dev sandbox for Harbor skill evals (Python 3.13 + uv).
#
# Base tooling and eccodes (libeccodes-*) are installed in ONE layer so that
# `apt-get update` and `apt-get install` always run together, and the package
# index is deleted in the same layer that downloaded it (a later `rm` would not
# shrink the image). Packages are kept in alphabetical order for clean diffs.
RUN apt-get -o Acquire::Retries=3 update && \
    apt-get -o Acquire::Retries=3 install -y --no-install-recommends \
        bash \
        build-essential \
        ca-certificates \
        curl \
        git \
        jq \
        libeccodes-dev \
        libeccodes-tools \
        make \
        ripgrep \
    && rm -rf /var/lib/apt/lists/*

# uv settings; the bootstrap script exports the same values.
ENV UV_LINK_MODE=copy
ENV UV_PYTHON=3.13
# NV-ACES default repo location
ENV EARTH2STUDIO_ROOT=/workspace/repo

# Pre-create the workspace and log directories.
RUN mkdir -p /workspace/skills /workspace/input /workspace/output \
    /logs/verifier /logs/agent

# WORKDIR creates /workspace/repo if missing; the bootstrap script skips
# `uv sync` when this directory has no pyproject.toml.
WORKDIR /workspace/repo

# Install the bootstrap script under the path that config.yml lists in
# `pre_agent_setup`.
COPY setup/bootstrap.sh /usr/local/bin/e2s-eval-bootstrap
RUN chmod +x /usr/local/bin/e2s-eval-bootstrap
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0
#
# Harbor pre_agent_setup / healthcheck: uv sync when a repo checkout is present.
# Default ACES runs stage only the skill; pass --copy-repo to populate /workspace/repo.
#
# Environment:
#   EARTH2STUDIO_ROOT  Repo checkout location (default: /workspace/repo).
#
# Exit status:
#   0 when no checkout is present (nothing to do) or when setup completes;
#   non-zero as soon as any setup command fails (set -e).

set -euo pipefail

REPO_ROOT="${EARTH2STUDIO_ROOT:-/workspace/repo}"

# A checkout is recognised by its pyproject.toml; without one there is nothing
# to sync, which is the normal skill-only case rather than an error.
if [[ ! -f "${REPO_ROOT}/pyproject.toml" ]]; then
    echo "e2s-eval-bootstrap: no repo at ${REPO_ROOT}; skipping uv sync (skill-only eval mode)" >&2
    exit 0
fi

cd "${REPO_ROOT}"

export UV_LINK_MODE=copy
export UV_PYTHON=3.13

uv venv --python 3.13
uv sync --group dev --extra data

# pre-commit needs git metadata to install hooks. Skip with a warning instead
# of aborting the whole bootstrap when the checkout is not a git work tree.
if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then
    uv run pre-commit install --install-hooks
else
    echo "e2s-eval-bootstrap: ${REPO_ROOT} is not a git work tree; skipping pre-commit hook install" >&2
fi

# Write a profile.d snippet that exports the repo root, puts the venv on PATH
# and changes into the checkout. ${REPO_ROOT} is expanded now; \${PATH} is left
# for the shell that sources the snippet. Every expansion is double-quoted so
# a root path containing whitespace does not split into several words.
cat >/etc/profile.d/e2s-eval.sh <<EOF
export EARTH2STUDIO_ROOT="${REPO_ROOT}"
export PATH="${REPO_ROOT}/.venv/bin:\${PATH}"
cd "${REPO_ROOT}"
EOF

echo "e2s-eval-bootstrap: ready at ${REPO_ROOT} ($(uv run python --version))"

0 commit comments

Comments
 (0)