File tree Expand file tree Collapse file tree
skills/earth2studio-deterministic-forecast/evals Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1+ # Harbor execution policy for earth2studio-deterministic-forecast live evals.
2+ #
3+ # Default (no --copy-repo): ACES stages the skill under /workspace/skills/ and
4+ # copies only those repo files outside the skill tree that SKILL.md explicitly
5+ # links to. Agents typically write deliverables to /workspace/output/. Grading
6+ # is trajectory + LLM-as-judge (accuracy, behavior_check) — no live pytest against the repo.
7+ #
8+ # Optional --copy-repo: also copies the full git tree to /workspace/repo so
9+ # bootstrap can uv sync and agents can run pytest/make lint in a real checkout.
10+ # Use this when validating end-to-end dev workflow, not for routine pass/fail.
11+ #
12+ # Local run:
13+ # nv-base agent-eval skills/earth2studio-deterministic-forecast \
14+ # -a claude-code,codex -o ./eval-results/
15+ #
16+ # Validate before committing:
17+ # astra-skill-eval validate ./skills/earth2studio-deterministic-forecast
18+
# NOTE(review): indentation was lost in this copy of the file. The keys from
# custom_dockerfile_mode through pre_agent_setup presumably nest under
# `harbor`, and each `mode` key under the section name directly above it
# (two sibling `mode` keys would otherwise collide) — confirm against the
# original file before relying on this structure.
19+ schema_version : 1
20+
21+ harbor :
22+ custom_dockerfile_mode : preserve
23+ base_image_mode : disabled
24+ n_attempts : 2
25+ pass_threshold : 0.60
26+ stop_on_pass : false
27+ n_concurrent : 4
28+ timeout_multiplier : 4.0
# The path below is where the eval Dockerfile installs setup/bootstrap.sh;
# that script runs `uv sync` only when a repo checkout with a pyproject.toml
# is present and otherwise exits 0 (skill-only eval mode).
29+ pre_agent_setup :
30+ - /usr/local/bin/e2s-eval-bootstrap
31+
32+ skill_workspace :
33+ mode : isolated
34+
35+ grading :
36+ mode : aces_default
FROM ghcr.io/astral-sh/uv:python3.13-bookworm-slim

# Lightweight Earth2Studio dev sandbox for Harbor skill evals (Python 3.13 + uv).
# Each apt layer removes /var/lib/apt/lists in the same RUN that created it;
# deleting the lists in a later layer would not shrink the earlier one.
RUN apt-get -o Acquire::Retries=3 update && \
    apt-get -o Acquire::Retries=3 install -y --no-install-recommends \
        bash \
        build-essential \
        ca-certificates \
        curl \
        git \
        jq \
        make \
        ripgrep \
    && rm -rf /var/lib/apt/lists/*

# Install eccodes (kept as its own layer so it caches independently of the
# base tooling above).
RUN apt-get -o Acquire::Retries=3 update && \
    apt-get -o Acquire::Retries=3 install -y --no-install-recommends \
        libeccodes-tools \
        libeccodes-dev \
    && rm -rf /var/lib/apt/lists/*

ENV UV_LINK_MODE=copy
ENV UV_PYTHON=3.13
# NV-ACES default repo location
ENV EARTH2STUDIO_ROOT=/workspace/repo

RUN mkdir -p /workspace/skills /workspace/input /workspace/output \
    /logs/verifier /logs/agent

# WORKDIR creates /workspace/repo if it does not exist. The bootstrap script
# treats a directory without pyproject.toml as "no repo" and skips uv sync.
WORKDIR /workspace/repo

COPY setup/bootstrap.sh /usr/local/bin/e2s-eval-bootstrap
RUN chmod +x /usr/local/bin/e2s-eval-bootstrap
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0
#
# Harbor pre_agent_setup / healthcheck: uv sync when a repo checkout is present.
# Default ACES runs stage only the skill; pass --copy-repo to populate /workspace/repo.
#
# Environment:
#   EARTH2STUDIO_ROOT  Repo checkout to bootstrap (default: /workspace/repo).

set -euo pipefail

REPO_ROOT="${EARTH2STUDIO_ROOT:-/workspace/repo}"
readonly PYTHON_VERSION=3.13
readonly PROFILE_SNIPPET=/etc/profile.d/e2s-eval.sh

#######################################
# Print the login-shell snippet that exposes the bootstrapped checkout.
# Arguments: $1 - absolute path of the repo checkout
# Outputs:   three shell lines on stdout (export root, prepend venv to PATH, cd)
#######################################
render_profile() {
  local root=$1
  # %q shell-escapes the path so a root containing spaces or metacharacters
  # still yields a valid snippet; plain paths are emitted unchanged.
  printf 'export EARTH2STUDIO_ROOT=%q\n' "${root}"
  # shellcheck disable=SC2016 — ${PATH} must expand when the snippet is sourced, not now.
  printf 'export PATH=%q:"${PATH}"\n' "${root}/.venv/bin"
  printf 'cd %q\n' "${root}"
}

#######################################
# Bootstrap the checkout at REPO_ROOT, or skip when none is present.
# Globals:   REPO_ROOT, PYTHON_VERSION, PROFILE_SNIPPET (read)
# Returns:   0 on success or skip; non-zero if any setup step fails
#######################################
main() {
  if [[ ! -f "${REPO_ROOT}/pyproject.toml" ]]; then
    echo "e2s-eval-bootstrap: no repo at ${REPO_ROOT}; skipping uv sync (skill-only eval mode)" >&2
    return 0
  fi

  cd "${REPO_ROOT}"

  export UV_LINK_MODE=copy
  export UV_PYTHON="${PYTHON_VERSION}"

  uv venv --python "${PYTHON_VERSION}"
  uv sync --group dev --extra data
  uv run pre-commit install --install-hooks

  mkdir -p -- "${PROFILE_SNIPPET%/*}"
  render_profile "${REPO_ROOT}" > "${PROFILE_SNIPPET}"

  # Resolve the version outside the echo so a broken interpreter aborts the
  # bootstrap instead of being swallowed by the command substitution.
  local py_version
  py_version="$(uv run python --version)"
  echo "e2s-eval-bootstrap: ready at ${REPO_ROOT} (${py_version})"
}

main
You can’t perform that action at this time.
0 commit comments