From 0fa017b11d41a1b2fa4660d3ad1e7eb2c1de90cf Mon Sep 17 00:00:00 2001 From: Lasse Benninga Date: Fri, 22 May 2026 12:20:49 +0200 Subject: [PATCH 1/7] feat: scaffold Week 5 assignment with real autograder Replace the stub test.sh (always-pass, score 0) with a 7-level static analysis grader. Scaffold code has NotImplementedError stubs and TODO placeholders that score 27/100 (fail); a complete solution scores 100/100. Grader levels: 1 (15 pts) - required files present 2 (15 pts) - Dockerfile layer order and base image 3 (15 pts) - pinned dependencies 4 (20 pts) - CI triggers, ruff, pytest, docker build 5 (15 pts) - env-var config, no NotImplementedError 6 (10 pts) - ACR screenshot present and non-trivial 7 (10 pts) - AI_ASSIST.md filled in, no TODO placeholders Also adds: student README, devcontainer.json (Docker + Azure CLI), src/pipeline.py starter, tests/test_pipeline.py, Dockerfile stub, requirements.txt stub, ci.yml stub, AI_ASSIST.md template. --- .devcontainer/devcontainer.json | 11 ++ .github/workflows/ci.yml | 30 +++++ .hyf/test.sh | 176 +++++++++++++++++++++++-- AI_ASSIST.md | 25 ++++ Dockerfile | 24 ++++ README.md | 64 +++++++-- task-1/task 1 files => assets/.gitkeep | 0 requirements.txt | 13 ++ task-2/task 2 files => src/__init__.py | 0 src/pipeline.py | 60 +++++++++ tests/__init__.py | 0 tests/test_pipeline.py | 64 +++++++++ 12 files changed, 449 insertions(+), 18 deletions(-) create mode 100644 .devcontainer/devcontainer.json create mode 100644 .github/workflows/ci.yml mode change 100644 => 100755 .hyf/test.sh create mode 100644 AI_ASSIST.md create mode 100644 Dockerfile rename task-1/task 1 files => assets/.gitkeep (100%) create mode 100644 requirements.txt rename task-2/task 2 files => src/__init__.py (100%) create mode 100644 src/pipeline.py create mode 100644 tests/__init__.py create mode 100644 tests/test_pipeline.py diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..13ae470 --- 
/dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,11 @@ +{ + "name": "Week 5: Containers & CI/CD", + "image": "mcr.microsoft.com/devcontainers/python:3.11", + "features": { + "ghcr.io/devcontainers/features/docker-in-docker:2": {}, + "ghcr.io/devcontainers/features/azure-cli:1": {} + }, + "postCreateCommand": "pip install -r requirements.txt", + "forwardPorts": [], + "remoteUser": "vscode" +} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..229eedf --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,30 @@ +# Task 5: Build a CI workflow that runs on pull requests and pushes to main. +# +# See the assignment chapter for the required steps and commands. +# Fill in the TODO values below. + +name: CI + +on: + push: + branches: ["TODO-replace-with-main"] + pull_request: + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Install dependencies + run: pip install -r requirements.txt + - name: Lint + run: echo "TODO implement this step" + - name: Format + run: echo "TODO implement this step" + - name: Test + run: echo "TODO implement this step" + - name: Build image + run: echo "TODO implement this step" diff --git a/.hyf/test.sh b/.hyf/test.sh old mode 100644 new mode 100755 index ee037fc..e470834 --- a/.hyf/test.sh +++ b/.hyf/test.sh @@ -1,13 +1,173 @@ #!/usr/bin/env bash +# Week 5 autograder: static analysis only (no Docker or Azure required in CI). +# Each level adds points toward 100; passing score is 60. set -euo pipefail -# Run your test scripts here. -# Auto grade tool will execute this file within the .hyf working directory. -# The result should be stored in score.json file with the format shown below. -cat << EOF > score.json +REPO_ROOT="$(cd "$(dirname "$0")/.." 
&& pwd)" +score=0 +details=() + +pass() { details+=("PASS: $1"); } +fail() { details+=("FAIL: $1"); } + +# ── Level 1 (15 pts): required files exist ────────────────────────────────── +l1=0 +for f in Dockerfile "src/pipeline.py" "tests/test_pipeline.py" "AI_ASSIST.md"; do + if [[ -f "$REPO_ROOT/$f" ]]; then + ((l1 += 3)) + else + fail "missing $f" + fi +done +# ci.yml +if ls "$REPO_ROOT/.github/workflows/"*.yml 2>/dev/null | grep -q .; then + ((l1 += 2)) +else + fail "missing .github/workflows/*.yml" +fi +# requirements.txt or pyproject.toml +if [[ -f "$REPO_ROOT/requirements.txt" ]] || [[ -f "$REPO_ROOT/pyproject.toml" ]]; then + ((l1 += 1)) +else + fail "missing requirements.txt or pyproject.toml" +fi +((score += l1)) +pass "Level 1: required files ($l1/15 pts)" + +# ── Level 2 (15 pts): Dockerfile correctness ──────────────────────────────── +l2=0 +df="$REPO_ROOT/Dockerfile" +if [[ -f "$df" ]]; then + if grep -qiE "^FROM\s+python:3\.11" "$df"; then + ((l2 += 5)); pass "Dockerfile uses python:3.11 base image" + else + fail "Dockerfile does not use python:3.11-slim base image" + fi + + # Dependency copy must appear before source copy (cache-friendly order) + req_line=$(grep -n "COPY.*requirements" "$df" | head -1 | cut -d: -f1 || echo 0) + src_line=$(grep -n "COPY.*src" "$df" | head -1 | cut -d: -f1 || echo 9999) + if [[ "$req_line" -gt 0 && "$req_line" -lt "$src_line" ]]; then + ((l2 += 7)); pass "Dockerfile copies requirements before source (cache-friendly)" + else + fail "Dockerfile does not copy requirements before source code" + fi + + if grep -qE "^CMD" "$df"; then + ((l2 += 3)); pass "Dockerfile has a CMD instruction" + else + fail "Dockerfile missing CMD instruction" + fi +fi +((score += l2)) +pass "Level 2: Dockerfile ($l2/15 pts)" + +# ── Level 3 (15 pts): pinned dependencies ─────────────────────────────────── +l3=0 +if [[ -f "$REPO_ROOT/requirements.txt" ]]; then + pinned=$(grep -cE "^[a-zA-Z].*==" "$REPO_ROOT/requirements.txt" || true) + if [[ 
"$pinned" -ge 1 ]]; then + ((l3 += 10)); pass "requirements.txt has $pinned pinned package(s)" + else + fail "requirements.txt has no pinned packages (use package==version)" + fi +fi +if [[ -f "$REPO_ROOT/uv.lock" ]]; then + ((l3 += 5)); pass "uv.lock present (full dependency tree pinned)" +elif [[ "$l3" -ge 10 ]]; then + ((l3 += 5)); pass "requirements.txt pins satisfied (no uv.lock needed)" +fi +((score += l3)) +pass "Level 3: pinned dependencies ($l3/15 pts)" + +# ── Level 4 (20 pts): CI workflow ──────────────────────────────────────────── +l4=0 +ci_file=$(ls "$REPO_ROOT/.github/workflows/"*.yml 2>/dev/null | head -1 || true) +if [[ -n "$ci_file" ]]; then + grep -q "pull_request" "$ci_file" && { ((l4 += 4)); pass "ci.yml triggers on pull_request"; } || fail "ci.yml missing pull_request trigger" + grep -q '"main"' "$ci_file" && { ((l4 += 4)); pass "ci.yml triggers on push to main"; } || fail "ci.yml missing push to main trigger" + grep -q "ruff check" "$ci_file" && { ((l4 += 3)); pass "ci.yml runs ruff check (lint)"; } || fail "ci.yml missing ruff check step" + grep -q "ruff format" "$ci_file" && { ((l4 += 3)); pass "ci.yml runs ruff format (format check)"; } || fail "ci.yml missing ruff format step" + grep -q "pytest" "$ci_file" && { ((l4 += 3)); pass "ci.yml runs pytest"; } || fail "ci.yml missing pytest step" + grep -q "docker build" "$ci_file" && { ((l4 += 3)); pass "ci.yml runs docker build"; } || fail "ci.yml missing docker build step" +fi +((score += l4)) +pass "Level 4: CI workflow ($l4/20 pts)" + +# ── Level 5 (15 pts): env-var configuration ────────────────────────────────── +l5=0 +py="$REPO_ROOT/src/pipeline.py" +if [[ -f "$py" ]]; then + if grep -qE "os\.(environ|getenv)" "$py"; then + ((l5 += 10)); pass "pipeline.py reads config from os.environ/os.getenv" + else + fail "pipeline.py does not read from os.environ or os.getenv" + fi + if ! 
grep -q "NotImplementedError" "$py"; then + ((l5 += 5)); pass "pipeline.py has no NotImplementedError stubs remaining" + else + fail "pipeline.py still contains NotImplementedError" + fi +fi +((score += l5)) +pass "Level 5: env-var config ($l5/15 pts)" + +# ── Level 6 (10 pts): ACR screenshot ──────────────────────────────────────── +l6=0 +screenshot="$REPO_ROOT/assets/acr_push_week5.png" +if [[ -f "$screenshot" ]]; then + size=$(wc -c < "$screenshot") + if [[ "$size" -gt 1024 ]]; then + ((l6 += 10)); pass "assets/acr_push_week5.png present and non-trivial (${size} bytes)" + else + fail "assets/acr_push_week5.png exists but looks empty (${size} bytes)" + fi +else + fail "assets/acr_push_week5.png missing (Task 6 deliverable)" +fi +((score += l6)) +pass "Level 6: ACR screenshot ($l6/10 pts)" + +# ── Level 7 (10 pts): AI_ASSIST.md content ────────────────────────────────── +l7=0 +ai="$REPO_ROOT/AI_ASSIST.md" +if [[ -f "$ai" ]]; then + chars=$(wc -c < "$ai") + has_prompt=$(grep -c "## The prompt" "$ai" || true) + has_code=$(grep -c "## The code" "$ai" || true) + has_changed=$(grep -c "## What I changed" "$ai" || true) + has_todo=$(grep -c "^TODO:" "$ai" || true) + + if [[ "$has_prompt" -ge 1 && "$has_code" -ge 1 && "$has_changed" -ge 1 ]]; then + ((l7 += 5)); pass "AI_ASSIST.md has all three required sections" + else + fail "AI_ASSIST.md missing one or more required sections" + fi + if [[ "$chars" -gt 500 && "$has_todo" -eq 0 ]]; then + ((l7 += 5)); pass "AI_ASSIST.md is filled in (${chars} chars, no TODO placeholders)" + else + fail "AI_ASSIST.md still contains TODO placeholders or is too short (${chars} chars)" + fi +fi +((score += l7)) +pass "Level 7: AI report ($l7/10 pts)" + +# ── Final result ───────────────────────────────────────────────────────────── +passing_score=60 +pass_flag="false" +[[ "$score" -ge "$passing_score" ]] && pass_flag="true" + +echo "" +echo "=== Week 5 Autograder Results ===" +for line in "${details[@]}"; do echo " $line"; done +echo "" 
+echo "Score: $score / 100 (passing: $passing_score)" +echo "Pass: $pass_flag" + +cat > "$(dirname "$0")/score.json" << JSON { - "score": 0, - "pass": true, - "passingScore": 0 + "score": $score, + "pass": $pass_flag, + "passingScore": $passing_score } -EOF +JSON diff --git a/AI_ASSIST.md b/AI_ASSIST.md new file mode 100644 index 0000000..22fefa8 --- /dev/null +++ b/AI_ASSIST.md @@ -0,0 +1,25 @@ +# AI Assist Report + +> Task 7: Fill in all three sections below. Your reflection should be specific — +> describe exactly what you asked, what the AI returned, and what you changed. +> "The AI fixed it" is not enough detail. + +## The prompt I gave + + + +TODO: paste your prompt here. + +## The code or suggestion it returned + + + +```python +# TODO: paste the AI-generated code here +``` + +## What I changed after reviewing it + + + +TODO: describe your review and any changes you made. diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..357c108 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,24 @@ +# Task 3: Write a cache-friendly Dockerfile. +# +# Requirements (in order): +# 1. Use python:3.11-slim as the base image. +# 2. Copy requirements.txt BEFORE copying source code. +# 3. Install dependencies from requirements.txt. +# 4. Copy src/ into the image. +# 5. Set the CMD to run the pipeline: python -m src.pipeline +# +# Replace each TODO comment with the correct Dockerfile instruction. 
+ +# TODO: set the base image +FROM TODO + +WORKDIR /app + +# TODO: copy requirements.txt (before source — this keeps the install layer cached) + +# TODO: install dependencies + +# TODO: copy source code + +# TODO: set the command that runs when the container starts +CMD ["TODO"] diff --git a/README.md b/README.md index 96ce7bc..e9c9285 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,61 @@ -# [Track] week X assignment -HackYourFuture week X assignment -The Week X assignment for the HackYourFuture can be found at the following link: [TODO: Assignment url in the learning platform] +# Week 5 Assignment: Containerize and Ship +You have a Python pipeline. Your job is to make it reproducible, containerized, and shippable through CI. -## Implementation Instructions +## Project structure -Provide clear instructions on how trainees should implement the tasks. +```text +week5-container-assignment/ +├── .github/ +│ └── workflows/ +│ └── ci.yml ← Task 5: CI workflow (fill in the TODO steps) +├── src/ +│ └── pipeline.py ← Task 1 & 4: pipeline logic and env-var config +├── tests/ +│ └── test_pipeline.py ← tests that must pass before you touch the Dockerfile +├── Dockerfile ← Task 3: write a cache-friendly Dockerfile +├── requirements.txt ← Task 2: pin all dependencies +└── AI_ASSIST.md ← Task 7: document your LLM usage +``` -### Task 1 -Instructions for Task 1 +## Open in Codespaces -### Task 2 -Instructions for Task 2 +> 💻 [Open in GitHub Codespaces](https://github.com/codespaces/new/HackYourFuture/data-assignment-week-5) -... +Docker and the Azure CLI are pre-installed. Run `az login --use-device-code` before Task 6. +## Tasks at a glance + +| Task | What you do | +|---|---| +| 1: Choose a Pipeline | The starter `src/pipeline.py` has three functions with `raise NotImplementedError`. Implement them so the tests pass. | +| 2: Pin Dependencies | Fill in `requirements.txt` with pinned versions (`package==version`). 
| +| 3: Dockerfile | Complete the `Dockerfile` following the TODO comments. | +| 4: Configuration | `get_config()` must read `API_KEY` from the environment and raise a clear error if it is missing. | +| 5: CI Workflow | Replace the `echo "TODO"` steps in `ci.yml` with real commands. | +| 6: Push to ACR | Add Azure login + ACR push steps to your workflow; screenshot the result. | +| 7: AI Report | Fill in `AI_ASSIST.md` with your LLM prompt, the suggestion, and what you changed. | + +## How to run locally + +```bash +python -m venv .venv && source .venv/bin/activate +pip install -r requirements.txt +API_KEY=test pytest -q +``` + +## How to run in Docker (after completing Task 3) + +```bash +docker build -t my-pipeline:1.0 . +docker run --rm -e API_KEY=test my-pipeline:1.0 +``` + +## Submitting + +1. Create a branch: `git switch -c week5/your-name` +2. Commit your work. +3. Push and open a Pull Request against `main`. +4. Share the PR URL with your teacher. + +See the [full assignment instructions](https://hackyourfuture.github.io/datatrack/week-5/assignment) for Task 6 (ACR push) and the grading rubric. diff --git a/task-1/task 1 files b/assets/.gitkeep similarity index 100% rename from task-1/task 1 files rename to assets/.gitkeep diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..42299cf --- /dev/null +++ b/requirements.txt @@ -0,0 +1,13 @@ +# Task 2: Pin every dependency your pipeline uses. +# +# Format: package==version +# Example: requests==2.31.0 +# +# Find the current version of any package: +# pip show +# +# Always include pytest and ruff: +# pytest== +# ruff== +# +# Add your pinned dependencies below: diff --git a/task-2/task 2 files b/src/__init__.py similarity index 100% rename from task-2/task 2 files rename to src/__init__.py diff --git a/src/pipeline.py b/src/pipeline.py new file mode 100644 index 0000000..84e8087 --- /dev/null +++ b/src/pipeline.py @@ -0,0 +1,60 @@ +""" +Week 5 assignment: containerised data pipeline. 
+ +Tasks: +- Task 1: confirm this script runs locally before touching the Dockerfile. +- Task 4: read all configuration from environment variables (no hardcoded values). + +Replace every `raise NotImplementedError` below with a real implementation. +""" + +import logging +from pathlib import Path + +logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s") +logger = logging.getLogger(__name__) + + +def get_config() -> dict: + """ + Return configuration read from environment variables. + + Required variable: API_KEY + Optional variable: OUTPUT_DIR (default "output") + + Raise RuntimeError with a clear message if a required variable is missing. + """ + raise NotImplementedError("Task 4: read API_KEY and OUTPUT_DIR from the environment") + + +def fetch_data(api_key: str) -> list[dict]: + """ + Simulate fetching records from an external API. + + Return a list of at least one dict representing a record. + In a real pipeline you would call requests.get(...) here. + """ + raise NotImplementedError("Task 1: return at least one sample record") + + +def save_results(records: list[dict], output_dir: Path) -> None: + """ + Write each record as a line to output_dir/results.txt. + + Create output_dir if it does not exist. + Log the number of records written. 
+ """ + raise NotImplementedError("Task 1: write records to output_dir/results.txt") + + +def run() -> None: + config = get_config() + logger.info("starting pipeline") + records = fetch_data(config["api_key"]) + output_dir = Path(config["output_dir"]) + save_results(records, output_dir) + logger.info("pipeline complete") + + +if __name__ == "__main__": + run() diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py new file mode 100644 index 0000000..85a9ac1 --- /dev/null +++ b/tests/test_pipeline.py @@ -0,0 +1,64 @@ +"""Tests for the Week 5 pipeline.""" + +import os +from pathlib import Path + +import pytest + +from src.pipeline import fetch_data, get_config, save_results + + +class TestGetConfig: + def test_returns_api_key_from_env(self, monkeypatch): + monkeypatch.setenv("API_KEY", "test-key-123") + monkeypatch.delenv("OUTPUT_DIR", raising=False) + config = get_config() + assert config["api_key"] == "test-key-123" + + def test_uses_default_output_dir(self, monkeypatch): + monkeypatch.setenv("API_KEY", "test-key-123") + monkeypatch.delenv("OUTPUT_DIR", raising=False) + config = get_config() + assert config["output_dir"] == "output" + + def test_reads_custom_output_dir(self, monkeypatch): + monkeypatch.setenv("API_KEY", "test-key-123") + monkeypatch.setenv("OUTPUT_DIR", "/tmp/myout") + config = get_config() + assert config["output_dir"] == "/tmp/myout" + + def test_raises_when_api_key_missing(self, monkeypatch): + monkeypatch.delenv("API_KEY", raising=False) + with pytest.raises((RuntimeError, KeyError, SystemExit)): + get_config() + + +class TestFetchData: + def test_returns_list(self): + records = fetch_data("any-key") + assert isinstance(records, list) + + def test_returns_at_least_one_record(self): + records = fetch_data("any-key") + assert len(records) >= 1 + + def test_records_are_dicts(self): + records = fetch_data("any-key") + assert all(isinstance(r, dict) 
for r in records) + + +class TestSaveResults: + def test_creates_output_dir(self, tmp_path): + output_dir = tmp_path / "new_dir" + save_results([{"id": 1}], output_dir) + assert output_dir.exists() + + def test_writes_results_file(self, tmp_path): + save_results([{"id": 1}, {"id": 2}], tmp_path) + results_file = tmp_path / "results.txt" + assert results_file.exists() + + def test_file_contains_records(self, tmp_path): + save_results([{"id": 1}, {"id": 2}], tmp_path) + content = (tmp_path / "results.txt").read_text() + assert len(content.strip().splitlines()) >= 2 From 3913a72891ec46f59255de7016d0317ee2b689aa Mon Sep 17 00:00:00 2001 From: Lasse Benninga Date: Fri, 22 May 2026 13:13:17 +0200 Subject: [PATCH 2/7] feat(autograder): add .gitignore hygiene check for Python cache files Adds warn() helper to the autograder and a zero-point .gitignore check that flags missing __pycache__/, *.pyc, and .env entries with a clear message explaining why each matters. Also adds Python-specific entries to the scaffold .gitignore (the inherited template was Node.js only). --- .gitignore | 14 ++++++++++++++ .hyf/test.sh | 20 ++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/.gitignore b/.gitignore index 2b76d7c..ec8f344 100644 --- a/.gitignore +++ b/.gitignore @@ -156,3 +156,17 @@ dist vite.config.js.timestamp-* vite.config.ts.timestamp-* + +# Python +__pycache__/ +*.pyc +*.pyo +*.pyd +.Python +.venv/ +venv/ +.env +*.egg-info/ +dist/ +build/ +output/ diff --git a/.hyf/test.sh b/.hyf/test.sh index e470834..89f706f 100755 --- a/.hyf/test.sh +++ b/.hyf/test.sh @@ -9,6 +9,7 @@ details=() pass() { details+=("PASS: $1"); } fail() { details+=("FAIL: $1"); } +warn() { details+=("WARN: $1"); } # ── Level 1 (15 pts): required files exist ────────────────────────────────── l1=0 @@ -34,6 +35,25 @@ fi ((score += l1)) pass "Level 1: required files ($l1/15 pts)" +# ── .gitignore hygiene (0 pts, warnings only) ──────────────────────────────── +gi="$REPO_ROOT/.gitignore" +if [[ ! 
-f "$gi" ]]; then + warn ".gitignore is missing — add one so __pycache__/ and *.pyc are not committed" +else + if ! grep -q "__pycache__" "$gi"; then + warn ".gitignore is missing __pycache__/ — Python cache dirs should not be committed" + fi + if ! grep -q "\*.pyc" "$gi"; then + warn ".gitignore is missing *.pyc — compiled Python files should not be committed" + fi + if ! grep -q "\.env" "$gi"; then + warn ".gitignore is missing .env — secret files should not be committed" + fi + if grep -qE "^__pycache__/$" "$gi" && grep -qE "^\*\.pyc$" "$gi" && grep -qE "^\.env$" "$gi"; then + pass ".gitignore correctly excludes __pycache__/, *.pyc, and .env" + fi +fi + # ── Level 2 (15 pts): Dockerfile correctness ──────────────────────────────── l2=0 df="$REPO_ROOT/Dockerfile" From 9aecb76fe4a2469ce6046d293d2e3a3233f1754c Mon Sep 17 00:00:00 2001 From: Lasse Benninga Date: Fri, 22 May 2026 13:47:26 +0200 Subject: [PATCH 3/7] refactor(autograder): source grader_lib instead of inlining helpers Replaces the inline pass/fail/warn helpers with a source of grader_lib.sh. Adds check_no_print_statements and check_gitignore_python from the lib. Scoring ladder and output unchanged (27/100 scaffold, 100/100 solution). --- .hyf/grader_lib.sh | 250 +++++++++++++++++++++++++++++++++++++++++++++ .hyf/test.sh | 58 +++-------- 2 files changed, 267 insertions(+), 41 deletions(-) create mode 100644 .hyf/grader_lib.sh diff --git a/.hyf/grader_lib.sh b/.hyf/grader_lib.sh new file mode 100644 index 0000000..3478d4f --- /dev/null +++ b/.hyf/grader_lib.sh @@ -0,0 +1,250 @@ +#!/usr/bin/env bash +# grader_lib.sh — shared helpers for HYF Data Track autograders. +# Source this at the top of test.sh: +# source "$(dirname "$0")/grader_lib.sh" +# +# Provides: pass(), fail(), warn(), print_results(), write_score(), +# and a set of common static-analysis checks derived from recurring +# PR review patterns across cohort c55. 
+ +_grader_details=() + +pass() { _grader_details+=("✓ PASS $1"); } +fail() { _grader_details+=("✗ FAIL $1"); } +warn() { _grader_details+=("⚠ WARN $1"); } + +print_results() { + local header="${1:-Autograder Results}" + echo "" + echo "=== $header ===" + for line in "${_grader_details[@]}"; do echo " $line"; done + echo "" +} + +write_score() { + # Usage: write_score SCORE PASSING [OUTFILE] (OUTFILE defaults to score.json next to this library) + local score="$1" + local passing="$2" + local outfile="${3:-$(dirname "${BASH_SOURCE[0]}")/score.json}" + local pass_flag="false" + [[ "$score" -ge "$passing" ]] && pass_flag="true" + cat > "$outfile" << JSON +{ + "score": $score, + "pass": $pass_flag, + "passingScore": $passing +} +JSON + echo "Score: $score / 100 (passing: $passing) pass=$pass_flag" +} + +# ── Common static-analysis checks ──────────────────────────────────────────── +# Each function: returns 0 on pass, 1 on fail/warn (for caller logic). +# All feedback goes through pass()/fail()/warn() so it appears in print_results. + +check_no_print_statements() { + # Usage: check_no_print_statements [DIR] [LABEL] (DIR defaults to ".", LABEL defaults to DIR) + # Flags bare print() calls that should be logging calls; lines carrying "# noqa" are ignored. + local dir="${1:-.}" + local label="${2:-$dir}" + local found + found=$(grep -rn "^[[:space:]]*print(" "$dir" --include="*.py" 2>/dev/null | grep -v "# noqa" || true) + if [[ -n "$found" ]]; then + local count + count=$(echo "$found" | wc -l | tr -d ' ') + warn "$label: $count print() call(s) found — use logging.info/warning/error instead (see Week 1 Ch1)" + return 1 + fi + return 0 +} + +check_no_notimplemented() { + # Usage: check_no_notimplemented [DIR] [LABEL] (DIR defaults to ".", LABEL defaults to DIR) + # Flags NotImplementedError stubs left in after implementation. 
+ local dir="${1:-.}" + local label="${2:-$dir}" + local found + found=$(grep -rn "raise NotImplementedError" "$dir" --include="*.py" 2>/dev/null || true) + if [[ -n "$found" ]]; then + fail "$label: raise NotImplementedError still present — remove stubs before submitting" + return 1 + fi + return 0 +} + +check_no_relative_imports() { + # Usage: check_no_relative_imports [DIR] [LABEL] (DIR defaults to ".", LABEL defaults to DIR) + # Flags `from .module import x` in scripts not inside a proper package. + # Relative imports break the grader: python3 src/cleaner.py fails with + # "attempted relative import with no known parent package". + local dir="${1:-.}" + local label="${2:-$dir}" + local found + found=$(grep -rn "^from \." "$dir" --include="*.py" 2>/dev/null || true) + if [[ -n "$found" ]]; then + fail "$label: relative import found (from .module) — use absolute: 'from src.module import x'" + return 1 + fi + return 0 +} + +check_no_logging_in_utils() { + # Usage: check_no_logging_in_utils [FILE] (FILE defaults to task-1/src/utils.py; a missing file passes) + # utils.py should be pure helpers; logging config belongs in the entry point. + local file="${1:-task-1/src/utils.py}" + if [[ ! -f "$file" ]]; then return 0; fi + if grep -qE "logging\.basicConfig|logging\.getLogger" "$file"; then + warn "$file: logging.basicConfig/getLogger found — logging setup belongs in cleaner.py or the entry-point, not in utils" + return 1 + fi + return 0 +} + +check_gitignore_python() { + # Usage: check_gitignore_python [GITIGNORE_PATH] (defaults to .gitignore); accepts *.pyc or a *.py[cod]-style class containing c. + # Warns when Python cache patterns are absent from .gitignore. + local gi="${1:-.gitignore}" + if [[ ! -f "$gi" ]]; then + warn ".gitignore is missing — add one so __pycache__/ and *.pyc are not committed" + return 1 + fi + local ok=true + if ! grep -q "__pycache__" "$gi"; then + warn ".gitignore missing __pycache__/ — Python bytecode cache dirs should not be committed" + ok=false + fi + if ! grep -qE "\*\.py(c|\[[a-z]*c[a-z]*\])" "$gi"; then + warn ".gitignore missing *.pyc — compiled Python files should not be committed" + ok=false + fi + if ! 
grep -qE "^\.env$|^\.env\b" "$gi"; then + warn ".gitignore missing .env — secret files should not be committed" + ok=false + fi + [[ "$ok" = true ]] && pass ".gitignore correctly excludes __pycache__/, *.pyc, and .env" +} + +check_screenshot_is_png() { + # Usage: check_screenshot_is_png EXPECTED_PNG_PATH + # Returns 0 for a non-empty .png, 1 (with a warning) for a same-named non-empty .jpg/.jpeg, 2 when missing; + # the caller decides how much credit to award. Matches the pattern flagged in c55 PR reviews. + local expected_png="$1" + local dir + dir="$(dirname "$expected_png")" + local base + base="$(basename "$expected_png" .png)" + + if [[ -s "$expected_png" ]]; then + pass "screenshot is $expected_png (.png format ✓)" + return 0 + fi + for ext in jpg jpeg; do + if [[ -s "$dir/$base.$ext" ]]; then + warn "screenshot is .$ext but should be .png — rename to $base.png (partial credit still given)" + return 1 + fi + done + fail "screenshot missing: $expected_png not found" + return 2 +} + +check_silent_zero_in_except() { + # Usage: check_silent_zero_in_except FILE (a missing FILE passes) + # Detects the pattern: try: x = compute() / except: x = 0 + # which silently corrupts data instead of skipping or raising. + local file="$1" + if [[ ! 
-f "$file" ]]; then return 0; fi + local found + found=$(python3 - "$file" 2>/dev/null << 'PY' +import ast, sys +try: + tree = ast.parse(open(sys.argv[1]).read()) +except SyntaxError: + sys.exit(0) +for node in ast.walk(tree): + if isinstance(node, ast.ExceptHandler): + for stmt in node.body: + if isinstance(stmt, ast.Assign): + if isinstance(stmt.value, ast.Constant) and stmt.value.value == 0: + print(f"line {stmt.lineno}: '{ast.unparse(stmt)}' — sets field to 0 in except block (silent data corruption)") +PY +) + if [[ -n "$found" ]]; then + warn "$file: silent 0-assignment in except block — skip the row or raise instead of setting to 0:\n $found" + return 1 + fi + return 0 +} + +check_exception_logged() { + # Usage: check_exception_logged + # Warns when except blocks log/print a message but don't include the + # exception variable (e, err, exc), meaning the error type is lost. + local dir="${1:-.}" + local found + found=$(python3 - "$dir" 2>/dev/null << 'PY' +import ast, os, sys +issues = [] +for root, _, files in os.walk(sys.argv[1]): + for fname in files: + if not fname.endswith(".py"): + continue + path = os.path.join(root, fname) + try: + tree = ast.parse(open(path).read()) + except SyntaxError: + continue + for node in ast.walk(tree): + if not isinstance(node, ast.ExceptHandler): + continue + exc_var = node.name # e.g. "e" in `except ValueError as e` + if not exc_var: + continue + for stmt in node.body: + for call in ast.walk(stmt): + if not isinstance(call, ast.Call): + continue + # Is it a logging.* or print call? + func = call.func + is_log = (isinstance(func, ast.Attribute) and + isinstance(func.value, ast.Name) and + func.value.id == "logging") + is_print = isinstance(func, ast.Name) and func.id == "print" + if not (is_log or is_print): + continue + # Does the call reference the exception variable? 
+ names = {n.id for n in ast.walk(call) if isinstance(n, ast.Name)} # identifiers actually used in the call, not substrings of its text + if exc_var not in names and getattr(func, "attr", None) != "exception": # logging.exception() already records the active exception + issues.append(f"{path}:{call.lineno}: log message doesn't include exception variable '{exc_var}' — add it for easier debugging") +if issues: + for i in issues[:3]: # cap at 3 to keep output readable + print(i) +PY +) || true + if [[ -n "$found" ]]; then + warn "exception variable not included in log message (harder to debug):"$'\n'" $found" + return 1 + fi + return 0 +} + +check_ruff() { + # Usage: check_ruff [