baskduf
diff --git a/ROADMAP.md b/ROADMAP.md
Lines changed: 3 additions & 3 deletions
diff --git a/docs/decisions/0004-link-failure-memory-to-regression-checks.md b/docs/decisions/0004-link-failure-memory-to-regression-checks.md
Lines changed: 10 additions & 6 deletions
diff --git a/docs/examples/task-outcomes/005-make-just-command-validation.yaml b/docs/examples/task-outcomes/005-make-just-command-validation.yaml
Lines changed: 78 additions & 0 deletions
diff --git a/scripts/check_effectiveness_plan.py b/scripts/check_effectiveness_plan.py
Lines changed: 153 additions & 3 deletions
@@ -106,9 +106,9 @@ guidance, Next.js App Router notes, failure-memory verification, decision-memory
 warnings, deterministic behavior gate placement, and trigger-based task outcome
 evidence. Useful next additions include:
 
-- command existence validation for `make`, `just`, Maven, Gradle, Go, and other
-  profile-relevant task runners referenced by failure-memory or effectiveness
-  records
+- command existence validation beyond package scripts, root `make` targets, and
+  root `just` recipes, extending it to Maven, Gradle, Go, and other
+  profile-relevant task runners referenced by failure-memory or effectiveness
+  records
 - more fixture-backed examples for provider-specific request shape, response
   envelopes, redaction, zero-result behavior, and provider errors
 - clearer ADR and failure-record boundary examples for small changes so
 
@@ -78,17 +78,21 @@ post-push, or unknown.
   JavaScript package scripts and avoids passing a root command only because a
   nested workspace package has the same script, but it still does not prove that
   the script asserts the specific failure axis.
+- Root `make target` and `just recipe` checks are verified against checked-in
+  root Makefile variants and justfile variants. This closes the same
+  fake-command gap for common task-runner commands, but it still does not parse
+  included makefiles or option-heavy invocations such as `make -C app check`,
+  and it does not prove that the target asserts the specific failure axis.
 - Other command-shaped checks are still recognized mostly by shape. The checker
-  does not yet verify that `make`, `just`, Python module commands, Gradle,
-  Maven, Go, Rust, .NET, or other task-runner commands exist in the target
-  configuration.
+  does not yet verify that Python module commands, Gradle, Maven, Go, Rust,
+  .NET, or other task-runner commands exist in the target configuration.
 - Monorepo and workspace-specific commands need explicit target adaptation when
   the intended command is not runnable from the repository root.
 - Detection-link validation is regex-based. It blocks known non-committal
   phrases, but future wording may require additional test cases.
-- Generic command coverage is still biased toward common JavaScript and Python
-  commands. Add explicit coverage before relying on this gate for Go, Rust,
-  Java, .NET, or Gradle-heavy targets.
+- Generic command coverage is still biased toward common JavaScript, Make,
+  Just, and Python-shaped commands. Add explicit coverage before relying on
+  this gate for Go, Rust, Java, .NET, or Gradle-heavy targets.
 - Target repositories with pre-existing non-kit `docs/failures/*.md` schemas
   may need adoption-specific adaptation instead of blindly applying the generic
   checker.
 
@@ -0,0 +1,78 @@
+schema_version: 1
+
+target:
+  repository: baskduf/harness-starter-kit
+  repository_ref: codex/make-just-command-validation working tree based on e98b6f40f134bcacbab0bb9cb12178f1e071bb63
+  stack_or_framework: Python and Markdown harness kit
+  date: 2026-06-06
+  agent_or_model: Codex
+  reviewer: Codex primary agent plus read-only subagent reviewer
+
+task:
+  id: make-just-command-validation
+  run_id: harness-starter-kit-005
+  prompt_summary: Extend command existence validation for root make targets and just recipes.
+  prompt_ref: current Codex thread request to proceed with option 1 and review findings until resolved
+  prompt_hash: not recorded
+  comparable_task_group: harness-maintenance
+  condition: harnessed-only
+  expected_boundary:
+    - ROADMAP.md
+    - docs/decisions/0004-link-failure-memory-to-regression-checks.md
+    - docs/examples/task-outcomes/**
+    - scripts/check_effectiveness_plan.py
+    - scripts/check_failure_memory.py
+    - templates/generic/scripts/check_effectiveness_plan.py
+    - templates/generic/scripts/check_failure_memory.py
+    - tests/test_check_effectiveness_plan.py
+    - tests/test_check_failure_memory.py
+    - tests/test_repository_hygiene.py
+  known_failure_mode: Failure memory or task outcome evidence can cite fake make or just commands that look concrete but are not declared in the target repository.
+
+harness_context:
+  harness_doctor_score: previously 98/100, not treated as effectiveness proof
+  harness_source:
+    kit_url: https://github.com/baskduf/harness-starter-kit
+    kit_commit: e98b6f40f134bcacbab0bb9cb12178f1e071bb63
+    source_tracking_ref: none; this repository is the kit source
+  relevant_instructions:
+    - AGENTS.md
+    - docs/decisions/0004-link-failure-memory-to-regression-checks.md
+    - ROADMAP.md
+  relevant_constraints:
+    - python3 -m unittest tests.test_check_failure_memory tests.test_check_effectiveness_plan tests.test_repository_hygiene
+    - python3 -m py_compile scripts/check_failure_memory.py scripts/check_effectiveness_plan.py templates/generic/scripts/check_failure_memory.py templates/generic/scripts/check_effectiveness_plan.py
+    - python3 scripts/check_effectiveness_plan.py
+    - python3 scripts/check_failure_memory.py
+  relevant_memory_records:
+    - docs/decisions/0004-link-failure-memory-to-regression-checks.md
+    - docs/decisions/0006-trigger-task-outcome-evidence-for-substantial-harness-work.md
+    - docs/failures/0005-failure-memory-was-not-linked-to-regression-checks.md
+    - docs/failures/0007-dogfood-first-pass-failures-lacked-memory-decision.md
+
+outcome:
+  files_changed:
+    - ROADMAP.md
+    - docs/decisions/0004-link-failure-memory-to-regression-checks.md
+    - docs/examples/task-outcomes/005-make-just-command-validation.yaml
+    - scripts/check_effectiveness_plan.py
+    - scripts/check_failure_memory.py
+    - templates/generic/scripts/check_effectiveness_plan.py
+    - templates/generic/scripts/check_failure_memory.py
+    - tests/test_check_effectiveness_plan.py
+    - tests/test_check_failure_memory.py
+  wrong_file_edits: 0
+  repeated_known_mistake: false
+  verification_command: python3 -m unittest tests.test_check_failure_memory tests.test_check_effectiveness_plan tests.test_repository_hygiene && python3 -m py_compile scripts/check_failure_memory.py scripts/check_effectiveness_plan.py templates/generic/scripts/check_failure_memory.py templates/generic/scripts/check_effectiveness_plan.py
+  first_pass_verification:
+    result: failed_then_passed
+  drift_violations_detected: []
+  human_rework_minutes: 0
+  reverted_files: []
+  notes: The validation is intentionally scoped to root Makefile and justfile variants; option-heavy or included-file task runners remain follow-up scope. Primary-agent review fixed a test assertion indentation issue; subagent review found and the final loop fixed task-outcome false-inclusion validation, GNU makefile precedence, make variable assignments including dotted values, just default-parameter parsing, and an inline-code backtick regex miss before final validation.
+
+follow_up:
+  harness_change_needed: false
+  decision_or_failure_record: docs/decisions/0004-link-failure-memory-to-regression-checks.md; no failure record because this closed a known validation gap rather than fixing a recurring failed check from this run.
+  include_in_effectiveness_report: false
+  include_in_comparable_product_task_count: false
@@ -97,7 +97,11 @@
     re.compile(r"\b(?:tests?|specs?|fixtures?|scripts?)/[^\s,.;)]+"),
     re.compile(r"`?\.github/workflows/[^\s,.;)`]+`?"),
     re.compile(r"\b(?:npm|pnpm|yarn|bun)\s+run\s+[\w:./-]+"),
-    re.compile(r"\b(?:make|just)\s+[\w:./-]+"),
+    re.compile(
+        r"\bmake(?:\s+[\w.-]+=[^\s,;)`\]}]+)*\s+(?!-)[\w:./-]+"
+        r"(?=$|[\s,.;)`\]}])"
+    ),
+    re.compile(r"\bjust\s+(?!-)[\w:./-]+"),
     re.compile(r"\bpython3?\s+(?:-m\s+[\w.:-]+|scripts?/[^\s,.;)]+)"),
     re.compile(r"\bpytest\s+(?:-[\w-]+|tests?/[^\s,.;)]+|[\w/.-]+)"),
     re.compile(r"\b(?:vitest|jest|ruff|mypy|eslint)\s+[\w/.:@-]+"),
@@ -116,6 +120,13 @@
 PACKAGE_SCRIPT_COMMAND_RE = re.compile(
     r"\b(?P<manager>npm|pnpm|yarn|bun)\s+run\s+(?P<script>[\w:./-]+)"
 )
+# Matches a `make` invocation: zero or more NAME=value assignments after
+# `make`, then a target that does not start with "-" (so option flags are not
+# captured as targets), followed by end of text or a delimiter character.
+MAKE_COMMAND_RE = re.compile(
+    r"\bmake(?:\s+[\w.-]+=[^\s,;)`\]}]+)*\s+(?!-)(?P<target>[\w:./-]+)"
+    r"(?=$|[\s,.;)`\]}])"
+)
+# Matches `just <recipe>` where the recipe does not start with "-".
+JUST_COMMAND_RE = re.compile(r"\bjust\s+(?!-)(?P<recipe>[\w:./-]+)")
+# Makefile names probed in the repository root. root_make_targets() uses the
+# first name it finds, so the tuple order sets precedence.
+MAKEFILE_NAMES = ("GNUmakefile", "makefile", "Makefile")
+# Justfile names probed in the repository root. root_just_recipes() reads every
+# variant that is present and merges the recipe names.
+JUSTFILE_NAMES = ("justfile", "Justfile", ".justfile")
 
 FAILURE_RECORD_RE = re.compile(
     r"`?(docs/failures/[^\s,;)`]+)`?",
@@ -429,6 +440,10 @@ def normalize_package_script(value: str) -> str:
     return value.rstrip(".,;)]}")
 
 
+def normalize_command_target(value: str) -> str:
+    return value.rstrip(".,;)]}")
+
+
 def root_package_scripts(root: Path) -> set[str]:
     package_json = root / "package.json"
     if not package_json.exists():
@@ -443,6 +458,71 @@ def root_package_scripts(root: Path) -> set[str]:
     return {str(name) for name in package_scripts}
 
 
+def root_make_targets(root: Path) -> set[str]:
+    targets: set[str] = set()
+    path = next(
+        (root / name for name in MAKEFILE_NAMES if (root / name).exists()),
+        None,
+    )
+    if path is None:
+        return targets
+    try:
+        lines = path.read_text(encoding="utf-8").splitlines()
+    except (OSError, UnicodeDecodeError):
+        return targets
+    for raw_line in lines:
+        if not raw_line or raw_line[:1].isspace():
+            continue
+        line = raw_line.split("#", 1)[0].rstrip()
+        if ":" not in line:
+            continue
+        target_part, rule_part = line.split(":", 1)
+        if not target_part.strip() or "=" in target_part:
+            continue
+        if rule_part.lstrip().startswith("="):
+            continue
+        for target in target_part.split():
+            if target and "%" not in target and not target.startswith("."):
+                targets.add(target)
+    return targets
+
+
+def root_just_recipes(root: Path) -> set[str]:
+    recipes: set[str] = set()
+    for name in JUSTFILE_NAMES:
+        path = root / name
+        if not path.exists():
+            continue
+        try:
+            lines = path.read_text(encoding="utf-8").splitlines()
+        except (OSError, UnicodeDecodeError):
+            continue
+        for raw_line in lines:
+            if not raw_line or raw_line[:1].isspace():
+                continue
+            line = raw_line.split("#", 1)[0].rstrip()
+            alias_match = re.match(r"alias\s+(?P<name>[\w.-]+)\s*:=", line)
+            if alias_match is not None:
+                recipes.add(alias_match.group("name"))
+                continue
+            if ":" not in line:
+                continue
+            recipe_part, rule_part = line.split(":", 1)
+            if not recipe_part.strip():
+                continue
+            if rule_part.lstrip().startswith("="):
+                continue
+            recipe_part = recipe_part.strip()
+            while recipe_part.startswith("[") and "]" in recipe_part:
+                recipe_part = recipe_part.split("]", 1)[1].strip()
+            if not recipe_part:
+                continue
+            recipe = recipe_part.split()[0].lstrip("@")
+            if recipe and not recipe.startswith("["):
+                recipes.add(recipe)
+    return recipes
+
+
 def missing_package_script_commands(root: Path, value: str | None) -> list[str]:
     if value is None:
         return []
@@ -463,6 +543,38 @@ def missing_package_script_commands(root: Path, value: str | None) -> list[str]:
     ]
 
 
+def missing_make_commands(root: Path, value: str | None) -> list[str]:
+    if value is None:
+        return []
+    commands = sorted(
+        {
+            normalize_command_target(match.group("target"))
+            for match in MAKE_COMMAND_RE.finditer(value)
+        }
+    )
+    if not commands:
+        return []
+
+    targets = root_make_targets(root)
+    return [f"make {target}" for target in commands if target not in targets]
+
+
+def missing_just_commands(root: Path, value: str | None) -> list[str]:
+    if value is None:
+        return []
+    commands = sorted(
+        {
+            normalize_command_target(match.group("recipe"))
+            for match in JUST_COMMAND_RE.finditer(value)
+        }
+    )
+    if not commands:
+        return []
+
+    recipes = root_just_recipes(root)
+    return [f"just {recipe}" for recipe in commands if recipe not in recipes]
+
+
 def says_no_failure_record(value: str | None) -> bool:
     if value is None:
         return False
@@ -626,6 +738,26 @@ def validate_adoption_report(root: Path, path: Path, text: str) -> list[Finding]
                         ),
                     )
                 )
+            for command in missing_make_commands(root, detection_value):
+                findings.append(
+                    Finding(
+                        path,
+                        (
+                            "failure-memory detection references missing "
+                            f"Makefile target: {command}"
+                        ),
+                    )
+                )
+            for command in missing_just_commands(root, detection_value):
+                findings.append(
+                    Finding(
+                        path,
+                        (
+                            "failure-memory detection references missing "
+                            f"justfile recipe: {command}"
+                        ),
+                    )
+                )
 
     return findings
 
@@ -646,7 +778,7 @@ def validate_effectiveness_report(path: Path, text: str) -> list[Finding]:
     return findings
 
 
-def validate_task_outcome(path: Path, text: str) -> list[Finding]:
+def validate_task_outcome(root: Path, path: Path, text: str) -> list[Finding]:
     report_include_value = yaml_field_value(text, "include_in_effectiveness_report")
     comparable_count_value = yaml_field_value(
         text, "include_in_comparable_product_task_count"
@@ -679,6 +811,22 @@ def validate_task_outcome(path: Path, text: str) -> list[Finding]:
             )
         )
 
+    verification_command = yaml_field_value(text, "verification_command")
+    for command in missing_make_commands(root, verification_command):
+        findings.append(
+            Finding(
+                path,
+                f"task outcome verification references missing Makefile target: {command}",
+            )
+        )
+    for command in missing_just_commands(root, verification_command):
+        findings.append(
+            Finding(
+                path,
+                f"task outcome verification references missing justfile recipe: {command}",
+            )
+        )
+
     truthy_include_fields = [
         field
         for field in TASK_OUTCOME_INCLUDE_FIELDS
@@ -790,7 +938,9 @@ def check_effectiveness_plan(root: Path, require_report: bool) -> int:
             findings.extend(validate_effectiveness_report(path, text))
 
     for path in iter_task_outcomes(root):
-        findings.extend(validate_task_outcome(path, path.read_text(encoding="utf-8")))
+        findings.extend(
+            validate_task_outcome(root, path, path.read_text(encoding="utf-8"))
+        )
 
     for finding in findings:
         print(f"{finding.path.relative_to(root)}: {finding.message}")