Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions docs/evidence/h2-output-cloud-geometry-20260525.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,13 @@ class-ordered sampling effect。
如果需要推进,最多释放一个有界 order-control / reseeded / interleaved
response-cache scout,用来判断该强信号是否跨 class-order 控制保留。

当前允许的最小脚本改动仅限于生成这个控制 cache:
`scripts/run_h2_response_strength_validation.py --seed-offset-policy shared-position`。
该模式会让 member / nonmember 使用相同 per-position seed offset,并在
`summary.json` 中标记 `order_control_scout = true`。它只用于重新评估
class-ordered sampling effect,不代表 admission,也不得直接生成 Platform /
Runtime row。

## Decision

`candidate complementary signal / order-control required / no admitted row`。
Expand Down
41 changes: 38 additions & 3 deletions scripts/run_h2_response_strength_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,15 @@ def parse_args() -> argparse.Namespace:
parser.add_argument("--bootstrap-iters", type=int, default=200)
parser.add_argument("--frequency-cutoff", type=float, default=0.5)
parser.add_argument("--primary-scorer", choices=PRIMARY_SCORERS, default="raw_h2_logistic")
parser.add_argument(
"--seed-offset-policy",
choices=("class-ordered", "shared-position"),
default="class-ordered",
help=(
"class-ordered preserves the original member/nonmember contiguous seed offsets; "
"shared-position uses the same per-position seed sequence for both classes as an order-control scout."
),
)
parser.add_argument("--no-save-responses", action="store_true")
return parser.parse_args()

Expand All @@ -81,6 +90,16 @@ def _validate_args(args: argparse.Namespace) -> list[int]:
return timesteps


def seed_offsets_for_policy(policy: str, *, member_count: int) -> tuple[int, int]:
    """Map a seed offset policy to its ``(member, nonmember)`` sample offsets.

    ``class-ordered`` starts members at 0 and nonmembers at ``member_count``;
    ``shared-position`` starts both classes at 0.

    Args:
        policy: Name of the seed offset policy.
        member_count: Number of member samples; only consulted for the
            ``class-ordered`` policy.

    Returns:
        A ``(member_offset, nonmember_offset)`` pair.

    Raises:
        ValueError: If ``policy`` is neither of the two names above.
    """
    if policy not in ("class-ordered", "shared-position"):
        raise ValueError(f"Unknown seed offset policy: {policy}")

    # Members start at offset 0 under both policies; only the nonmember
    # starting offset depends on the policy.
    nonmember_start = int(member_count) if policy == "class-ordered" else 0
    return 0, nonmember_start


def main() -> int:
args = parse_args()
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
Expand Down Expand Up @@ -127,6 +146,10 @@ def main() -> int:
packet_indices=nonmember_indices,
batch_size=args.batch_size,
)
member_seed_offset, nonmember_seed_offset = seed_offsets_for_policy(
args.seed_offset_policy,
member_count=len(member_indices),
)

member_inputs, member_responses, member_distances = collect_strength_responses(
model,
Expand All @@ -137,7 +160,7 @@ def main() -> int:
repeats=args.repeats,
denoise_stride=args.denoise_stride,
seed=args.seed,
sample_offset=0,
sample_offset=member_seed_offset,
)
nonmember_inputs, nonmember_responses, nonmember_distances = collect_strength_responses(
model,
Expand All @@ -148,7 +171,7 @@ def main() -> int:
repeats=args.repeats,
denoise_stride=args.denoise_stride,
seed=args.seed,
sample_offset=len(member_indices),
sample_offset=nonmember_seed_offset,
)

labels = np.concatenate(
Expand Down Expand Up @@ -216,6 +239,7 @@ def main() -> int:
raw_best_simple_auc=raw_best_simple_auc,
)
validation_passed = bool(validation_gate["validation_passed"])
is_order_control_scout = args.seed_offset_policy != "class-ordered"

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The logic for determining if a run is an order_control_scout is slightly brittle. Currently, it assumes any policy other than the default class-ordered is a scout. If additional policies are added in the future that are intended for admission (not just scouting), this logic will incorrectly mark them as scouts. It would be safer to explicitly check for the known scout policies.

Suggested change
is_order_control_scout = args.seed_offset_policy != "class-ordered"
is_order_control_scout = args.seed_offset_policy == "shared-position"

summary = {
"status": "ready",
"task": "H2 response-strength validation",
Expand All @@ -235,6 +259,9 @@ def main() -> int:
"repeats": int(args.repeats),
"denoise_stride": int(args.denoise_stride),
"seed": int(args.seed),
"seed_offset_policy": args.seed_offset_policy,
"member_seed_offset": int(member_seed_offset),
"nonmember_seed_offset": int(nonmember_seed_offset),
"frequency_cutoff": float(args.frequency_cutoff),
"primary_scorer": args.primary_scorer,
},
Expand All @@ -257,11 +284,19 @@ def main() -> int:
"validation_gate": validation_gate,
"validation_passed": bool(validation_passed),
"promotion_allowed": False,
"order_control_scout": bool(is_order_control_scout),
},
"verdict": "positive but bounded validation" if validation_passed else "negative but useful",
"verdict": (
"order-control scout generated"
if is_order_control_scout
else "positive but bounded validation"
if validation_passed
else "negative but useful"
),
Comment on lines +289 to +295

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The verdict string currently prioritizes the scout status over the validation result. While scouts are indeed not for admission, it might be useful for debugging and clarity to include whether the scout run actually passed the validation gate in the verdict string itself, rather than just in the validation_passed boolean field.

Suggested change
"verdict": (
"order-control scout generated"
if is_order_control_scout
else "positive but bounded validation"
if validation_passed
else "negative but useful"
),
"verdict": (
f"order-control scout generated ({'passed' if validation_passed else 'failed'})"
if is_order_control_scout
else "positive but bounded validation"
if validation_passed
else "negative but useful"
),

"notes": [
"This runner is a stable successor to archived H2 X-run scripts.",
"A positive result remains candidate evidence until cross-asset or stronger black-box comparator review.",
"Non-default seed offset policies are control-cache scouts only and do not admit a Runtime job.",
],
}
except Exception as exc:
Expand Down
26 changes: 26 additions & 0 deletions tests/test_run_h2_response_strength_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import unittest
from unittest.mock import patch

from scripts.run_h2_response_strength_validation import parse_args, seed_offsets_for_policy


class RunH2ResponseStrengthValidationTests(unittest.TestCase):
    """Checks for the ``--seed-offset-policy`` flag and its offset helper."""

    def test_default_seed_offset_policy_preserves_existing_cache_shape(self) -> None:
        """Parsing with no extra CLI flags yields the ``class-ordered`` policy."""
        bare_argv = ["run_h2_response_strength_validation.py"]
        with patch("sys.argv", bare_argv):
            parsed = parse_args()

        self.assertEqual(parsed.seed_offset_policy, "class-ordered")

    def test_class_ordered_policy_preserves_original_offsets(self) -> None:
        """``class-ordered`` places nonmembers right after the member count."""
        offsets = seed_offsets_for_policy("class-ordered", member_count=512)
        self.assertEqual(offsets, (0, 512))

    def test_shared_position_policy_uses_same_offsets_for_both_classes(self) -> None:
        """``shared-position`` gives both classes a zero offset."""
        offsets = seed_offsets_for_policy("shared-position", member_count=512)
        self.assertEqual(offsets, (0, 0))

    def test_unknown_policy_is_rejected(self) -> None:
        """An unrecognised policy name raises ``ValueError``."""
        expected_message = "Unknown seed offset policy"
        with self.assertRaisesRegex(ValueError, expected_message):
            seed_offsets_for_policy("bad-policy", member_count=512)


if __name__ == "__main__":
unittest.main()
Loading