Skip to content

Commit 5c0d593

Browse files
authored
Add claim boundary language checks in assurance.py
1 parent 47f1781 commit 5c0d593

1 file changed

Lines changed: 160 additions & 0 deletions

File tree

ix/assurance.py

Lines changed: 160 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
import ast as py_ast
56
from collections.abc import Mapping
67
from dataclasses import dataclass, field
78
from typing import Any, Literal
@@ -489,6 +490,7 @@ def _check_one_cognition_attempt(self, attempt: AttemptBlock) -> list[AssuranceC
489490
)
490491
)
491492

493+
checks.extend(self._check_claim_boundary_language(attempt))
492494
checks.extend(self._check_handoff_contracts(attempt.name, handoffs))
493495

494496
if obligations:
@@ -548,6 +550,164 @@ def _presence_check(
548550
{"attempt": attempt},
549551
)
550552

553+
def _check_claim_boundary_language(self, attempt: AttemptBlock) -> list[AssuranceCheck]:
    """Run the three claim-boundary language checks for one attempt.

    Exactly three checks are emitted, in this order:

    1. ``agi_claim_restriction`` - passes when some non_goal statement
       satisfies ``_declares_agi_claim_restriction``.
    2. ``research_boundary`` - passes when some claim-boundary statement
       satisfies ``_declares_research_candidate_boundary``.
    3. ``prohibited_claim_language`` - fails when any purpose, non_goal,
       claim-boundary, or approval-reason text satisfies
       ``_contains_prohibited_agi_claim``; the offending normalized texts
       are reported under the ``matched_text`` detail key.

    Args:
        attempt: The attempt block whose statements are inspected.

    Returns:
        The three ``AssuranceCheck`` results described above.
    """

    def normalized(statement_type: type, attribute: str = "text") -> list[str]:
        # Normalized text of every statement of the given type, in order.
        return [
            self._contract_text(getattr(statement, attribute))
            for statement in attempt.statements
            if isinstance(statement, statement_type)
        ]

    non_goals = normalized(NonGoalStatement)
    boundaries = normalized(ClaimBoundaryStatement)
    approval_reasons = normalized(RequireApprovalStatement, "reason")
    purposes = normalized(PurposeStatement)
    # Scan order for prohibited language: purpose, non_goal, boundary, approval.
    scanned_texts = purposes + non_goals + boundaries + approval_reasons

    results: list[AssuranceCheck] = []

    # 1. A non_goal must explicitly rule out AGI self-claims.
    if any(self._declares_agi_claim_restriction(entry) for entry in non_goals):
        outcome = (
            "cognition_contract.agi_claim_restriction.present",
            "pass",
            f"Attempt `{attempt.name}` explicitly blocks AGI self-claiming.",
        )
    else:
        outcome = (
            "cognition_contract.agi_claim_restriction.missing",
            "fail",
            f"Attempt `{attempt.name}` must include a non_goal blocking AGI claims.",
        )
    results.append(AssuranceCheck(*outcome, {"attempt": attempt.name}))

    # 2. A claim boundary must scope the attempt as a research candidate.
    if any(self._declares_research_candidate_boundary(entry) for entry in boundaries):
        outcome = (
            "cognition_contract.research_boundary.present",
            "pass",
            f"Attempt `{attempt.name}` is bounded as a research candidate.",
        )
    else:
        outcome = (
            "cognition_contract.research_boundary.missing",
            "fail",
            f"Attempt `{attempt.name}` must declare a research-candidate claim boundary.",
        )
    results.append(AssuranceCheck(*outcome, {"attempt": attempt.name}))

    # 3. None of the scanned texts may self-certify AGI.
    offending = [
        entry for entry in scanned_texts if self._contains_prohibited_agi_claim(entry)
    ]
    if not offending:
        results.append(
            AssuranceCheck(
                "cognition_contract.prohibited_claim_language.absent",
                "pass",
                f"Attempt `{attempt.name}` contains no self-certifying AGI language.",
                {"attempt": attempt.name},
            )
        )
    else:
        results.append(
            AssuranceCheck(
                "cognition_contract.prohibited_claim_language.present",
                "fail",
                f"Attempt `{attempt.name}` contains prohibited AGI-certification language.",
                {
                    "attempt": attempt.name,
                    "matched_text": offending,
                },
            )
        )

    return results
644+
645+
def _contract_text(self, value: str) -> str:
646+
stripped = value.strip()
647+
try:
648+
parsed = py_ast.literal_eval(stripped)
649+
except (SyntaxError, ValueError):
650+
parsed = stripped
651+
if isinstance(parsed, str):
652+
return " ".join(parsed.lower().split())
653+
return " ".join(stripped.lower().split())
654+
655+
def _declares_agi_claim_restriction(self, text: str) -> bool:
656+
if "agi" not in text:
657+
return False
658+
if not any(term in text for term in ("claim", "certif", "declare", "assert")):
659+
return False
660+
return self._is_denial_text(text)
661+
662+
def _declares_research_candidate_boundary(self, text: str) -> bool:
663+
if "research candidate" in text:
664+
return True
665+
if "research" in text and "candidate" in text:
666+
return True
667+
if "research" in text and "not production" in text:
668+
return True
669+
if "evaluation" in text and "not deployment" in text:
670+
return True
671+
return "candidate only" in text
672+
673+
def _contains_prohibited_agi_claim(self, text: str) -> bool:
674+
if self._is_denial_text(text):
675+
return False
676+
677+
prohibited_patterns = (
678+
"is agi",
679+
"achieved agi",
680+
"achieves agi",
681+
"guarantees agi",
682+
"guaranteed agi",
683+
"certifies agi",
684+
"certify agi",
685+
"births agi",
686+
"creates agi",
687+
"proves agi",
688+
"agi achieved",
689+
"agi certified",
690+
"agi guaranteed",
691+
)
692+
return any(pattern in text for pattern in prohibited_patterns)
693+
694+
def _is_denial_text(self, text: str) -> bool:
695+
denial_terms = (
696+
"do not",
697+
"don't",
698+
"must not",
699+
"cannot",
700+
"can't",
701+
"no ",
702+
"never",
703+
"not ",
704+
"forbid",
705+
"forbidden",
706+
"blocked",
707+
"prohibit",
708+
)
709+
return any(term in text for term in denial_terms)
710+
551711
def _check_handoff_contracts(
552712
self,
553713
attempt_name: str,

0 commit comments

Comments
 (0)