|
2 | 2 |
|
3 | 3 | from __future__ import annotations |
4 | 4 |
|
| 5 | +import ast as py_ast |
5 | 6 | from collections.abc import Mapping |
6 | 7 | from dataclasses import dataclass, field |
7 | 8 | from typing import Any, Literal |
@@ -489,6 +490,7 @@ def _check_one_cognition_attempt(self, attempt: AttemptBlock) -> list[AssuranceC |
489 | 490 | ) |
490 | 491 | ) |
491 | 492 |
|
| 493 | + checks.extend(self._check_claim_boundary_language(attempt)) |
492 | 494 | checks.extend(self._check_handoff_contracts(attempt.name, handoffs)) |
493 | 495 |
|
494 | 496 | if obligations: |
@@ -548,6 +550,164 @@ def _presence_check( |
548 | 550 | {"attempt": attempt}, |
549 | 551 | ) |
550 | 552 |
|
def _check_claim_boundary_language(self, attempt: AttemptBlock) -> list[AssuranceCheck]:
    """Check the wording an attempt uses around AGI claims.

    Normalizes (via ``_contract_text``) the text of the attempt's non-goal,
    claim-boundary and purpose statements plus the reason of its
    require-approval statements, then returns exactly three checks, in order:

    1. pass/fail: some non-goal declares an AGI claim restriction;
    2. pass/fail: some claim boundary declares a research-candidate boundary;
    3. fail if any collected text contains prohibited AGI claim language
       (offending texts are reported under ``matched_text``), else pass.
    """

    def normalized(statement_type, attribute: str) -> list[str]:
        # Normalized text of every statement of the given type, in source order.
        return [
            self._contract_text(getattr(statement, attribute))
            for statement in attempt.statements
            if isinstance(statement, statement_type)
        ]

    non_goals = normalized(NonGoalStatement, "text")
    claim_boundaries = normalized(ClaimBoundaryStatement, "text")
    approval_reasons = normalized(RequireApprovalStatement, "reason")
    purposes = normalized(PurposeStatement, "text")

    results: list[AssuranceCheck] = []

    # 1. A non-goal must explicitly block AGI self-claiming.
    blocks_agi_claims = any(map(self._declares_agi_claim_restriction, non_goals))
    results.append(
        AssuranceCheck(
            "cognition_contract.agi_claim_restriction.present",
            "pass",
            f"Attempt `{attempt.name}` explicitly blocks AGI self-claiming.",
            {"attempt": attempt.name},
        )
        if blocks_agi_claims
        else AssuranceCheck(
            "cognition_contract.agi_claim_restriction.missing",
            "fail",
            f"Attempt `{attempt.name}` must include a non_goal blocking AGI claims.",
            {"attempt": attempt.name},
        )
    )

    # 2. A claim boundary must frame the attempt as a research candidate.
    is_research_bounded = any(
        map(self._declares_research_candidate_boundary, claim_boundaries)
    )
    results.append(
        AssuranceCheck(
            "cognition_contract.research_boundary.present",
            "pass",
            f"Attempt `{attempt.name}` is bounded as a research candidate.",
            {"attempt": attempt.name},
        )
        if is_research_bounded
        else AssuranceCheck(
            "cognition_contract.research_boundary.missing",
            "fail",
            f"Attempt `{attempt.name}` must declare a research-candidate claim boundary.",
            {"attempt": attempt.name},
        )
    )

    # 3. None of the collected texts may contain self-certifying language.
    #    Scan order (purpose, non-goal, claim boundary, approval reason)
    #    determines the order of ``matched_text``.
    offending = list(
        filter(
            self._contains_prohibited_agi_claim,
            purposes + non_goals + claim_boundaries + approval_reasons,
        )
    )
    results.append(
        AssuranceCheck(
            "cognition_contract.prohibited_claim_language.present",
            "fail",
            f"Attempt `{attempt.name}` contains prohibited AGI-certification language.",
            {
                "attempt": attempt.name,
                "matched_text": offending,
            },
        )
        if offending
        else AssuranceCheck(
            "cognition_contract.prohibited_claim_language.absent",
            "pass",
            f"Attempt `{attempt.name}` contains no self-certifying AGI language.",
            {"attempt": attempt.name},
        )
    )

    return results
| 644 | + |
| 645 | + def _contract_text(self, value: str) -> str: |
| 646 | + stripped = value.strip() |
| 647 | + try: |
| 648 | + parsed = py_ast.literal_eval(stripped) |
| 649 | + except (SyntaxError, ValueError): |
| 650 | + parsed = stripped |
| 651 | + if isinstance(parsed, str): |
| 652 | + return " ".join(parsed.lower().split()) |
| 653 | + return " ".join(stripped.lower().split()) |
| 654 | + |
| 655 | + def _declares_agi_claim_restriction(self, text: str) -> bool: |
| 656 | + if "agi" not in text: |
| 657 | + return False |
| 658 | + if not any(term in text for term in ("claim", "certif", "declare", "assert")): |
| 659 | + return False |
| 660 | + return self._is_denial_text(text) |
| 661 | + |
| 662 | + def _declares_research_candidate_boundary(self, text: str) -> bool: |
| 663 | + if "research candidate" in text: |
| 664 | + return True |
| 665 | + if "research" in text and "candidate" in text: |
| 666 | + return True |
| 667 | + if "research" in text and "not production" in text: |
| 668 | + return True |
| 669 | + if "evaluation" in text and "not deployment" in text: |
| 670 | + return True |
| 671 | + return "candidate only" in text |
| 672 | + |
| 673 | + def _contains_prohibited_agi_claim(self, text: str) -> bool: |
| 674 | + if self._is_denial_text(text): |
| 675 | + return False |
| 676 | + |
| 677 | + prohibited_patterns = ( |
| 678 | + "is agi", |
| 679 | + "achieved agi", |
| 680 | + "achieves agi", |
| 681 | + "guarantees agi", |
| 682 | + "guaranteed agi", |
| 683 | + "certifies agi", |
| 684 | + "certify agi", |
| 685 | + "births agi", |
| 686 | + "creates agi", |
| 687 | + "proves agi", |
| 688 | + "agi achieved", |
| 689 | + "agi certified", |
| 690 | + "agi guaranteed", |
| 691 | + ) |
| 692 | + return any(pattern in text for pattern in prohibited_patterns) |
| 693 | + |
| 694 | + def _is_denial_text(self, text: str) -> bool: |
| 695 | + denial_terms = ( |
| 696 | + "do not", |
| 697 | + "don't", |
| 698 | + "must not", |
| 699 | + "cannot", |
| 700 | + "can't", |
| 701 | + "no ", |
| 702 | + "never", |
| 703 | + "not ", |
| 704 | + "forbid", |
| 705 | + "forbidden", |
| 706 | + "blocked", |
| 707 | + "prohibit", |
| 708 | + ) |
| 709 | + return any(term in text for term in denial_terms) |
| 710 | + |
551 | 711 | def _check_handoff_contracts( |
552 | 712 | self, |
553 | 713 | attempt_name: str, |
|
0 commit comments