Added verification

robmoffat · robmoffat · commit e4047c456785 · 2026-02-15T11:46:51.000Z
diff --git a/.github/workflows/validate-gemara.yml b/.github/workflows/validate-gemara.yml
@@ -2,19 +2,7 @@ name: Validate Gemara Front Matter
 
 on:
   push:
-    branches: [main]
-    paths:
-      - 'risks/**/*.md'
-      - 'practices/**/*.md'
-      - 'capabilities/**/*.md'
-      - 'cue/**/*.cue'
   pull_request:
-    branches: [main]
-    paths:
-      - 'risks/**/*.md'
-      - 'practices/**/*.md'
-      - 'capabilities/**/*.md'
-      - 'cue/**/*.cue'
 
 jobs:
   validate:
@@ -33,57 +21,5 @@ jobs:
           sudo wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64
           sudo chmod +x /usr/local/bin/yq
 
-      - name: Validate risk files
-        run: |
-          echo "Validating risk files against #Threat schema..."
-          for file in risks/*.md; do
-            if [ -f "$file" ]; then
-              echo "Checking $file"
-              # Extract gemara YAML from front matter
-              gemara_yaml=$(sed -n '/^---$/,/^---$/p' "$file" | yq '.gemara' -o json 2>/dev/null)
-              if [ "$gemara_yaml" != "null" ] && [ -n "$gemara_yaml" ]; then
-                echo "$gemara_yaml" > /tmp/threat.json
-                cue vet /tmp/threat.json cue/gemara/layer-2.cue -d '#Threat'
-                echo "✓ $file validated"
-              else
-                echo "⚠ $file has no gemara front matter, skipping"
-              fi
-            fi
-          done
-
-      - name: Validate practice files
-        run: |
-          echo "Validating practice files against #Control schema..."
-          for file in practices/*.md; do
-            if [ -f "$file" ]; then
-              echo "Checking $file"
-              gemara_yaml=$(sed -n '/^---$/,/^---$/p' "$file" | yq '.gemara' -o json 2>/dev/null)
-              if [ "$gemara_yaml" != "null" ] && [ -n "$gemara_yaml" ]; then
-                echo "$gemara_yaml" > /tmp/control.json
-                cue vet /tmp/control.json cue/gemara/layer-2.cue -d '#Control'
-                echo "✓ $file validated"
-              else
-                echo "⚠ $file has no gemara front matter, skipping"
-              fi
-            fi
-          done
-
-      - name: Validate capability files
-        run: |
-          echo "Validating capability files against #Capability schema..."
-          for file in capabilities/*.md; do
-            if [ -f "$file" ]; then
-              echo "Checking $file"
-              gemara_yaml=$(sed -n '/^---$/,/^---$/p' "$file" | yq '.gemara' -o json 2>/dev/null)
-              if [ "$gemara_yaml" != "null" ] && [ -n "$gemara_yaml" ]; then
-                echo "$gemara_yaml" > /tmp/capability.json
-                cue vet /tmp/capability.json cue/gemara/layer-2.cue -d '#Capability'
-                echo "✓ $file validated"
-              else
-                echo "⚠ $file has no gemara front matter, skipping"
-              fi
-            fi
-          done
-
-      - name: Validation complete
-        run: echo "All Gemara front matter validated successfully!"
+      - name: Validate Gemara front matter
+        run: ./scripts/validate.sh
diff --git a/cue/gemara/base.cue b/cue/gemara/base.cue
@@ -0,0 +1,82 @@
+// Schema lifecycle: experimental | stable | deprecated
+@status("stable")
+package gemara
+
+import "time"
+
+@go(gemara)
+
+// Contact is the contact information for a person or group
+#Contact: {
+	// name is the preferred descriptor for the contact entity
+	name: string
+
+	// affiliation is the organization with which the contact entity is associated, such as a team, school, or employer
+	affiliation?: string @go(Affiliation,type=*string)
+
+	// email is the preferred email address to reach the contact
+	email?: #Email @go(Email,type=*Email)
+
+	// social is a social media handle or other profile for the contact, such as GitHub
+	social?: string @go(Social,type=*string)
+}
+
+// Actor represents an entity (human or tool) that can perform actions in evaluations
+#Actor: {
+	// id uniquely identifies the actor and allows this entry to be referenced by other elements
+	id: string
+
+	// name is the name of the actor
+	name: string
+
+	// type specifies the type of entity interacting in the workflow
+	type: #ActorType @go(Type)
+
+	// version is the version of the actor (for tools; if applicable)
+	version?: string
+
+	// description provides additional context about the actor
+	description?: string
+
+	// uri is a general URI for the actor information
+	uri?: =~"^https?://[^\\s]+$"
+
+	// contact is contact information for the actor
+	contact?: #Contact @go(Contact)
+}
+
+// ActorType specifies what entity is interacting in the workflow
+#ActorType: "Human" | "Software" | "Software-Assisted" @go(-)
+
+// Email represents a validated email address pattern
+#Email: =~"^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}$"
+
+// Datetime represents an ISO 8601 formatted datetime string
+#Datetime: time.Format("2006-01-02T15:04:05Z07:00") @go(Datetime,format="date-time")
+
+// Date represents a date string (ISO 8601 date format)
+#Date: time.Format("2006-01-02") @go(Date,format="date")
+
+// Category represents a category used for applicability or classification
+#Category: {
+	// id allows this entry to be referenced by other elements
+	id: string
+
+	// title describes the purpose of this category at a glance
+	title: string
+
+	// description explains the significance and traits of entries to this category
+	description: string
+}
+
+// Family represents a logical grouping of guidelines or controls which share a common purpose or function
+#Family: {
+	// id allows this entry to be referenced by other elements
+	id: string
+
+	// title describes the purpose of this family at a glance
+	title: string
+
+	// description explains the significance and traits of entries in this family
+	description: string
+}
diff --git a/cue/gemara/mapping.cue b/cue/gemara/mapping.cue
@@ -0,0 +1,71 @@
+// Schema lifecycle: experimental | stable | deprecated
+@status("stable")
+
+package gemara
+
+// MappingReference represents a reference to an external document with full metadata.
+#MappingReference: {
+	// id allows this entry to be referenced by other elements
+	id: string
+
+	// title describes the purpose of this mapping reference at a glance
+	title: string
+
+	// version is the version identifier of the artifact being mapped to
+	version: string
+
+	// description is prose regarding the artifact's purpose or content
+	description?: string
+
+	// url is the path where the artifact may be retrieved; preferably responds with Gemara-compatible YAML/JSON
+	url?: =~"^(https?|file)://[^\\s]+$"
+}
+
+#ArtifactMapping: {
+	// reference-id should reference the corresponding MappingReference id from metadata
+	"reference-id": string @go(ReferenceId)
+
+	// remarks is prose regarding the mapped artifact or the mapping relationship
+	"remarks": string
+}
+
+// MultiEntryMapping represents a mapping to an external reference with one or more entries.
+#MultiEntryMapping: {
+	// reference-id should reference the corresponding MappingReference id from metadata
+	"reference-id": string @go(ReferenceId)
+
+	// entries is a list of mapping entries
+	entries: [#MappingEntry, ...#MappingEntry] @go(Entries)
+
+	// remarks is prose regarding the mapped artifact or the mapping relationship
+	remarks?: string
+}
+
+// EntryMapping represents how a specific entry (control/requirement/procedure) maps to a MappingReference.
+#EntryMapping: {
+	// reference-id is the id for a MappingReference entry in the artifact's metadata
+	"reference-id"?: string @go(ReferenceId)
+
+	// entry-id is the identifier being mapped to in the referenced artifact
+	"entry-id": string @go(EntryId)
+
+	// strength is the author's estimate of how completely the current/source material satisfies the target/reference material;
+	// Range: 1-10. Omit the field when not yet quantified; an explicit 0 fails the constraint below.
+	strength?: int & >=1 & <=10
+
+	// remarks is prose describing the mapping relationship
+	remarks?: string
+}
+
+// MappingEntry represents a single entry within a mapping
+#MappingEntry: {
+	// reference-id is the id for a MappingReference entry in the artifact's metadata
+	"reference-id": string @go(ReferenceId)
+
+	// strength is the author's estimate of how completely the current/source material satisfies the target/reference material;
+	// Range: 1-10. Omit the field when not yet quantified; an explicit 0 fails the constraint below.
+	strength?: int & >=1 & <=10
+
+	// remarks is prose describing the mapping relationship
+	remarks?: string
+}
diff --git a/cue/gemara/metadata.cue b/cue/gemara/metadata.cue
@@ -0,0 +1,33 @@
+// Schema lifecycle: experimental | stable | deprecated
+@status("stable")
+package gemara
+
+// Metadata represents common metadata fields shared across all layers
+#Metadata: {
+	// id allows this entry to be referenced by other elements
+	id: string
+
+	// version is the version identifier of this artifact
+	version?: string
+
+	// date is the publication or effective date of this artifact
+	date?: #Date @go(Date)
+
+	// description provides a high-level summary of the artifact's purpose and scope
+	description: string
+
+	// author is the person or group primarily responsible for this artifact
+	author: #Actor
+
+	// mapping-references is a list of external documents referenced within this artifact
+	"mapping-references"?: [...#MappingReference] @go(MappingReferences) @yaml("mapping-references,omitempty")
+
+	// applicability-categories is a list of categories used to classify within this artifact to specify scope
+	"applicability-categories"?: [...#Category] @go(ApplicabilityCategories) @yaml("applicability-categories,omitempty")
+
+	// draft indicates whether this artifact is a pre-release version; open to modification
+	draft?: bool
+
+	// lexicon maps this artifact to a controlled vocabulary or glossary, identified by a MappingReference id rather than a raw URI
+	lexicon?: #ArtifactMapping @go(Lexicon,optional=nillable)
+}
diff --git a/practices/Independent-Verification.md b/practices/Independent-Verification.md
@@ -27,8 +27,11 @@ gemara:
         - autonomous-deployment
         - ci-cd-integration
   threat-mappings:
-    - id: verification-illusion
-      relationship: mitigates
+    - reference-id: verification-illusion
+      entries:
+        - reference-id: verification-illusion
+          strength: 8
+          remarks: Primary control for preventing circular verification
 ---
 
 # Independent Verification
diff --git a/risks/Verification-Illusion.md b/risks/Verification-Illusion.md
@@ -10,20 +10,28 @@ gemara:
   title: Verification Illusion
   description: When an AI agent both writes code and generates its own tests, the tests tend to verify what the code does rather than what it should do. This creates a dangerous illusion of quality — high test coverage with low actual assurance.
   capabilities:
-    - id: code-generation
-      relationship: exploits
-    - id: execution
-      relationship: exploits
+    - reference-id: code-generation
+      entries:
+        - reference-id: code-generation
+          remarks: AI generates both code and tests from the same cognitive process
+    - reference-id: execution
+      entries:
+        - reference-id: execution
+          remarks: Tests execute without independent verification of intent
   actors:
-    - type: unintentional
+    - id: ai-agent
+      name: AI Coding Agent
+      type: Software
       description: AI agents optimizing for coverage metrics rather than correctness
   external-mappings:
-    - id: nist-ai-rmf
-      relationship: gap
-      notes: Not addressed at code verification level
-    - id: iso-42001
-      relationship: gap
-      notes: No assurance independence requirement
+    - reference-id: nist-ai-rmf
+      entries:
+        - reference-id: nist-ai-rmf
+          remarks: Not addressed at code verification level
+    - reference-id: iso-42001
+      entries:
+        - reference-id: iso-42001
+          remarks: No assurance independence requirement
 ---
 
 # Verification Illusion
diff --git a/scripts/validate.sh b/scripts/validate.sh