|
| 1 | +""" |
| 2 | +GraphProgram AST Models (ADR-500) |
| 3 | +
|
| 4 | +Pydantic models for the GraphProgram domain-specific query composition language. |
| 5 | +These models define the canonical JSON AST that all authoring surfaces compile to. |
| 6 | +
|
| 7 | +The AST represents a finite sequence of set-algebraic operations over openCypher |
| 8 | +queries and REST API calls. Programs are bounded by construction: no iteration, |
| 9 | +no user-defined abstractions, no mutable variables, no recursion. |
| 10 | +
|
| 11 | +Zero platform dependencies: pure Python + Pydantic only. No database, no FastAPI, |
| 12 | +no AGE client. Importable and testable from a bare ``pytest`` run without Docker. |
| 13 | +
|
| 14 | +Usage: |
| 15 | + from api.app.models.program import GraphProgram |
| 16 | +
|
| 17 | + program = GraphProgram.model_validate(json_data) |
| 18 | +
|
| 19 | +@verified 0000000 |
| 20 | +""" |
| 21 | + |
| 22 | +from typing import Optional, List, Union, Literal, Dict, Any |
| 23 | +from pydantic import BaseModel, Field |
| 24 | + |
| 25 | + |
| 26 | +# --------------------------------------------------------------------------- |
| 27 | +# Constants |
| 28 | +# --------------------------------------------------------------------------- |
| 29 | + |
VALID_OPERATORS = (
    '+',
    '-',
    '&',
    '?',
    '!',
)
"""Operator characters a statement may carry in its ``op`` field. @verified 0000000"""

VALID_CONDITION_TESTS = (
    'has_results',
    'empty',
    'count_gte',
    'count_lte',
    'has_ontology',
    'has_relationship',
)
"""Test identifiers accepted in a ConditionalOp condition. @verified 0000000"""

CYPHER_WRITE_KEYWORDS = frozenset((
    'CREATE',
    'SET',
    'DELETE',
    'MERGE',
    'REMOVE',
    'DROP',
    'DETACH',
))
"""Deny list of Cypher keywords that signal a write operation. @verified 0000000"""
| 44 | + |
API_ENDPOINT_ALLOWLIST: Dict[str, Dict[str, Any]] = {
    '/search/concepts': {
        'required': ['query'],
        'optional': ['min_similarity', 'limit', 'ontology', 'offset'],
        'types': {
            'query': str,
            'min_similarity': (int, float),
            'limit': int,
            'ontology': str,
            'offset': int,
        },
    },
    '/search/sources': {
        'required': ['query'],
        'optional': ['min_similarity', 'limit', 'ontology', 'offset'],
        'types': {
            'query': str,
            'min_similarity': (int, float),
            'limit': int,
            'ontology': str,
            'offset': int,
        },
    },
    '/vocabulary/status': {
        'required': [],
        'optional': ['status_filter', 'relationship_type'],
        'types': {
            'status_filter': str,
            'relationship_type': str,
        },
    },
    '/concepts/batch': {
        'required': ['concept_ids'],
        'optional': ['include_details'],
        'types': {
            'concept_ids': list,
            'include_details': bool,
        },
    },
    '/concepts/details': {
        'required': ['concept_id'],
        'optional': ['include_diversity', 'include_grounding'],
        'types': {
            'concept_id': str,
            'include_diversity': bool,
            'include_grounding': bool,
        },
    },
    '/concepts/related': {
        'required': ['concept_id'],
        'optional': ['max_depth', 'relationship_types'],
        'types': {
            'concept_id': str,
            'max_depth': int,
            'relationship_types': list,
        },
    },
}
"""
Endpoints that an ApiOp statement is allowed to call.

Keys are endpoint paths. Each value holds three entries:

- ``required``: parameter names that must be supplied.
- ``optional``: parameter names that may be supplied.
- ``types``: parameter name -> expected Python type, or a tuple of types when
  several are acceptable (e.g., ``(int, float)``).

The validator rejects any endpoint missing from this dict (V020) and reports
parameter type mismatches as V023 errors.

@verified 0000000
"""
| 100 | + |
MAX_VARIABLE_PATH_LENGTH = 6
"""Hop ceiling for variable-length Cypher paths (V030). @verified 0000000"""

MAX_STATEMENTS = 100
"""Ceiling on the total operation count of one program (V006). @verified 0000000"""

MAX_NESTING_DEPTH = 3
"""Ceiling on conditional nesting depth (V007). @verified 0000000"""

CURRENT_VERSION = 1
"""The single program version that is supported. @verified 0000000"""
| 112 | + |
| 113 | + |
| 114 | +# --------------------------------------------------------------------------- |
| 115 | +# Operator type |
| 116 | +# --------------------------------------------------------------------------- |
| 117 | + |
Operator = Literal[
    '+',
    '-',
    '&',
    '?',
    '!',
]
"""Type of a statement's set-algebra operator character. @verified 0000000"""
| 120 | + |
| 121 | + |
| 122 | +# --------------------------------------------------------------------------- |
| 123 | +# Conditions (for ConditionalOp) |
| 124 | +# --------------------------------------------------------------------------- |
| 125 | + |
class HasResultsCondition(BaseModel):
    """Condition that holds when the working graph W is non-empty. @verified 0000000"""

    # Tag value identifying this condition variant.
    test: Literal['has_results']
| 129 | + |
| 130 | + |
class EmptyCondition(BaseModel):
    """Condition that holds when the working graph W is empty. @verified 0000000"""

    # Tag value identifying this condition variant.
    test: Literal['empty']
| 134 | + |
| 135 | + |
class CountGteCondition(BaseModel):
    """Condition that holds when W has at least ``value`` nodes. @verified 0000000"""

    # Tag value identifying this condition variant.
    test: Literal['count_gte']
    # Required threshold N; must be strictly positive.
    value: int = Field(gt=0)
| 140 | + |
| 141 | + |
class CountLteCondition(BaseModel):
    """Condition that holds when W has at most ``value`` nodes. @verified 0000000"""

    # Tag value identifying this condition variant.
    test: Literal['count_lte']
    # Required threshold N; zero is allowed, negatives are not.
    value: int = Field(ge=0)
| 146 | + |
| 147 | + |
class HasOntologyCondition(BaseModel):
    """Condition that holds when W contains nodes from the named ontology. @verified 0000000"""

    # Tag value identifying this condition variant.
    test: Literal['has_ontology']
    # Required, non-empty ontology name to look for.
    ontology: str = Field(min_length=1)
| 152 | + |
| 153 | + |
class HasRelationshipCondition(BaseModel):
    """Condition that holds when W contains edges of the named type. @verified 0000000"""

    # Tag value identifying this condition variant.
    test: Literal['has_relationship']
    # Required, non-empty edge type to look for.
    type: str = Field(min_length=1)
| 158 | + |
| 159 | + |
Condition = Union[
    HasResultsCondition, EmptyCondition,
    CountGteCondition, CountLteCondition,
    HasOntologyCondition, HasRelationshipCondition,
]
"""Union of every condition model usable in a ConditionalOp; each member carries a distinct ``test`` literal. @verified 0000000"""
| 169 | + |
| 170 | + |
| 171 | +# --------------------------------------------------------------------------- |
| 172 | +# Operations |
| 173 | +# --------------------------------------------------------------------------- |
| 174 | + |
class CypherOp(BaseModel):
    """
    Run a read-only openCypher query against the source graph H.

    Before execution the query text is checked for write keywords (V010-V016).
    ``limit`` is optional and caps the result set on its own, regardless of any
    LIMIT clause written inside the Cypher text.

    @verified 0000000
    """

    # Tag value identifying this operation variant.
    type: Literal['cypher']
    # Required, non-empty Cypher text.
    query: str = Field(min_length=1)
    # Optional result cap; when given it must be strictly positive.
    limit: Optional[int] = Field(default=None, gt=0)
| 188 | + |
| 189 | + |
class ApiOp(BaseModel):
    """
    Invoke a REST API endpoint (smart block).

    Validation rules applied to this operation:

    - V020: ``endpoint`` must appear in ``API_ENDPOINT_ALLOWLIST``.
    - V021: every parameter that endpoint requires must be present in ``params``.
    - V022: unknown parameters raise a warning only; validation still passes.

    @verified 0000000
    """

    # Tag value identifying this operation variant.
    type: Literal['api']
    # Required, non-empty endpoint path.
    endpoint: str = Field(min_length=1)
    # Call parameters; a fresh empty dict is created when omitted.
    params: Dict[str, Any] = Field(default_factory=dict)
| 203 | + |
| 204 | + |
class ConditionalOp(BaseModel):
    """
    Conditional branching based on working graph W state.

    Selects which statements to execute based on a ``Condition``. The ``then``
    branch executes when the condition is true; the optional ``else`` branch
    executes otherwise. Nesting depth is limited to ``MAX_NESTING_DEPTH`` (V007).

    The ``else`` branch is stored on the attribute ``else_`` because ``else`` is
    a Python keyword; ``populate_by_name`` lets callers supply it under either
    the JSON alias ``else`` or the attribute name ``else_``.

    @verified 0000000
    """

    model_config = {'populate_by_name': True}

    # Tag value identifying this operation variant.
    type: Literal['conditional']
    # Dispatch on the ``test`` tag so an invalid condition yields one targeted
    # error rather than one error per union member; this mirrors how
    # ``Statement.operation`` dispatches on ``type``.
    condition: Condition = Field(..., discriminator='test')
    # Statements run when the condition holds; at least one is required.
    then: List['Statement'] = Field(..., min_length=1)
    # Statements run otherwise; may be omitted entirely.
    else_: Optional[List['Statement']] = Field(None, alias='else')
| 221 | + |
| 222 | + |
Operation = Union[
    CypherOp,
    ApiOp,
    ConditionalOp,
]
"""Union of every operation model; the ``type`` field selects the member. @verified 0000000"""
| 225 | + |
| 226 | + |
| 227 | +# --------------------------------------------------------------------------- |
| 228 | +# Block annotations (decompilation support) |
| 229 | +# --------------------------------------------------------------------------- |
| 230 | + |
class BlockAnnotation(BaseModel):
    """
    Originating block type and parameters, kept for round-trip decompilation.

    The block editor attaches this annotation to each statement it compiles to
    the AST, recording which block produced the statement and with what
    parameters. That record is what makes AST-to-blocks conversion possible.

    @verified 0000000
    """

    # Required, non-empty name of the originating block type.
    blockType: str = Field(min_length=1)
    # Parameters of the originating block; a fresh empty dict when omitted.
    params: Dict[str, Any] = Field(default_factory=dict)
| 243 | + |
| 244 | + |
| 245 | +# --------------------------------------------------------------------------- |
| 246 | +# Statement |
| 247 | +# --------------------------------------------------------------------------- |
| 248 | + |
class Statement(BaseModel):
    """
    One step of a GraphProgram.

    Pairs a set-algebra operator (``op``) with a typed operation. ``label`` is
    an optional human-readable description of the step, and ``block`` is an
    optional annotation that allows decompilation back to the visual block
    editor.

    @verified 0000000
    """

    # Set-algebra operator character for this step.
    op: Operator
    # The operation to run; the member model is chosen by its ``type`` tag.
    operation: Operation = Field(discriminator='type')
    # Optional human-readable step description.
    label: Optional[str] = None
    # Optional block-editor annotation.
    block: Optional[BlockAnnotation] = None
| 263 | + |
| 264 | + |
| 265 | +# --------------------------------------------------------------------------- |
| 266 | +# Metadata and parameters |
| 267 | +# --------------------------------------------------------------------------- |
| 268 | + |
class ProgramMetadata(BaseModel):
    """
    Metadata attached to a whole program.

    Every field may be omitted. ``author`` names the originator category and is
    restricted to ``human``, ``agent``, or ``system``.

    @verified 0000000
    """

    # Optional program name.
    name: Optional[str] = None
    # Optional free-text description.
    description: Optional[str] = None
    # Optional originator category.
    author: Optional[Literal['human', 'agent', 'system']] = None
    # Optional creation marker, stored as a plain string with no format check here.
    created: Optional[str] = None
| 282 | + |
| 283 | + |
class ParamDeclaration(BaseModel):
    """
    Declaration of one program parameter.

    ``name`` must be a valid identifier: a letter or underscore first, then any
    mix of alphanumerics and underscores. ``type`` is limited to ``string`` and
    ``number``. ``default`` is optional and supplies a fallback for when the
    parameter is not provided at execution time.

    @verified 0000000
    """

    # Required identifier-shaped parameter name.
    name: str = Field(min_length=1, pattern=r'^[a-zA-Z_][a-zA-Z0-9_]*$')
    # Declared parameter type.
    type: Literal['string', 'number']
    # Optional fallback value; this model does not check it against ``type``.
    default: Optional[Union[str, int, float]] = None
| 298 | + |
| 299 | + |
| 300 | +# --------------------------------------------------------------------------- |
| 301 | +# GraphProgram (top-level) |
| 302 | +# --------------------------------------------------------------------------- |
| 303 | + |
class GraphProgram(BaseModel):
    """
    Canonical AST of a graph query program (ADR-500).

    The program is a finite, bounded sequence of set-algebraic operations over
    openCypher queries and REST API calls. This JSON AST is the single source
    of truth: text DSL, block diagrams, and recorded explorations all compile
    to it.

    Invariants enforced by the type system:
      - ``version`` must be exactly 1
      - ``statements`` must hold at least one entry

    Invariants left to the validator:
      - V004: parameter names must be unique
      - V006: total operation count <= MAX_STATEMENTS
      - V007: conditional nesting depth <= MAX_NESTING_DEPTH
      - V010-V016: Cypher queries must not contain write keywords
      - V020-V022: API endpoints must be in the allowlist
      - V023: API parameter values must match the allowlist types
      - V030: variable-length Cypher paths <= MAX_VARIABLE_PATH_LENGTH hops

    @verified 0000000
    """

    # Program format version; the bounds pin it to the single value 1.
    version: int = Field(ge=1, le=1)
    # Program metadata; an all-default ProgramMetadata is built when omitted.
    metadata: ProgramMetadata = Field(default_factory=ProgramMetadata)
    # Optional parameter declarations.
    params: Optional[List[ParamDeclaration]] = None
    # Required, non-empty list of program steps.
    statements: List[Statement] = Field(min_length=1)
| 329 | + |
| 330 | + |
# ConditionalOp refers to 'Statement' by string because Statement is declared
# later in the module. With both classes now defined, rebuild the two models so
# the forward references are resolved.
ConditionalOp.model_rebuild()
Statement.model_rebuild()
0 commit comments