Skip to content

Commit fb00c36

Browse files
authored
docs: GraphProgram language specification and executable validation (ADR-500)
GraphProgram language specification and executable validation (ADR-500)
2 parents 5d1595f + fa737bd commit fb00c36

8 files changed

Lines changed: 4652 additions & 0 deletions

File tree

api/app/models/program.py

Lines changed: 333 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,333 @@
1+
"""
2+
GraphProgram AST Models (ADR-500)
3+
4+
Pydantic models for the GraphProgram domain-specific query composition language.
5+
These models define the canonical JSON AST that all authoring surfaces compile to.
6+
7+
The AST represents a finite sequence of set-algebraic operations over openCypher
8+
queries and REST API calls. Programs are bounded by construction: no iteration,
9+
no user-defined abstractions, no mutable variables, no recursion.
10+
11+
Zero platform dependencies: pure Python + Pydantic only. No database, no FastAPI,
12+
no AGE client. Importable and testable from a bare ``pytest`` run without Docker.
13+
14+
Usage:
15+
from api.app.models.program import GraphProgram
16+
17+
program = GraphProgram.model_validate(json_data)
18+
19+
@verified 0000000
20+
"""
21+
22+
from typing import Optional, List, Union, Literal, Dict, Any
23+
from pydantic import BaseModel, Field
24+
25+
26+
# ---------------------------------------------------------------------------
27+
# Constants
28+
# ---------------------------------------------------------------------------
29+
30+
# Must stay in sync with the ``Operator`` Literal declared later in this module.
VALID_OPERATORS = ('+', '-', '&', '?', '!')
"""Allowed set-algebra operator characters. @verified 0000000"""
32+
33+
# One entry per condition model's ``test`` literal (see the *Condition classes).
VALID_CONDITION_TESTS = (
    'has_results', 'empty',
    'count_gte', 'count_lte',
    'has_ontology', 'has_relationship',
)
"""Allowed condition test identifiers for ConditionalOp. @verified 0000000"""
39+
40+
# frozenset: immutable, with O(1) membership checks for keyword lookups.
CYPHER_WRITE_KEYWORDS = frozenset({
    'CREATE', 'SET', 'DELETE', 'MERGE', 'REMOVE', 'DROP', 'DETACH',
})
"""Cypher keywords that indicate a write operation (deny list). @verified 0000000"""
44+
45+
API_ENDPOINT_ALLOWLIST: Dict[str, Dict[str, Any]] = {
    '/search/concepts': {
        'required': ['query'],
        'optional': ['min_similarity', 'limit', 'ontology', 'offset'],
        'types': {
            'query': str, 'min_similarity': (int, float),
            'limit': int, 'ontology': str, 'offset': int,
        },
    },
    '/search/sources': {
        'required': ['query'],
        'optional': ['min_similarity', 'limit', 'ontology', 'offset'],
        'types': {
            'query': str, 'min_similarity': (int, float),
            'limit': int, 'ontology': str, 'offset': int,
        },
    },
    '/vocabulary/status': {
        'required': [],
        'optional': ['status_filter', 'relationship_type'],
        'types': {'status_filter': str, 'relationship_type': str},
    },
    '/concepts/batch': {
        'required': ['concept_ids'],
        'optional': ['include_details'],
        'types': {'concept_ids': list, 'include_details': bool},
    },
    '/concepts/details': {
        'required': ['concept_id'],
        'optional': ['include_diversity', 'include_grounding'],
        'types': {
            'concept_id': str, 'include_diversity': bool,
            'include_grounding': bool,
        },
    },
    '/concepts/related': {
        'required': ['concept_id'],
        'optional': ['max_depth', 'relationship_types'],
        'types': {
            'concept_id': str, 'max_depth': int,
            'relationship_types': list,
        },
    },
}
"""
Permitted API endpoints for ApiOp statements.

Each entry maps an endpoint path to its required and optional parameter names.
The ``types`` dict maps parameter names to expected Python types (or tuples of
types for multiple acceptable types, e.g., ``(int, float)``). Endpoints not in
this dict are rejected by the validator (V020). Parameter type mismatches
produce V023 errors.

@verified 0000000
"""
100+
101+
MAX_VARIABLE_PATH_LENGTH = 6
"""Maximum hops for variable-length Cypher paths (V030). @verified 0000000"""

MAX_STATEMENTS = 100
"""Maximum total operation count per program (V006). @verified 0000000"""

MAX_NESTING_DEPTH = 3
"""Maximum conditional nesting depth (V007). @verified 0000000"""

CURRENT_VERSION = 1
"""Only supported program version. @verified 0000000"""
112+
113+
114+
# ---------------------------------------------------------------------------
115+
# Operator type
116+
# ---------------------------------------------------------------------------
117+
118+
# Type-level counterpart of VALID_OPERATORS; keep the two lists identical.
Operator = Literal['+', '-', '&', '?', '!']
"""Set-algebra operator for a statement. @verified 0000000"""
120+
121+
122+
# ---------------------------------------------------------------------------
123+
# Conditions (for ConditionalOp)
124+
# ---------------------------------------------------------------------------
125+
126+
class HasResultsCondition(BaseModel):
    """Test whether the working graph W is non-empty. @verified 0000000"""

    # Tag value that identifies this condition in the JSON AST.
    test: Literal['has_results']
129+
130+
131+
class EmptyCondition(BaseModel):
    """Test whether the working graph W is empty. @verified 0000000"""

    # Tag value that identifies this condition in the JSON AST.
    test: Literal['empty']
134+
135+
136+
class CountGteCondition(BaseModel):
    """Test whether W has >= N nodes. @verified 0000000"""

    # Tag value that identifies this condition in the JSON AST.
    test: Literal['count_gte']
    # The threshold N; required and strictly positive (gt=0).
    value: int = Field(..., gt=0)
140+
141+
142+
class CountLteCondition(BaseModel):
    """Test whether W has <= N nodes. @verified 0000000"""

    # Tag value that identifies this condition in the JSON AST.
    test: Literal['count_lte']
    # The threshold N; required, zero is allowed (ge=0).
    value: int = Field(..., ge=0)
146+
147+
148+
class HasOntologyCondition(BaseModel):
    """Test whether W contains nodes from a specific ontology. @verified 0000000"""

    # Tag value that identifies this condition in the JSON AST.
    test: Literal['has_ontology']
    # Ontology to look for; required and non-empty.
    ontology: str = Field(..., min_length=1)
152+
153+
154+
class HasRelationshipCondition(BaseModel):
    """Test whether W contains edges of a specific type. @verified 0000000"""

    # Tag value that identifies this condition in the JSON AST.
    test: Literal['has_relationship']
    # Edge type to look for; required and non-empty. The field name is part of
    # the JSON schema, so it intentionally stays ``type``.
    type: str = Field(..., min_length=1)
158+
159+
160+
Condition = Union[
    HasResultsCondition,
    EmptyCondition,
    CountGteCondition,
    CountLteCondition,
    HasOntologyCondition,
    HasRelationshipCondition,
]
"""Union of all condition types for ConditionalOp; each member is identified by its ``test`` literal. @verified 0000000"""
169+
170+
171+
# ---------------------------------------------------------------------------
172+
# Operations
173+
# ---------------------------------------------------------------------------
174+
175+
class CypherOp(BaseModel):
    """
    Execute a read-only openCypher query against the source graph H.

    The query string is validated for write keywords (V010-V016) before execution.
    An optional ``limit`` field caps the result set independently of any LIMIT
    clause in the Cypher text.

    @verified 0000000
    """

    # Tag value used to select this operation type.
    type: Literal['cypher']
    # The Cypher text; required and non-empty.
    query: str = Field(..., min_length=1)
    # Optional result cap; when given it must be strictly positive.
    limit: Optional[int] = Field(None, gt=0)
188+
189+
190+
class ApiOp(BaseModel):
    """
    Call a REST API endpoint (smart block).

    The endpoint must be in ``API_ENDPOINT_ALLOWLIST`` (V020). Required parameters
    for that endpoint must be present in ``params`` (V021). Unknown parameters
    produce a warning (V022) but do not block validation. Parameter type
    mismatches are reported as V023.

    @verified 0000000
    """

    # Tag value used to select this operation type.
    type: Literal['api']
    # Endpoint path; required and non-empty. Allowlist membership is a
    # validator concern (V020), not enforced by this model.
    endpoint: str = Field(..., min_length=1)
    # Free-form parameters; defaults to a fresh empty dict per instance.
    params: Dict[str, Any] = Field(default_factory=dict)
203+
204+
205+
class ConditionalOp(BaseModel):
    """
    Conditional branching based on working graph W state.

    Selects which statements to execute based on a ``Condition``. The ``then``
    branch executes when the condition is true; the optional ``else`` branch
    executes otherwise. Nesting depth is limited to ``MAX_NESTING_DEPTH`` (V007).

    @verified 0000000
    """

    # Tag value used to select this operation type.
    type: Literal['conditional']
    condition: Condition
    # 'Statement' is a forward reference (the class is declared later in the
    # module) and is resolved by the model_rebuild() call at the end of the file.
    then: List['Statement'] = Field(..., min_length=1)
    # ``else`` is a Python keyword, so the attribute is ``else_`` and the JSON
    # key is supplied through the alias.
    else_: Optional[List['Statement']] = Field(None, alias='else')

    # Accept both the alias (``else``) and the attribute name (``else_``) as input.
    model_config = {'populate_by_name': True}
221+
222+
223+
Operation = Union[CypherOp, ApiOp, ConditionalOp]
"""Discriminated union of all operation types, dispatched on ``type``. @verified 0000000"""
225+
226+
227+
# ---------------------------------------------------------------------------
228+
# Block annotations (decompilation support)
229+
# ---------------------------------------------------------------------------
230+
231+
class BlockAnnotation(BaseModel):
    """
    Source block type and params for round-trip decompilation.

    When the block editor compiles to the AST, it annotates each statement with
    the originating block type and its parameters. This enables AST-to-blocks
    round-trip.

    @verified 0000000
    """

    # camelCase is the JSON key, so the attribute name is kept as-is.
    blockType: str = Field(..., min_length=1)
    # Block parameters; defaults to a fresh empty dict per instance.
    params: Dict[str, Any] = Field(default_factory=dict)
243+
244+
245+
# ---------------------------------------------------------------------------
246+
# Statement
247+
# ---------------------------------------------------------------------------
248+
249+
class Statement(BaseModel):
    """
    A single step in a GraphProgram.

    Combines a set-algebra operator (``op``) with a typed operation. An optional
    ``label`` provides a human-readable step description. An optional ``block``
    annotation enables decompilation back to the visual block editor.

    @verified 0000000
    """

    op: Operator
    # The concrete operation model is chosen by the value of its ``type`` field.
    operation: Operation = Field(..., discriminator='type')
    label: Optional[str] = None
    block: Optional[BlockAnnotation] = None
263+
264+
265+
# ---------------------------------------------------------------------------
266+
# Metadata and parameters
267+
# ---------------------------------------------------------------------------
268+
269+
class ProgramMetadata(BaseModel):
    """
    Program-level metadata.

    All fields are optional. ``author`` identifies the originator category
    (human, agent, or system).

    @verified 0000000
    """

    name: Optional[str] = None
    description: Optional[str] = None
    author: Optional[Literal['human', 'agent', 'system']] = None
    # Stored as a plain string; no format is enforced by this model.
    created: Optional[str] = None
282+
283+
284+
class ParamDeclaration(BaseModel):
    """
    A program parameter declaration.

    Parameter names must be valid identifiers (letter or underscore, followed by
    alphanumerics or underscores). Only ``string`` and ``number`` types are
    supported. An optional ``default`` provides a fallback when the parameter is
    not supplied at execution time.

    @verified 0000000
    """

    # ASCII identifier: a letter or underscore, then letters, digits or underscores.
    name: str = Field(..., min_length=1, pattern=r'^[a-zA-Z_][a-zA-Z0-9_]*$')
    type: Literal['string', 'number']
    # This model does not check that ``default`` agrees with the declared ``type``.
    default: Optional[Union[str, int, float]] = None
298+
299+
300+
# ---------------------------------------------------------------------------
301+
# GraphProgram (top-level)
302+
# ---------------------------------------------------------------------------
303+
304+
class GraphProgram(BaseModel):
    """
    The canonical AST for a graph query program (ADR-500).

    A finite, bounded sequence of set-algebraic operations over openCypher
    queries and REST API calls. The JSON AST is the single source of truth;
    text DSL, block diagrams, and recorded explorations compile to it.

    Invariants enforced by the type system:
    - ``version`` must equal ``CURRENT_VERSION`` (currently 1)
    - ``statements`` must contain at least one entry

    Additional invariants enforced by the validator:
    - V004: parameter names must be unique
    - V006: total operation count <= MAX_STATEMENTS
    - V007: conditional nesting depth <= MAX_NESTING_DEPTH
    - V010-V016: Cypher queries must not contain write keywords
    - V020-V023: API endpoints must be in the allowlist, with valid parameters

    @verified 0000000
    """

    # Lower and upper bound are both CURRENT_VERSION, so exactly one version is
    # accepted and the constant stays the single place to change it.
    version: int = Field(..., ge=CURRENT_VERSION, le=CURRENT_VERSION)
    # Defaults to a ProgramMetadata with every field unset.
    metadata: ProgramMetadata = Field(default_factory=ProgramMetadata)
    params: Optional[List[ParamDeclaration]] = None
    statements: List[Statement] = Field(..., min_length=1)
329+
330+
331+
# Rebuild models to resolve forward references: ConditionalOp names 'Statement'
# as a string before Statement is defined, and Statement in turn contains
# ConditionalOp through the Operation union.
ConditionalOp.model_rebuild()
Statement.model_rebuild()

0 commit comments

Comments
 (0)