Skip to content

Commit 0be7eb9

Browse files
committed
feat: add policy control
1 parent c25b0c6 commit 0be7eb9

17 files changed

Lines changed: 506 additions & 74 deletions

File tree

configs/policies.json

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
{
2+
"admin": {
3+
"description": "System Administrator",
4+
"role": "admin",
5+
"allowed_datasources": [
6+
"*"
7+
],
8+
"allowed_tables": [
9+
"*"
10+
]
11+
},
12+
"ops_manager": {
13+
"description": "Operations Manager",
14+
"role": "manager",
15+
"allowed_datasources": [
16+
"manufacturing_ops",
17+
"manufacturing_ref"
18+
],
19+
"allowed_tables": [
20+
"manufacturing_ops.employees",
21+
"manufacturing_ops.machines",
22+
"manufacturing_ops.maintenance_logs",
23+
"manufacturing_ops.spare_parts",
24+
"manufacturing_ref.factories",
25+
"manufacturing_ref.machine_types",
26+
"manufacturing_ref.shifts"
27+
]
28+
},
29+
"sales_analyst": {
30+
"description": "Sales & Supply Chain Analyst",
31+
"role": "analyst",
32+
"allowed_datasources": [
33+
"manufacturing_history",
34+
"manufacturing_supply"
35+
],
36+
"allowed_tables": [
37+
"manufacturing_history.customers",
38+
"manufacturing_history.sales_orders",
39+
"manufacturing_history.sales_order_items",
40+
"manufacturing_history.production_runs",
41+
"manufacturing_history.defects",
42+
"manufacturing_supply.products",
43+
"manufacturing_supply.inventory",
44+
"manufacturing_supply.suppliers",
45+
"manufacturing_supply.purchase_orders",
46+
"manufacturing_supply.purchase_order_items"
47+
]
48+
},
49+
"guest": {
50+
"description": "Guest User",
51+
"role": "guest",
52+
"allowed_datasources": [
53+
"manufacturing_supply"
54+
],
55+
"allowed_tables": [
56+
"manufacturing_supply.products"
57+
]
58+
}
59+
}

docs/architecture/security.md

Lines changed: 47 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -106,29 +106,66 @@ The system supports extensible providers via the `SecretProvider` protocol. You
106106

107107
### Strict Validation (V3)
108108

109-
Datasource configurations are validated using Pydantic V3 models (`DatasourceProfile`). This ensures:
109+
Datasource configurations are validated strictly at load time. This ensures:
110110

111111
* **Type Safety**: Malformed integers or booleans are rejected.
112112
* **Field Constraints**: Unknown fields are forbidden, preventing "config injection" or typos.
113-
* **Sanitization**: Passwords and sensitive fields are masked in logs (via `__repr__` overrides).
113+
* **Sanitization**: Passwords and sensitive fields are masked in logs.
114+
* **Adapter Specifics**: Each adapter (e.g., `PostgresAdapter`) defines and validates its own configuration schema requirements.
114115

115-
### User Authorization Config
116+
### 6.1 Policy Definition (`configs/policies.json`)
116117

117-
User roles and permissions are defined in `users.json` (pointed to by `USERS_CONFIG` setting).
118+
The application uses **Role-Based Access Control (RBAC)**. The `policies.json` file defines policies keyed by **Role ID** (e.g., `admin`, `analyst`).
118119

119-
**Example Configuration**:
120+
**Strict Namespacing Rule**: To prevent namespace collisions, `allowed_tables` MUST use the format `datasource_id.table_name`. Simple table names are not supported.
121+
122+
#### Example
120123

121124
```json
122125
{
123-
"guest": {
124-
"role": "guest",
125-
"allowed_datasources": ["public_data"],
126-
"allowed_tables": ["products", "store_locations"]
126+
"sales_analyst": {
127+
"description": "Access to Sales DB only",
128+
"role": "analyst",
129+
"allowed_datasources": ["sales_db"],
130+
"allowed_tables": [
131+
// Exact Match
132+
"sales_db.orders",
133+
134+
// Table-name wildcard (any table in sales_db whose name starts with "customers_")
135+
"sales_db.customers_*"
136+
]
127137
},
128138
"admin": {
139+
"description": "Super Admin",
129140
"role": "admin",
130-
"allowed_datasources": ["*"],
131141
"allowed_tables": ["*"]
132142
}
133143
}
134144
```
145+
146+
When running from the CLI, select the role with `--role`, e.g. `nl2sql run ... --role sales_analyst`.
147+
The application assumes the identity provider has already authenticated the user and assigned this role.
148+
149+
### 6.2 Policy Schema & Validation
150+
151+
Policies are treated as **Configuration Code**. To prevent misconfiguration (e.g., typos, invalid types), the system validates `policies.json` against a strict **Pydantic Schema** at startup.
152+
153+
**Schema (`nl2sql.security.policies`)**:
154+
155+
1. **Strict Typing**: Fields like `allowed_datasources` MUST be lists of strings.
156+
2. **Syntax Enforcement**: `allowed_tables` values are validated ensuring they match the `datasource_id.table_name` or wildcard format.
157+
3. **Fail Fast**: If the configuration is invalid, the application refuses to start, printing a clear error message describing the violation.
158+
159+
### 6.3 Policy Management CLI
160+
161+
You can validate your policy file without running a query using the CLI.
162+
163+
```bash
164+
# Validate Syntax & Integrity
165+
nl2sql policy validate
166+
```
167+
168+
This command performs two checks:
169+
170+
1. **Schema Check**: Validates syntax against the Pydantic model.
171+
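2. **Integrity Check**: Verifies that every referenced datasource — both the entries in `allowed_datasources` and the `datasource_id` prefix of each `allowed_tables` rule — is actually defined in `datasources.yaml`. Table names themselves are not verified, because that would require a live database connection; the check catches datasource typos before runtime.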

docs/guides/configuration.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ You can configure the application using the following environment variables (def
172172
| `DATASOURCE_CONFIG` | `configs/datasources.yaml` | Path to the datasource configuration file. |
173173
| `SECRETS_CONFIG` | `configs/secrets.yaml` | Path to the secrets configuration file. |
174174
| `LLM_CONFIG` | `configs/llm.yaml` | Path to the LLM model configuration file. |
175-
| `USERS_CONFIG` | `users.json` | Path to the user permissions file. |
175+
| `POLICIES_CONFIG` | `configs/policies.json` | Path to the RBAC policies file. |
176176
| `VECTOR_STORE` | `./chroma_db` | Path (directory) to persist the vector store. |
177177
| `EMBEDDING_MODEL` | `text-embedding-3-small` | OpenAI embedding model name. |
178178
| `BENCHMARK_CONFIG` | `configs/benchmark_suite.yaml` | Path to the accuracy-testing benchmark suite configuration. |

docs/guides/deployment.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ services:
3434
3535
## Production Checklist
3636
37-
1. [ ] **Security**: Ensure `users.json` is mapped to your identity provider (e.g. OAuth) to dynamically enforce `allowed_tables`.
37+
1. [ ] **Security**: Ensure the roles defined in `configs/policies.json` are mapped to the roles issued by your identity provider (e.g. OAuth) so that `allowed_tables` is enforced dynamically for each user.
3838
2. [ ] **Read-Only**: Configure the database users in `datasources.yaml` to have READ-ONLY permissions at the database level.
3939
> The PhysicalValidator provides a safety net, but deep defense requires DB-level grants.
4040
3. [ ] **Monitoring**: Enable the `PipelineMonitorCallback` to log all traces to your observability stack (e.g. LangSmith).

packages/cli/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ nl2sql chat
6060
Execute a query and see the result (JSON or Text streaming).
6161

6262
```bash
63-
nl2sql run "Show me the top 5 customers by revenue"
63+
nl2sql run "Show me the top 5 customers by revenue" --role sales_analyst
6464
```
6565

6666
## Architecture
@@ -79,7 +79,7 @@ The CLI relies on the standard NL2SQL configuration files:
7979

8080
- `datasources.yaml`: Connection profiles.
8181
- `llm_config.yaml`: LLM provider settings.
82-
- `users.json`: User context and permissions.
82+
- `configs/policies.json`: RBAC policies and permissions.
8383

8484
Run `nl2sql doctor` to see where it expects these files to be.
8585

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
import typer
2+
import pathlib
3+
import sys
4+
from typing_extensions import Annotated
5+
from rich.console import Console
6+
from rich.table import Table
7+
8+
from nl2sql.common.settings import settings
9+
from nl2sql.security.policies import PolicyConfig
10+
from nl2sql.datasources.config import load_configs
11+
from nl2sql.datasources import DatasourceRegistry
12+
from pydantic import ValidationError
13+
from nl2sql.secrets import secret_manager, load_secret_configs
14+
15+
app = typer.Typer(help="Manage RBAC policies and security.")
16+
console = Console()
17+
18+
@app.command("validate")
def validate(
    config: Annotated[pathlib.Path, typer.Option("--config", help="Path to datasource config")] = pathlib.Path(settings.datasource_config_path),
    policies: Annotated[pathlib.Path, typer.Option("--policies", help="Path to policies.json")] = pathlib.Path(settings.policies_config_path),
    secrets: Annotated[pathlib.Path, typer.Option("--secrets", help="Path to secrets config")] = pathlib.Path(settings.secrets_config_path),
):
    """
    Validate policy syntax and integrity against defined datasources.

    Two checks run in order: the policy file is parsed against the
    PolicyConfig schema, then every datasource referenced by a role is
    compared with the datasources defined in the datasource config.
    The process exits with status 1 as soon as a check fails.
    """
    # Local import so the module's import block does not have to change.
    # Everything interpolated into a Rich markup string below is escaped:
    # pydantic error text contains "[type=..., input_value=...]" spans that
    # Rich would otherwise parse as style tags and drop from the output.
    from rich.markup import escape

    console.print(f"[bold blue]Validating Policies from:[/bold blue] {escape(str(policies))}")

    # 1. Load Policies (Schema Check)
    # NOTE: sys.exit raises SystemExit, which is not an Exception subclass,
    # so the broad handlers below never intercept the explicit exits.
    try:
        if not policies.exists():
            console.print(f"[bold red]Error:[/bold red] Policy file not found at {escape(str(policies))}")
            sys.exit(1)

        # Read as UTF-8 explicitly instead of the platform default encoding.
        raw_json = policies.read_text(encoding="utf-8")

        policy_cfg = PolicyConfig.model_validate_json(raw_json)
        console.print("[green]✓ Schema Syntax Valid[/green]")

    except ValidationError as ve:
        console.print(f"[bold red]Schema Validation Failed:[/bold red]\n{escape(str(ve))}")
        sys.exit(1)
    except Exception as e:
        console.print(f"[bold red]Error loading policies:[/bold red] {escape(str(e))}")
        sys.exit(1)

    # 2. Load Datasources (Integrity Check)
    console.print(f"[bold blue]Checking Integrity against Datasources:[/bold blue] {escape(str(config))}")
    try:
        # Secrets are needed to load datasources properly
        if secrets.exists():
            secret_configs = load_secret_configs(secrets)
            secret_manager.configure(secret_configs)

        ds_configs = load_configs(config)
        registry = DatasourceRegistry(ds_configs)

        available_ds = set(registry.list_ids())
        # Sorted so the listing is stable between runs.
        available_listing = ", ".join(sorted(str(ds_id) for ds_id in available_ds))
        console.print(f"[dim]Available Datasources: {escape(available_listing)}[/dim]")

        has_errors = False

        table = Table(title="Policy Integrity Report")
        table.add_column("Role", style="cyan")
        table.add_column("Target", style="magenta")
        table.add_column("Status", style="green")
        table.add_column("Details", style="white")

        for role_id, role_def in policy_cfg.root.items():
            role_cell = escape(str(role_id))

            # Check Datasources
            for ds in role_def.allowed_datasources:
                if ds == "*":
                    table.add_row(role_cell, "Datasource: *", "[green]OK[/green]", "Global Access")
                    continue

                if ds not in available_ds:
                    table.add_row(role_cell, f"Datasource: {escape(ds)}", "[red]MISSING[/red]", "Datasource not defined in config")
                    has_errors = True
                else:
                    table.add_row(role_cell, f"Datasource: {escape(ds)}", "[green]OK[/green]", "Verified")

            # Check Tables (heuristic only): table names cannot be verified
            # without a database connection, which is slow/expensive, but the
            # 'datasource' component of 'datasource.table' can be.
            for rule in role_def.allowed_tables:
                if rule == "*":
                    table.add_row(role_cell, "Table: *", "[green]OK[/green]", "Global Access")
                    continue

                ds_part, separator, _ = rule.partition(".")
                if not separator:
                    # A rule with no datasource prefix used to be skipped
                    # silently; report it so it cannot pass unnoticed.
                    table.add_row(role_cell, f"Table Rule: {escape(rule)}", "[red]INVALID FORMAT[/red]", "Expected 'datasource_id.table_name'")
                    has_errors = True
                    continue

                if ds_part not in available_ds and ds_part != "*":
                    table.add_row(role_cell, f"Table Rule: {escape(rule)}", "[red]INVALID DS[/red]", f"Datasource '{escape(ds_part)}' unknown")
                    has_errors = True
                else:
                    table.add_row(role_cell, f"Table Rule: {escape(rule)}", "[green]OK[/green]", "DS Verified")

        console.print(table)

        if has_errors:
            console.print("\n[bold red]Integrity Check Failed: Policies reference missing resources.[/bold red]")
            sys.exit(1)
        else:
            console.print("\n[bold green]✓ Policy Integrity Verified[/bold green]")

    except Exception as e:
        console.print(f"[bold red]Integrity Check Failed:[/bold red] {escape(str(e))}")
        sys.exit(1)

packages/cli/src/nl2sql_cli/commands/run.py

Lines changed: 29 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -61,29 +61,45 @@ def _run_simple_mode(
6161

6262
start_time = time.perf_counter()
6363

64-
# Load User Context
65-
user_context = {}
64+
# Load Role Context (RBAC)
65+
policy_context = {}
6666
try:
6767
import pathlib
68-
users_path = pathlib.Path(settings.users_config_path)
69-
if users_path.exists():
70-
with open(users_path, "r") as f:
71-
users_db = json.load(f)
72-
user_context = users_db.get(config.user)
68+
from nl2sql.security.policies import PolicyConfig
69+
from pydantic import ValidationError
70+
71+
policies_path = pathlib.Path(settings.policies_config_path)
72+
if policies_path.exists():
73+
with open(policies_path, "r") as f:
74+
raw_json = f.read()
75+
76+
try:
77+
# 1. Strict Schema Validation
78+
policy_cfg = PolicyConfig.model_validate_json(raw_json)
7379

74-
if not user_context:
75-
presenter.print_error(f"Critical: User '{config.user}' not found in '{users_path.resolve()}'. Cannot execute without context.")
80+
# 2. Look up by Role ID
81+
role_policy = policy_cfg.get_role(config.role)
82+
83+
if not role_policy:
84+
presenter.print_error(f"Critical: Role '{config.role}' not defined in '{policies_path.resolve()}'. Available roles: {list(policy_cfg.root.keys())}")
7685
sys.exit(1)
86+
87+
# 3. Convert to Dict for Pipeline
88+
policy_context = role_policy.model_dump()
89+
90+
except ValidationError as ve:
91+
presenter.print_error(f"Policy Configuration Error in '{policies_path}':\n{ve}")
92+
sys.exit(1)
7793
else:
78-
presenter.print_error(f"Critical: User config file not found at '{users_path}'. Cannot load context.")
94+
presenter.print_error(f"Critical: Policy config file not found at '{policies_path}'. Cannot load context.")
7995
sys.exit(1)
8096

8197
except Exception as e:
82-
presenter.print_error(f"Failed to load user context: {e}")
98+
presenter.print_error(f"Failed to load policy context: {e}")
8399
sys.exit(1)
84100

85101
try:
86-
print(f"User Context: {user_context}")
102+
print(f"Policy Context: {policy_context}")
87103
final_state = run_with_graph(
88104
registry=datasource_registry,
89105
llm_registry=llm_registry,
@@ -93,7 +109,7 @@ def _run_simple_mode(
93109
vector_store=vector_store,
94110
vector_store_path=config.vector_store_path,
95111
callbacks=[monitor],
96-
user_context=user_context
112+
user_context=policy_context
97113
)
98114
except Exception as e:
99115
import traceback

0 commit comments

Comments
 (0)