Skip to content

Commit 49f233c

Browse files
authored
feat: add Pydantic V2 models for --json output (#903)
* feat: add Pydantic V2 models for --json output (STOPS-7491) Define structured JSON output models for the 16 pgbelt commands PGBaaS invokes. Simple commands (setup, teardown variants, analyze, load-constraints, revoke-logins) use CommandResult directly with steps + detail. Commands with rich per-item output (precheck, status, sync-sequences, sync-tables, validate-data, create-indexes, check-connectivity, connections) get dedicated subclasses. Includes round-trip serialization/deserialization tests for all models. Made-with: Cursor * fix: isolate poetry from project venv in Dockerfile Poetry and its transitive deps (cryptography, pycparser, cffi) were installed into the same venv as the project. When poetry install ran, it tried to downgrade those packages to match the lock file, corrupting dist-info and breaking pip. Install poetry into a separate venv (/opt/poetry) so it never conflicts with project dependencies. Made-with: Cursor
1 parent ba8f266 commit 49f233c

11 files changed

Lines changed: 1083 additions & 4 deletions

File tree

Dockerfile

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
ARG PYTHON_VERSION=3.13
22
FROM python:${PYTHON_VERSION}-slim
33
ENV VIRTUAL_ENV=/opt/venv
4-
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
4+
ENV POETRY_HOME=/opt/poetry
5+
ENV PATH="$POETRY_HOME/bin:$VIRTUAL_ENV/bin:$PATH"
56

67
RUN set -e \
78
&& apt-get -y update
@@ -11,9 +12,10 @@ RUN apt-get -y install \
1112
gcc
1213

1314
RUN set -e \
14-
&& python -m venv $VIRTUAL_ENV \
15-
&& python -m pip install --upgrade pip \
16-
&& pip install poetry poetry-dynamic-versioning
15+
&& python -m venv $POETRY_HOME \
16+
&& $POETRY_HOME/bin/pip install poetry poetry-dynamic-versioning
17+
18+
RUN python -m venv $VIRTUAL_ENV
1719

1820
COPY ./ /opt/pgbelt
1921
WORKDIR /opt/pgbelt

pgbelt/models/__init__.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
"""
2+
Pydantic V2 models for ``belt --json`` output.
3+
4+
When to use ``CommandResult`` directly vs. create a subclass
5+
------------------------------------------------------------
6+
7+
Use **CommandResult** directly when the command's output is just pass/fail
8+
plus optional step tracking and a handful of extras. Put command-specific
9+
values in the ``detail`` dict. Examples: setup, teardown variants, analyze,
10+
load-constraints, revoke-logins.
11+
12+
Create a **subclass** of ``CommandResult`` when the command produces
13+
per-item structured data that a consumer would iterate, filter, or
14+
aggregate -- e.g. per-table validation results, per-index status,
15+
per-sequence sync detail. The subclass lives in its own file grouped
16+
by domain (sync.py, schema.py, etc.).
17+
18+
Breaking changes to any model require a pgbelt version bump because
19+
PGBaaS deserializes against the same types.
20+
"""
21+
22+
from pgbelt.models.base import CommandError
23+
from pgbelt.models.base import CommandResult
24+
from pgbelt.models.base import StepResult
25+
from pgbelt.models.base import StepStatus
26+
from pgbelt.models.connectivity import ConnectivityCheckResult
27+
from pgbelt.models.connectivity import ConnectivityCheckRow
28+
from pgbelt.models.connections import ConnectionsResult
29+
from pgbelt.models.connections import ConnectionsRow
30+
from pgbelt.models.connections import ConnectionsSide
31+
from pgbelt.models.preflight import PrecheckResult
32+
from pgbelt.models.preflight import PrecheckSide
33+
from pgbelt.models.schema import CreateIndexesResult
34+
from pgbelt.models.schema import IndexDetail
35+
from pgbelt.models.status import StatusResult
36+
from pgbelt.models.status import StatusRow
37+
from pgbelt.models.sync import SyncSequencesResult
38+
from pgbelt.models.sync import SyncTablesResult
39+
from pgbelt.models.sync import ValidateDataResult
40+
41+
__all__ = [
42+
# Base -- used directly by simple commands (setup, teardown, analyze, etc.)
43+
"CommandError",
44+
"CommandResult",
45+
"StepResult",
46+
"StepStatus",
47+
# Rich result models -- commands with structured per-item output
48+
"ConnectivityCheckResult",
49+
"ConnectivityCheckRow",
50+
"ConnectionsResult",
51+
"ConnectionsRow",
52+
"ConnectionsSide",
53+
"PrecheckResult",
54+
"PrecheckSide",
55+
"CreateIndexesResult",
56+
"IndexDetail",
57+
"StatusResult",
58+
"StatusRow",
59+
"SyncSequencesResult",
60+
"SyncTablesResult",
61+
"ValidateDataResult",
62+
]

pgbelt/models/base.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
from __future__ import annotations
2+
3+
from datetime import datetime
4+
from datetime import timezone
5+
from enum import Enum
6+
from typing import Any
7+
from typing import Optional
8+
9+
from pydantic import BaseModel
10+
from pydantic import Field
11+
12+
13+
class StepStatus(str, Enum):
14+
ok = "ok"
15+
skipped = "skipped"
16+
failed = "failed"
17+
18+
19+
class CommandError(BaseModel):
20+
"""Structured error information from a failed command."""
21+
22+
error_type: str
23+
message: str
24+
detail: Optional[str] = None
25+
26+
27+
class StepResult(BaseModel):
28+
"""Outcome of a single phase/step within a command."""
29+
30+
name: str
31+
status: StepStatus
32+
message: Optional[str] = None
33+
duration_ms: Optional[int] = None
34+
35+
36+
class CommandResult(BaseModel):
37+
"""
38+
Base model for all pgbelt --json output.
39+
40+
Simple commands (setup, teardown variants, analyze, load-constraints) use
41+
this directly -- they only need pass/fail, steps, and maybe a few extras
42+
in ``detail``. Commands with rich structured output (precheck, status,
43+
sync-sequences, etc.) extend this with their own fields.
44+
"""
45+
46+
db: str
47+
dc: str
48+
command: str
49+
success: bool
50+
timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
51+
duration_ms: Optional[int] = None
52+
error: Optional[CommandError] = None
53+
steps: list[StepResult] = []
54+
detail: dict[str, Any] = {}

pgbelt/models/connections.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
from __future__ import annotations
2+
3+
4+
from pydantic import BaseModel
5+
6+
from pgbelt.models.base import CommandResult
7+
8+
9+
class ConnectionsSide(BaseModel):
10+
"""Connection summary for one side of a database pair."""
11+
12+
total_connections: int
13+
by_user: dict[str, int] = {}
14+
15+
16+
class ConnectionsRow(BaseModel):
17+
"""Connection info for a single database pair."""
18+
19+
db: str
20+
source: ConnectionsSide
21+
destination: ConnectionsSide
22+
23+
24+
class ConnectionsResult(CommandResult):
25+
"""JSON output for ``belt connections``."""
26+
27+
command: str = "connections"
28+
exclude_users: list[str] = []
29+
exclude_patterns: list[str] = []
30+
results: list[ConnectionsRow] = []

pgbelt/models/connectivity.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
from __future__ import annotations
2+
3+
4+
from pydantic import BaseModel
5+
from pydantic import computed_field
6+
7+
from pgbelt.models.base import CommandResult
8+
9+
10+
class ConnectivityCheckRow(BaseModel):
11+
"""Connectivity results for a single database pair."""
12+
13+
db: str
14+
src_tcp: bool
15+
src_query: bool
16+
src_to_dst_dblink: bool
17+
dst_tcp: bool
18+
dst_query: bool
19+
dst_to_src_dblink: bool
20+
21+
@computed_field
22+
@property
23+
def all_ok(self) -> bool:
24+
return all(
25+
[
26+
self.src_tcp,
27+
self.src_query,
28+
self.src_to_dst_dblink,
29+
self.dst_tcp,
30+
self.dst_query,
31+
self.dst_to_src_dblink,
32+
]
33+
)
34+
35+
@computed_field
36+
@property
37+
def failed_checks(self) -> list[str]:
38+
checks = {
39+
"src_tcp": self.src_tcp,
40+
"src_query": self.src_query,
41+
"src_to_dst_dblink": self.src_to_dst_dblink,
42+
"dst_tcp": self.dst_tcp,
43+
"dst_query": self.dst_query,
44+
"dst_to_src_dblink": self.dst_to_src_dblink,
45+
}
46+
return [name for name, passed in checks.items() if not passed]
47+
48+
49+
class ConnectivityCheckResult(CommandResult):
50+
"""JSON output for ``belt check-connectivity``."""
51+
52+
command: str = "check-connectivity"
53+
results: list[ConnectivityCheckRow] = []
54+
55+
@computed_field
56+
@property
57+
def overall_ok(self) -> bool:
58+
return all(r.all_ok for r in self.results)

pgbelt/models/preflight.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
from __future__ import annotations
2+
3+
from typing import Optional
4+
5+
from pydantic import BaseModel
6+
from pydantic import computed_field
7+
8+
from pgbelt.models.base import CommandResult
9+
10+
11+
class RoleInfo(BaseModel):
12+
"""Role/user details from pg_roles."""
13+
14+
rolname: str
15+
rolcanlogin: bool
16+
rolcreaterole: bool
17+
rolinherit: bool
18+
rolsuper: bool
19+
memberof: list[str] = []
20+
can_create: Optional[bool] = None
21+
22+
23+
class RelationInfo(BaseModel):
24+
"""A table or sequence discovered in the schema."""
25+
26+
name: str
27+
schema_name: str
28+
owner: str
29+
object_type: str
30+
31+
@computed_field
32+
@property
33+
def can_replicate(self) -> bool:
34+
"""True when schema and owner match the targeted migration config."""
35+
return True # Actual check requires runtime context; populated by caller.
36+
37+
38+
class TableReplicationInfo(BaseModel):
39+
"""Table with replication-method classification (source side only)."""
40+
41+
name: str
42+
schema_name: str
43+
owner: str
44+
has_primary_key: bool
45+
replication_method: str # "pglogical" | "dump_and_load" | "unavailable"
46+
47+
48+
class ExtensionInfo(BaseModel):
49+
"""Installed Postgres extension."""
50+
51+
extname: str
52+
in_other_side: Optional[bool] = None
53+
54+
55+
class PrecheckSide(BaseModel):
56+
"""Precheck data for one side (source or destination) of a migration pair."""
57+
58+
db: str
59+
schema_name: str
60+
server_version: str
61+
max_replication_slots: str
62+
max_worker_processes: str
63+
max_wal_senders: str
64+
shared_preload_libraries: list[str] = []
65+
rds_logical_replication: str
66+
root_user: RoleInfo
67+
owner_user: RoleInfo
68+
tables: list[TableReplicationInfo] = []
69+
sequences: list[RelationInfo] = []
70+
extensions: list[ExtensionInfo] = []
71+
72+
@computed_field
73+
@property
74+
def root_ok(self) -> bool:
75+
return (
76+
self.root_user.rolcanlogin
77+
and self.root_user.rolcreaterole
78+
and self.root_user.rolinherit
79+
and ("rds_superuser" in self.root_user.memberof or self.root_user.rolsuper)
80+
)
81+
82+
@computed_field
83+
@property
84+
def shared_preload_ok(self) -> bool:
85+
return (
86+
"pglogical" in self.shared_preload_libraries
87+
and "pg_stat_statements" in self.shared_preload_libraries
88+
)
89+
90+
91+
class PrecheckResult(CommandResult):
92+
"""JSON output for ``belt precheck``."""
93+
94+
command: str = "precheck"
95+
src: Optional[PrecheckSide] = None
96+
dst: Optional[PrecheckSide] = None
97+
98+
@computed_field
99+
@property
100+
def extensions_match(self) -> Optional[bool]:
101+
if self.src is None or self.dst is None:
102+
return None
103+
src_names = {e.extname for e in self.src.extensions}
104+
dst_names = {e.extname for e in self.dst.extensions}
105+
return src_names == dst_names

pgbelt/models/schema.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from __future__ import annotations
2+
3+
from typing import Optional
4+
5+
from pydantic import BaseModel
6+
7+
from pgbelt.models.base import CommandResult
8+
9+
10+
class IndexDetail(BaseModel):
11+
"""Result for a single CREATE INDEX operation."""
12+
13+
name: str
14+
status: str # "created" | "skipped_exists" | "failed"
15+
duration_ms: Optional[int] = None
16+
error: Optional[str] = None
17+
18+
19+
class CreateIndexesResult(CommandResult):
20+
"""JSON output for ``belt create-indexes``."""
21+
22+
command: str = "create-indexes"
23+
indexes_file: Optional[str] = None
24+
indexes: list[IndexDetail] = []
25+
analyze_ran: bool = False
26+
27+
@property
28+
def created_count(self) -> int:
29+
return sum(1 for i in self.indexes if i.status == "created")
30+
31+
@property
32+
def skipped_count(self) -> int:
33+
return sum(1 for i in self.indexes if i.status == "skipped_exists")
34+
35+
@property
36+
def failed_count(self) -> int:
37+
return sum(1 for i in self.indexes if i.status == "failed")

0 commit comments

Comments
 (0)