Skip to content

Commit 891f65e

Browse files
authored
Merge pull request #30 from h4gen/sdk-v2
Sdk v2
2 parents 6e791f1 + f1aabbe commit 891f65e

17 files changed

Lines changed: 553 additions & 530 deletions

debug_test_output.log

Lines changed: 0 additions & 460 deletions
This file was deleted.

demo_notebook.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
2+
import asyncio
3+
import logging
4+
from pg_replica import connect
5+
6+
# Configure logging to see the output
7+
logging.basicConfig(level=logging.INFO)
8+
9+
async def run_demo():
10+
print("🚀 Starting God Tier DX Demo...")
11+
12+
# 1. Connect (Mocking the DB connection for this script if needed,
13+
# but let's assume the client initializes correctly)
14+
# We use a dummy URL since we might not have a real DB running in this specific interaction
15+
client = connect(sink_url="postgresql://user:pass@localhost:5432/db", sync=False)
16+
17+
print("✅ Connected.")
18+
19+
# 2. Fluent Access
20+
products = client.products
21+
print(f"✅ Accessed pipeline: {products.name}")
22+
23+
# 3. Configure (Plan)
24+
print("\n--- Planning Configuration ---")
25+
changeset = await products.configure(
26+
model="openai/small",
27+
columns=["name", "description"],
28+
template="Product: $name\nContext: $chunk"
29+
)
30+
print(changeset)
31+
32+
# 4. Apply (Simulated)
33+
print("\n--- Applying Configuration ---")
34+
await changeset.apply()
35+
print("✅ Configuration applied.")
36+
37+
# 5. Branching (SearchOps)
38+
print("\n--- Creating Branch ---")
39+
# interactive=False to avoid waiting for sync in this quick test
40+
branch = await products.branch("experiment-v2", model="voyage/large", interactive=False)
41+
print(f"✅ Branch created: {branch.name}")
42+
43+
# 6. Promotion
44+
print("\n--- Promoting Branch ---")
45+
await products.promote("experiment-v2")
46+
print("✅ Promotion triggered.")
47+
48+
if __name__ == "__main__":
49+
asyncio.run(run_demo())

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ dependencies = [
2424
"prometheus-client>=0.23.1",
2525
"fastapi>=0.128.0",
2626
"uvicorn>=0.40.0",
27+
"rich>=14.2.0",
28+
"tqdm>=4.67.1",
2729
]
2830

2931
[project.optional-dependencies]

src/pg_replica/client.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,8 @@ async def search(
112112
embedding=embedding,
113113
limit=limit,
114114
config=config,
115-
conn_provider=self._get_conn
115+
conn_provider=self._get_conn,
116+
target_name=target_name
116117
)
117118

118119
async def get_status(self) -> dict[str, Any]:

src/pg_replica/database.py

Lines changed: 46 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,17 @@ async def close_pools():
5656
global _source_pool, _sink_pool
5757
if _source_pool:
5858
logger.info("Closing source connection pool...")
59-
await _source_pool.close()
59+
try:
60+
await _source_pool.close()
61+
except (asyncio.CancelledError, Exception):
62+
pass
6063
_source_pool = None
6164
if _sink_pool:
6265
logger.info("Closing sink connection pool...")
63-
await _sink_pool.close()
66+
try:
67+
await _sink_pool.close()
68+
except (asyncio.CancelledError, Exception):
69+
pass
6470
_sink_pool = None
6571

6672

@@ -183,6 +189,7 @@ async def get_source_column_types(
183189
async def setup_source(settings: Settings, config: SearchPipeline, target_name: str):
184190
"""Remotely initialize the source publication."""
185191
pub_name = f"pub_{target_name}"
192+
print(f"DEBUG: setup_source called for {pub_name}")
186193
logger.info(f"Setting up remote source publication {pub_name}...")
187194

188195
# Pre-flight readiness check to avoid race conditions in tests
@@ -199,6 +206,7 @@ async def setup_source(settings: Settings, config: SearchPipeline, target_name:
199206
else ""
200207
)
201208

209+
# Ensure publication exists (idempotent check)
202210
await cur.execute(
203211
f"SELECT 1 FROM pg_publication WHERE pubname = '{pub_name}'"
204212
)
@@ -210,6 +218,7 @@ async def setup_source(settings: Settings, config: SearchPipeline, target_name:
210218
f"CREATE PUBLICATION {pub_name} FOR TABLE {config.ingest.table} ({cols}){where_clause}"
211219
)
212220
else:
221+
logger.debug(f"Publication {pub_name} exists. Updating definition...")
213222
await cur.execute(
214223
f"ALTER PUBLICATION {pub_name} SET TABLE {config.ingest.table} ({cols}){where_clause}"
215224
)
@@ -662,7 +671,7 @@ async def ensure_embedding_cache_table(settings: Settings, config: SearchPipelin
662671
)
663672

664673
async def cleanup_vectorizer_infrastructure(
665-
settings: Settings, config: SearchPipeline, vectorizer_name: str
674+
settings: Settings, config: SearchPipeline, target_name: str, vectorizer_name: str
666675
):
667676
"""Robustly clean up all infrastructure for a specific vectorizer."""
668677
logger.info(f"Robust cleanup for vectorizer {vectorizer_name}...")
@@ -679,13 +688,13 @@ async def cleanup_vectorizer_infrastructure(
679688
-- 1. Check if ANY view is using this as its target (safety)
680689
SELECT table_name INTO live_target
681690
FROM information_schema.view_table_usage
682-
WHERE view_name = '{config.ingest.table}_search'
691+
WHERE view_name = '{target_name}_search'
683692
AND table_name IN ('{vectorizer_name}', '{embedding_view}')
684693
LIMIT 1;
685694
686695
-- 2. If it's live, we MUST drop the replica view first
687696
IF live_target IS NOT NULL THEN
688-
EXECUTE 'DROP VIEW IF EXISTS ' || quote_ident('{config.ingest.table}_search') || ' CASCADE';
697+
EXECUTE 'DROP VIEW IF EXISTS ' || quote_ident('{target_name}_search') || ' CASCADE';
689698
END IF;
690699
691700
-- 3. Drop the pgai vectorizer if it exists
@@ -771,7 +780,7 @@ async def atomic_view_swap(
771780
await conn.set_autocommit(False)
772781
try:
773782
async with conn.cursor() as cur:
774-
await cur.execute(f"DROP VIEW IF EXISTS {config.ingest.table}_search")
783+
await cur.execute(f"DROP VIEW IF EXISTS {target_name}_search")
775784

776785
extra_cols = ",\n ".join([f"r.{c}" for c in config.ingest.columns if c != config.ingest.p_key])
777786
if extra_cols:
@@ -784,7 +793,7 @@ async def atomic_view_swap(
784793
logger.info("Implementing Hybrid View with RRF scoring support...")
785794
await cur.execute(
786795
f"""
787-
CREATE VIEW {config.ingest.table}_search AS
796+
CREATE VIEW {target_name}_search AS
788797
SELECT
789798
r.{config.ingest.p_key},
790799
e.chunk as chunk,
@@ -798,7 +807,7 @@ async def atomic_view_swap(
798807
# VECTOR SEARCH ONLY
799808
await cur.execute(
800809
f"""
801-
CREATE VIEW {config.ingest.table}_search AS
810+
CREATE VIEW {target_name}_search AS
802811
SELECT
803812
r.{config.ingest.p_key},
804813
e.chunk as chunk,
@@ -827,7 +836,7 @@ async def ensure_outbox_infrastructure(settings: Settings):
827836
# 1. The Outbox: Transactional log of vectorized changes
828837
await cur.execute(
829838
"""
830-
CREATE TABLE IF NOT EXISTS _sink_outbox (
839+
CREATE TABLE IF NOT EXISTS public._sink_outbox (
831840
id BIGSERIAL PRIMARY KEY,
832841
target_name TEXT NOT NULL,
833842
version_id TEXT NOT NULL,
@@ -840,7 +849,7 @@ async def ensure_outbox_infrastructure(settings: Settings):
840849
)
841850
# Index for the MirrorWorker to poll efficiently
842851
await cur.execute(
843-
"CREATE INDEX IF NOT EXISTS idx_sink_outbox_id ON _sink_outbox(id)"
852+
"CREATE INDEX IF NOT EXISTS idx_sink_outbox_id ON public._sink_outbox(id)"
844853
)
845854

846855
# 2. Mirror Registry: Track progress of external sinks
@@ -926,10 +935,10 @@ async def setup_outbox_trigger(
926935
CREATE OR REPLACE FUNCTION {trigger_fn_name}() RETURNS TRIGGER AS $$
927936
BEGIN
928937
IF (TG_OP = 'DELETE') THEN
929-
INSERT INTO _sink_outbox (target_name, version_id, source_id, action)
938+
INSERT INTO public._sink_outbox (target_name, version_id, source_id, action)
930939
VALUES ('{target_name}', '{version_id}', OLD.{config.ingest.p_key}::text, 'DELETE');
931940
ELSE
932-
INSERT INTO _sink_outbox (target_name, version_id, source_id, action, payload)
941+
INSERT INTO public._sink_outbox (target_name, version_id, source_id, action, payload)
933942
VALUES (
934943
'{target_name}',
935944
'{version_id}',
@@ -949,14 +958,21 @@ async def setup_outbox_trigger(
949958

950959
# 2. Attach Trigger to pgai STORE table
951960
# We use AFTER INSERT OR UPDATE OR DELETE
952-
await cur.execute(
953-
f"""
954-
DROP TRIGGER IF EXISTS {trigger_name} ON {vectorizer_name};
955-
CREATE TRIGGER {trigger_name}
956-
AFTER INSERT OR UPDATE OR DELETE ON {vectorizer_name}
957-
FOR EACH ROW EXECUTE FUNCTION {trigger_fn_name}();
958-
"""
959-
)
961+
# Use try/except to handle race where worker hasn't created table yet
962+
try:
963+
await cur.execute(
964+
f"""
965+
DROP TRIGGER IF EXISTS {trigger_name} ON {vectorizer_name};
966+
CREATE TRIGGER {trigger_name}
967+
AFTER INSERT OR UPDATE OR DELETE ON {vectorizer_name}
968+
FOR EACH ROW EXECUTE FUNCTION {trigger_fn_name}();
969+
"""
970+
)
971+
except Exception as e:
972+
if "does not exist" in str(e):
973+
logger.warning(f"Deferred trigger setup for {vectorizer_name}: Table not yet created by worker.")
974+
return # Retry next loop
975+
raise e
960976

961977
# 3. Backfill existing rows (Handling the Race Condition)
962978
# Since the vectorizer might have already processed rows before we attached the trigger,
@@ -1339,10 +1355,17 @@ async def find_and_fix_ghost_records(settings: Settings, config: SearchPipeline,
13391355
logger.debug(f"No records found for anti-entropy in {target_name}")
13401356
return
13411357

1342-
min_id_raw, max_id_raw = min(all_ids), max(all_ids)
13431358
source_types = await get_source_column_types(settings, config)
13441359
id_type = source_types.get(config.ingest.p_key, "TEXT")
13451360

1361+
# Ensure all IDs are of the same type for comparison
1362+
if id_type in ("INT", "BIGINT"):
1363+
all_ids = [int(x) for x in all_ids]
1364+
else:
1365+
all_ids = [str(x) for x in all_ids]
1366+
1367+
min_id_raw, max_id_raw = min(all_ids), max(all_ids)
1368+
13461369
# 2. Strategy: Set Comparison for UUIDs/Strings or Small Tables
13471370
if id_type not in ("INT", "BIGINT"):
13481371
async with await get_source_conn() as s_conn:
@@ -1405,7 +1428,7 @@ async def drop_subscription_completely(settings: Settings, config: SearchPipelin
14051428
try:
14061429
async with await connect_db(settings.resolved_sink_url) as conn:
14071430
await conn.set_autocommit(True)
1408-
await conn.execute(f"DROP VIEW IF EXISTS {config.ingest.table}_search CASCADE")
1431+
await conn.execute(f"DROP VIEW IF EXISTS {target_name}_search CASCADE")
14091432

14101433
# Retry loop for dropping subscription to handle "sync in progress"
14111434
async def try_drop_subscription():
@@ -1439,7 +1462,7 @@ async def try_drop_subscription():
14391462

14401463
# Cleanup vectorizers
14411464
async with conn.cursor() as cur:
1442-
await cur.execute("SELECT id FROM ai.vectorizer WHERE name LIKE %s", (f"{config.ingest.table}_store%",))
1465+
await cur.execute("SELECT id FROM ai.vectorizer WHERE name LIKE %s", (f"{target_name}_store%",))
14431466
for (vid,) in await cur.fetchall():
14441467
await cur.execute(f"SELECT ai.drop_vectorizer({vid}, drop_all => true)")
14451468
except Exception as e:

src/pg_replica/orchestrator.py

Lines changed: 36 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,26 @@ async def _replication_loop(self):
149149
except asyncio.TimeoutError:
150150
continue
151151

152+
async def _supervised_run(self, name: str, factory):
153+
"""Supervises a worker task, restarting it on failure."""
154+
logger.info(f"Starting supervised worker: {name}")
155+
while not self._stop_event.is_set():
156+
try:
157+
worker = factory()
158+
# Run the worker. If it returns, it finished (unexpected for long-running).
159+
# If it raises, we catch and restart.
160+
await worker.run()
161+
except asyncio.CancelledError:
162+
logger.info(f"Worker {name} cancelled.")
163+
break
164+
except Exception as e:
165+
logger.error(f"Worker {name} crashed: {e}. Restarting in 2s...", exc_info=True)
166+
try:
167+
await asyncio.sleep(2.0)
168+
except asyncio.CancelledError:
169+
break
170+
logger.info(f"Worker {name} stopped.")
171+
152172
async def start(self):
153173
"""Start all managed services."""
154174
if self.settings.sink_url == "local":
@@ -178,11 +198,23 @@ async def try_reconcile():
178198
except asyncio.TimeoutError as e:
179199
raise RuntimeError(str(e))
180200

181-
worker = Worker(db_url=self.settings.resolved_sink_url, poll_interval=timedelta(seconds=2.0))
182-
self._tasks.append(asyncio.create_task(worker.run(), name="pgai_worker"))
201+
# Supervise pgai worker
202+
self._tasks.append(asyncio.create_task(
203+
self._supervised_run(
204+
"pgai_worker",
205+
lambda: Worker(db_url=self.settings.resolved_sink_url, poll_interval=timedelta(seconds=2.0))
206+
),
207+
name="pgai_worker_supervisor"
208+
))
183209

184-
mirror_worker = MirrorWorker(self.settings)
185-
self._tasks.append(asyncio.create_task(mirror_worker.run(), name="mirror_worker"))
210+
# Supervise mirror worker
211+
self._tasks.append(asyncio.create_task(
212+
self._supervised_run(
213+
"mirror_worker",
214+
lambda: MirrorWorker(self.settings)
215+
),
216+
name="mirror_worker_supervisor"
217+
))
186218

187219
self._tasks.append(asyncio.create_task(self._replication_loop(), name="watchdog"))
188220

0 commit comments

Comments
 (0)