Skip to content

Commit 8a68b6c

Browse files
authored
feat: fork database (#185)
Fixes #184 <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit * **New Features** * Admin-level database fork: create independent duplicates of databases (preserves data and attributes). Requires server >= 1.2.0. * **Tests** * Added integration tests covering successful forks, empty sources, attribute preservation, destination-exists errors, independence, and repeated forks. * **Chores** * Updated pre-commit Python version and lint configuration for tests. * **Style** * Minor test cleanup (no functional change). <!-- end of auto-generated comment: release notes by coderabbit.ai -->
1 parent 313a04e commit 8a68b6c

6 files changed

Lines changed: 282 additions & 4 deletions

File tree

.pre-commit-config.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
default_language_version:
2+
python: python3.11
3+
14
repos:
25
- repo: https://github.com/pre-commit/pre-commit-hooks
36
rev: "v6.0.0"

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ ignore = [
107107
]
108108

109109
[tool.ruff.lint.per-file-ignores]
110-
"tests/*" = ["S101"]
110+
"tests/*" = ["S101", "S608"]
111111
"src/pyseekdb/client/client_base.py" = ["S608"]
112112

113113
[tool.ruff.format]

src/pyseekdb/client/admin_client.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,29 @@ def list_databases(
8888
"""
8989
pass
9090

91+
@abstractmethod
92+
def fork_database(self, source_name: str, destination_name: str, tenant: str = DEFAULT_TENANT) -> Database:
93+
"""
94+
Fork (duplicate) a database to create a new independent copy.
95+
96+
The destination database is logically equivalent to the source database at the
97+
fork snapshot moment, containing all user tables and their data. It can be used
98+
as an independent database for subsequent read/write operations.
99+
100+
Args:
101+
source_name: source database name
102+
destination_name: destination database name (must not already exist)
103+
tenant: tenant name (for OceanBase)
104+
105+
Returns:
106+
Database object for the newly created destination database
107+
108+
Raises:
109+
ValueError: If fork is not supported (requires seekdb >= 1.2.0),
110+
or if the destination database already exists.
111+
"""
112+
pass
113+
91114

92115
class _AdminClientProxy(AdminAPI):
93116
"""
@@ -129,6 +152,10 @@ def list_databases(
129152
"""Proxy to server implementation"""
130153
return self._server.list_databases(limit=limit, offset=offset, tenant=tenant)
131154

155+
def fork_database(self, source_name: str, destination_name: str, tenant: str = DEFAULT_TENANT) -> Database:
156+
"""Proxy to server implementation"""
157+
return self._server.fork_database(source_name=source_name, destination_name=destination_name, tenant=tenant)
158+
132159
def __repr__(self):
133160
return f"<AdminClient server={self._server}>"
134161

src/pyseekdb/client/client_base.py

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,28 @@ def list_databases(
577577
logger.debug(f"✅ Found {len(databases)} databases{self._database_context(effective_tenant)}")
578578
return databases
579579

580+
def fork_database(self, source_name: str, destination_name: str, tenant: str = DEFAULT_TENANT) -> Database:
581+
"""
582+
Fork (duplicate) a database to create a new independent copy.
583+
584+
Args:
585+
source_name: source database name
586+
destination_name: destination database name (must not already exist)
587+
tenant: tenant name (for OceanBase)
588+
589+
Returns:
590+
Database object for the newly created destination database
591+
"""
592+
if not self._fork_database_enabled():
593+
raise ValueError("Fork database is not enabled (requires seekdb >= 1.2.0)")
594+
595+
effective_tenant = self._database_tenant(tenant)
596+
logger.debug(f"Forking database: {source_name} -> {destination_name}{self._database_context(effective_tenant)}")
597+
sql = f"FORK DATABASE `{source_name}` TO `{destination_name}`"
598+
self._execute(sql)
599+
logger.debug(f"✅ Successfully forked database '{source_name}' to '{destination_name}'")
600+
return self.get_database(destination_name, tenant=tenant)
601+
580602
# ==================== Collection Management (User-facing) ====================
581603

582604
def _prepare_schema_parameters( # noqa: C901
@@ -1460,12 +1482,18 @@ def _get_collection_table_name(self, collection_id: str | None, collection_name:
14601482
return CollectionNames.table_name_v2(collection_id)
14611483
return CollectionNames.table_name(collection_name)
14621484

1463-
def _fork_enabled(self) -> bool:
1485+
def _fork_table_enabled(self) -> bool:
14641486
db_type, version = self.detect_db_type_and_version()
14651487
version_110 = Version("1.1.0.0")
14661488
logger.debug(f"db_type: {db_type}, version: {version}")
14671489
return db_type.lower() == "seekdb" and version >= version_110
14681490

1491+
def _fork_database_enabled(self) -> bool:
1492+
db_type, version = self.detect_db_type_and_version()
1493+
version_120 = Version("1.2.0.0")
1494+
logger.debug(f"db_type: {db_type}, version: {version}")
1495+
return db_type.lower() == "seekdb" and version >= version_120
1496+
14691497
def _get_collection_id(self, collection_name: str) -> str:
14701498
collection_id_query_sql = f"SELECT COLLECTION_ID FROM `{CollectionNames.sdk_collections_table_name()}` WHERE COLLECTION_NAME = '{collection_name}'"
14711499
collection_id_query_result = self._execute(collection_id_query_sql)
@@ -1487,7 +1515,7 @@ def _collection_fork(self, collection: Collection, forked_name: str) -> None:
14871515
collection: Collection to fork
14881516
forked_name: Forked collection name
14891517
"""
1490-
if not self._fork_enabled():
1518+
if not self._fork_table_enabled():
14911519
raise ValueError("Fork is not enabled for this database")
14921520

14931521
_validate_collection_name(forked_name)

tests/integration_tests/test_collection_hybrid_search_source_inference.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def _insert_test_data(self, collection, dimension: int):
7070
def _get_sql_query(self, client, table_name: str, search_parm: dict) -> str:
7171
search_parm_json = json.dumps(search_parm, ensure_ascii=False)
7272
client._server._execute(f"SET @search_parm = '{escape_string(search_parm_json)}'")
73-
get_sql_query = f"SELECT DBMS_HYBRID_SEARCH.GET_SQL('{table_name}', @search_parm) as query_sql FROM dual" # noqa: S608
73+
get_sql_query = f"SELECT DBMS_HYBRID_SEARCH.GET_SQL('{table_name}', @search_parm) as query_sql FROM dual"
7474
rows = client._server._execute(get_sql_query)
7575
assert rows and rows[0].get("query_sql")
7676
query_sql = rows[0]["query_sql"]
Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
"""
2+
Integration tests for AdminClient.fork_database method.
3+
4+
Tests the fork_database functionality against real databases, including:
5+
- Successful fork operations
6+
- Empty database fork
7+
- Fork preserves database attributes (charset, collation)
8+
- Fork creates independent databases
9+
- Error handling when destination already exists
10+
- Error handling when fork is not enabled
11+
"""
12+
13+
import contextlib
14+
import logging
15+
import time
16+
17+
import pytest
18+
19+
logger = logging.getLogger(__name__)
20+
21+
22+
class TestDatabaseFork:
23+
"""Tests for AdminClient.fork_database() using real database connections."""
24+
25+
def _is_fork_database_enabled(self, admin_client) -> bool:
26+
"""Check if fork_database is enabled for the given admin client."""
27+
try:
28+
return admin_client._server._fork_database_enabled()
29+
except Exception:
30+
logger.exception("Failed to check if fork_database is enabled")
31+
return False
32+
33+
def _unique_name(self, prefix: str) -> str:
34+
return f"{prefix}_{int(time.time() * 1000)}"
35+
36+
def test_fork_database_success(self, admin_client):
37+
"""
38+
Test successful fork_database operation.
39+
40+
Automatically runs for: embedded, server, oceanbase
41+
Skips if fork_database is not enabled.
42+
"""
43+
if not self._is_fork_database_enabled(admin_client):
44+
pytest.skip("Fork database is not enabled for this database")
45+
46+
source_name = self._unique_name("test_forkdb_src")
47+
dest_name = self._unique_name("test_forkdb_dst")
48+
49+
try:
50+
admin_client.create_database(source_name)
51+
52+
forked_db = admin_client.fork_database(source_name, dest_name)
53+
54+
assert forked_db is not None
55+
assert forked_db.name == dest_name
56+
57+
retrieved_db = admin_client.get_database(dest_name)
58+
assert retrieved_db.name == dest_name
59+
60+
finally:
61+
with contextlib.suppress(Exception):
62+
admin_client.delete_database(dest_name)
63+
with contextlib.suppress(Exception):
64+
admin_client.delete_database(source_name)
65+
66+
def test_fork_empty_database(self, admin_client):
67+
"""
68+
Test fork of an empty database (no tables).
69+
70+
Automatically runs for: embedded, server, oceanbase
71+
Skips if fork_database is not enabled.
72+
"""
73+
if not self._is_fork_database_enabled(admin_client):
74+
pytest.skip("Fork database is not enabled for this database")
75+
76+
source_name = self._unique_name("test_forkdb_empty_src")
77+
dest_name = self._unique_name("test_forkdb_empty_dst")
78+
79+
try:
80+
admin_client.create_database(source_name)
81+
82+
forked_db = admin_client.fork_database(source_name, dest_name)
83+
assert forked_db is not None
84+
assert forked_db.name == dest_name
85+
86+
finally:
87+
with contextlib.suppress(Exception):
88+
admin_client.delete_database(dest_name)
89+
with contextlib.suppress(Exception):
90+
admin_client.delete_database(source_name)
91+
92+
def test_fork_database_preserves_attributes(self, admin_client):
93+
"""
94+
Test that fork preserves database attributes (charset, collation).
95+
96+
Automatically runs for: embedded, server, oceanbase
97+
Skips if fork_database is not enabled.
98+
"""
99+
if not self._is_fork_database_enabled(admin_client):
100+
pytest.skip("Fork database is not enabled for this database")
101+
102+
source_name = self._unique_name("test_forkdb_attr_src")
103+
dest_name = self._unique_name("test_forkdb_attr_dst")
104+
105+
try:
106+
admin_client.create_database(source_name)
107+
source_db = admin_client.get_database(source_name)
108+
109+
forked_db = admin_client.fork_database(source_name, dest_name)
110+
111+
assert forked_db.charset == source_db.charset
112+
assert forked_db.collation == source_db.collation
113+
114+
finally:
115+
with contextlib.suppress(Exception):
116+
admin_client.delete_database(dest_name)
117+
with contextlib.suppress(Exception):
118+
admin_client.delete_database(source_name)
119+
120+
def test_fork_database_destination_already_exists(self, admin_client):
121+
"""
122+
Test that forking to an existing database raises an error.
123+
124+
Automatically runs for: embedded, server, oceanbase
125+
Skips if fork_database is not enabled.
126+
"""
127+
if not self._is_fork_database_enabled(admin_client):
128+
pytest.skip("Fork database is not enabled for this database")
129+
130+
source_name = self._unique_name("test_forkdb_dup_src")
131+
dest_name = self._unique_name("test_forkdb_dup_dst")
132+
133+
try:
134+
admin_client.create_database(source_name)
135+
admin_client.create_database(dest_name)
136+
137+
with pytest.raises(ValueError):
138+
admin_client.fork_database(source_name, dest_name)
139+
140+
finally:
141+
with contextlib.suppress(Exception):
142+
admin_client.delete_database(dest_name)
143+
with contextlib.suppress(Exception):
144+
admin_client.delete_database(source_name)
145+
146+
def test_fork_database_independent_operations(self, admin_client):
147+
"""
148+
Test that forked database is independent from the source.
149+
150+
Creates a table in source before fork, then verifies that modifications
151+
to the forked database do not affect the source and vice versa.
152+
153+
Automatically runs for: embedded, server, oceanbase
154+
Skips if fork_database is not enabled.
155+
"""
156+
if not self._is_fork_database_enabled(admin_client):
157+
pytest.skip("Fork database is not enabled for this database")
158+
159+
source_name = self._unique_name("test_forkdb_indep_src")
160+
dest_name = self._unique_name("test_forkdb_indep_dst")
161+
162+
try:
163+
admin_client.create_database(source_name)
164+
165+
admin_client._server._execute(f"CREATE TABLE `{source_name}`.`t1` (id INT PRIMARY KEY, val VARCHAR(100))")
166+
admin_client._server._execute(f"INSERT INTO `{source_name}`.`t1` VALUES (1, 'original')")
167+
168+
admin_client.fork_database(source_name, dest_name)
169+
170+
admin_client._server._execute(f"INSERT INTO `{dest_name}`.`t1` VALUES (2, 'forked_only')")
171+
172+
source_rows = admin_client._server._execute(f"SELECT COUNT(*) as cnt FROM `{source_name}`.`t1`")
173+
dest_rows = admin_client._server._execute(f"SELECT COUNT(*) as cnt FROM `{dest_name}`.`t1`")
174+
175+
source_count = source_rows[0]["cnt"] if isinstance(source_rows[0], dict) else source_rows[0][0]
176+
dest_count = dest_rows[0]["cnt"] if isinstance(dest_rows[0], dict) else dest_rows[0][0]
177+
178+
assert source_count == 1, f"Source should have 1 row, got {source_count}"
179+
assert dest_count == 2, f"Destination should have 2 rows, got {dest_count}"
180+
181+
finally:
182+
with contextlib.suppress(Exception):
183+
admin_client.delete_database(dest_name)
184+
with contextlib.suppress(Exception):
185+
admin_client.delete_database(source_name)
186+
187+
def test_fork_database_multiple_times(self, admin_client):
188+
"""
189+
Test that the same source database can be forked multiple times.
190+
191+
Automatically runs for: embedded, server, oceanbase
192+
Skips if fork_database is not enabled.
193+
"""
194+
if not self._is_fork_database_enabled(admin_client):
195+
pytest.skip("Fork database is not enabled for this database")
196+
197+
source_name = self._unique_name("test_forkdb_multi_src")
198+
dest_name_1 = self._unique_name("test_forkdb_multi_d1")
199+
dest_name_2 = self._unique_name("test_forkdb_multi_d2")
200+
201+
try:
202+
admin_client.create_database(source_name)
203+
204+
forked_1 = admin_client.fork_database(source_name, dest_name_1)
205+
forked_2 = admin_client.fork_database(source_name, dest_name_2)
206+
207+
assert forked_1.name == dest_name_1
208+
assert forked_2.name == dest_name_2
209+
210+
finally:
211+
with contextlib.suppress(Exception):
212+
admin_client.delete_database(dest_name_2)
213+
with contextlib.suppress(Exception):
214+
admin_client.delete_database(dest_name_1)
215+
with contextlib.suppress(Exception):
216+
admin_client.delete_database(source_name)
217+
218+
219+
if __name__ == "__main__":
220+
pytest.main([__file__, "-v", "-s"])

0 commit comments

Comments
 (0)