Skip to content

Commit 2ceb0fa

Browse files
authored
feat: upgrade to 0.0.7 (#206)
1 parent 8022160 commit 2ceb0fa

19 files changed

Lines changed: 171 additions & 38 deletions

File tree

README.md

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,5 @@
11
# Lance Namespace
22

3-
![logo](./docs/src/logo/wide.png)
4-
53
**Lance Namespace** is an open specification on top of the storage-based Lance data format
64
to standardize access to a collection of Lance tables (a.k.a. Lance datasets).
75
It describes how a metadata service like Apache Hive MetaStore (HMS),

python/Makefile

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010
# See the License for the specific language governing permissions and
1111
# limitations under the License.
1212

13-
VERSION = 0.0.6
13+
VERSION = 0.0.7
1414

1515
.PHONY: clean-urllib3-client
1616
clean-urllib3-client:
@@ -38,7 +38,7 @@ build-urllib3-client: gen-urllib3-client
3838
.PHONY: build-ns
3939
build-ns: build-urllib3-client
4040
cd lance_namespace; \
41-
uv pip install ".[test]"
41+
uv pip install ".[glue,hive2,test]"
4242
uv run pytest
4343

4444
.PHONY: clean

python/lance_namespace/README.md

Whitespace-only changes.

python/lance_namespace/pyproject.toml

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,10 @@
11
[project]
22
name = "lance-namespace"
3-
version = "0.0.6"
3+
version = "0.0.7"
44
description = "Python client for Lance Namespace API"
5-
readme = "README.md"
6-
authors = [
7-
{ name = "Jack Ye", email = "yezhaoqin@gmail.com" }
8-
]
5+
readme = "../../README.md"
6+
license = {file = "../../LICENSE"}
7+
authors = [{ name = "LanceDB Devs", email = "dev@lancedb.com" }]
98
requires-python = ">=3.9"
109
dependencies = [
1110
"lance-namespace-urllib3-client",

python/lance_namespace/src/lance_namespace/glue.py

Lines changed: 29 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -133,7 +133,14 @@ def __init__(self, **properties):
133133
)
134134

135135
self.config = GlueNamespaceConfig(properties)
136-
self.glue = self._initialize_glue_client()
136+
self._glue = None # Lazy initialization to support pickling
137+
138+
@property
139+
def glue(self):
140+
"""Get the Glue client, initializing it if necessary."""
141+
if self._glue is None:
142+
self._glue = self._initialize_glue_client()
143+
return self._glue
137144

138145
def _initialize_glue_client(self):
139146
"""Initialize the AWS Glue client."""
@@ -634,6 +641,18 @@ def _convert_pyarrow_type_to_glue_type(self, arrow_type: pa.DataType) -> str:
634641
# Default to string for unknown types
635642
return 'string'
636643

644+
def __getstate__(self):
645+
"""Prepare instance for pickling by excluding unpickleable objects."""
646+
state = self.__dict__.copy()
647+
# Remove the unpickleable Glue client
648+
state['_glue'] = None
649+
return state
650+
651+
def __setstate__(self, state):
652+
"""Restore instance from pickled state."""
653+
self.__dict__.update(state)
654+
# The Glue client will be re-initialized lazily via the property
655+
637656

638657

639658
class GlueNamespaceConfig:
@@ -663,6 +682,9 @@ def __init__(self, properties: Optional[Dict[str, str]] = None):
663682
if properties is None:
664683
properties = {}
665684

685+
# Store raw properties for pickling support
686+
self._properties = properties.copy()
687+
666688
self._catalog_id = properties.get(self.CATALOG_ID)
667689
self._endpoint = properties.get(self.ENDPOINT)
668690
self._region = properties.get(self.REGION)
@@ -733,4 +755,9 @@ def root(self) -> Optional[str]:
733755
@property
734756
def storage_options(self) -> Dict[str, str]:
735757
"""Get the storage configuration properties."""
736-
return self._storage_options.copy()
758+
return self._storage_options.copy()
759+
760+
@property
761+
def properties(self) -> Dict[str, str]:
762+
"""Get the raw properties dictionary."""
763+
return self._properties.copy()

python/lance_namespace/src/lance_namespace/hive.py

Lines changed: 36 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -203,8 +203,18 @@ def __init__(self, **properties):
203203
# Extract storage properties
204204
self.storage_properties = {k[8:]: v for k, v in properties.items() if k.startswith("storage.")}
205205

206-
# Create client
207-
self._client = HiveMetastoreClient(self.uri, self.ugi)
206+
# Store properties for pickling support
207+
self._properties = properties.copy()
208+
209+
# Lazy initialization to support pickling
210+
self._client = None
211+
212+
@property
213+
def client(self):
214+
"""Get the Hive client, initializing it if necessary."""
215+
if self._client is None:
216+
self._client = HiveMetastoreClient(self.uri, self.ugi)
217+
return self._client
208218

209219
def _normalize_identifier(self, identifier: List[str]) -> tuple:
210220
"""Normalize identifier to (database, table) tuple."""
@@ -231,7 +241,7 @@ def list_namespaces(self, request: ListNamespacesRequest) -> ListNamespacesRespo
231241
# Non-root namespaces don't have children in Hive2
232242
return ListNamespacesResponse(namespaces=[])
233243

234-
with self._client as client:
244+
with self.client as client:
235245
databases = client.get_all_databases()
236246
# Return just database names as strings (excluding default)
237247
namespaces = [db for db in databases if db != "default"]
@@ -259,7 +269,7 @@ def describe_namespace(self, request: DescribeNamespaceRequest) -> DescribeNames
259269

260270
database_name = request.id[0]
261271

262-
with self._client as client:
272+
with self.client as client:
263273
database = client.get_database(database_name)
264274

265275
properties = {}
@@ -309,7 +319,7 @@ def create_namespace(self, request: CreateNamespaceRequest) -> CreateNamespaceRe
309319
if k not in ["comment", "owner", "location"]
310320
}
311321

312-
with self._client as client:
322+
with self.client as client:
313323
client.create_database(database)
314324

315325
return CreateNamespaceResponse()
@@ -331,7 +341,7 @@ def drop_namespace(self, request: DropNamespaceRequest) -> DropNamespaceResponse
331341

332342
database_name = request.id[0]
333343

334-
with self._client as client:
344+
with self.client as client:
335345
# Check if database is empty
336346
tables = client.get_all_tables(database_name)
337347
cascade = request.behavior == "CASCADE" if request.behavior else False
@@ -360,7 +370,7 @@ def namespace_exists(self, request: NamespaceExistsRequest) -> None:
360370

361371
database_name = request.id[0]
362372

363-
with self._client as client:
373+
with self.client as client:
364374
client.get_database(database_name)
365375
except Exception as e:
366376
if NoSuchObjectException and isinstance(e, NoSuchObjectException):
@@ -380,7 +390,7 @@ def list_tables(self, request: ListTablesRequest) -> ListTablesResponse:
380390

381391
database_name = request.id[0]
382392

383-
with self._client as client:
393+
with self.client as client:
384394
table_names = client.get_all_tables(database_name)
385395

386396
# Filter for Lance tables if needed
@@ -410,7 +420,7 @@ def describe_table(self, request: DescribeTableRequest) -> DescribeTableResponse
410420
try:
411421
database, table_name = self._normalize_identifier(request.id)
412422

413-
with self._client as client:
423+
with self.client as client:
414424
table = client.get_table(database, table_name)
415425

416426
# Check if it's a Lance table (case insensitive)
@@ -531,7 +541,7 @@ def register_table(self, request: RegisterTableRequest) -> RegisterTableResponse
531541
if k not in [TABLE_TYPE_KEY, MANAGED_BY_KEY, VERSION_KEY]:
532542
hive_table.parameters[k] = v
533543

534-
with self._client as client:
544+
with self.client as client:
535545
client.create_table(hive_table)
536546

537547
return RegisterTableResponse(
@@ -549,7 +559,7 @@ def table_exists(self, request: TableExistsRequest) -> None:
549559
try:
550560
database, table_name = self._normalize_identifier(request.id)
551561

552-
with self._client as client:
562+
with self.client as client:
553563
table = client.get_table(database, table_name)
554564

555565
# Check if it's a Lance table (case insensitive)
@@ -569,7 +579,7 @@ def drop_table(self, request: DropTableRequest) -> DropTableResponse:
569579
try:
570580
database, table_name = self._normalize_identifier(request.id)
571581

572-
with self._client as client:
582+
with self.client as client:
573583
# Get table to check if it's a Lance table
574584
table = client.get_table(database, table_name)
575585

@@ -595,7 +605,7 @@ def deregister_table(self, request: DeregisterTableRequest) -> DeregisterTableRe
595605
try:
596606
database, table_name = self._normalize_identifier(request.id)
597607

598-
with self._client as client:
608+
with self.client as client:
599609
# Get table to check if it's a Lance table
600610
table = client.get_table(database, table_name)
601611

@@ -710,4 +720,16 @@ def _pyarrow_type_to_hive_type(self, dtype: pa.DataType) -> str:
710720
field_strs.append(f"{field.name}:{field_type}")
711721
return f"struct<{','.join(field_strs)}>"
712722
else:
713-
return "string" # Default to string for unknown types
723+
return "string" # Default to string for unknown types
724+
725+
def __getstate__(self):
726+
"""Prepare instance for pickling by excluding unpickleable objects."""
727+
state = self.__dict__.copy()
728+
# Remove the unpickleable Hive client
729+
state['_client'] = None
730+
return state
731+
732+
def __setstate__(self, state):
733+
"""Restore instance from pickled state."""
734+
self.__dict__.update(state)
735+
# The Hive client will be re-initialized lazily via the property

python/lance_namespace/tests/test_glue.py

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -594,3 +594,41 @@ def test_pyarrow_schema_to_glue_columns(self, glue_namespace):
594594
assert columns[2] == {'Name': 'scores', 'Type': 'array<float>'}
595595
assert columns[3] == {'Name': 'metadata', 'Type': 'struct<created:timestamp,version:int>'}
596596

597+
def test_pickle_support(self, mock_boto3):
598+
"""Test that GlueNamespace can be pickled and unpickled for Ray compatibility."""
599+
import pickle
600+
601+
# Create a GlueNamespace instance
602+
properties = {
603+
'region': 'us-east-1',
604+
'catalog_id': '123456789012',
605+
'endpoint': 'https://glue.example.com',
606+
'storage.access_key_id': 'test-key',
607+
'storage.secret_access_key': 'test-secret'
608+
}
609+
namespace = GlueNamespace(**properties)
610+
611+
# Test pickling
612+
pickled = pickle.dumps(namespace)
613+
assert pickled is not None
614+
615+
# Test unpickling
616+
restored = pickle.loads(pickled)
617+
assert isinstance(restored, GlueNamespace)
618+
619+
# Verify configuration is preserved
620+
assert restored.config.region == 'us-east-1'
621+
assert restored.config.catalog_id == '123456789012'
622+
assert restored.config.endpoint == 'https://glue.example.com'
623+
assert restored.config.storage_options['access_key_id'] == 'test-key'
624+
assert restored.config.storage_options['secret_access_key'] == 'test-secret'
625+
626+
# Verify glue client is None after unpickling (will be lazily initialized)
627+
assert restored._glue is None
628+
629+
# Test that glue client can be re-initialized after unpickling
630+
# This will create a new mock client when accessed
631+
client = restored.glue
632+
assert client is not None
633+
assert restored._glue is not None
634+

python/lance_namespace/tests/test_hive.py

Lines changed: 50 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,12 @@ def test_initialization(self):
6262
assert namespace.uri == "thrift://localhost:9083"
6363
assert namespace.root == "/tmp/warehouse"
6464
assert namespace.ugi == "user:group1,group2"
65+
66+
# Client should not be initialized yet (lazy loading)
67+
mock_client.assert_not_called()
68+
69+
# Access the client property to trigger initialization
70+
_ = namespace.client
6571
mock_client.assert_called_once_with("thrift://localhost:9083", "user:group1,group2")
6672

6773
def test_initialization_without_hive_deps(self):
@@ -428,4 +434,47 @@ def test_root_namespace_operations(self, hive_namespace):
428434
# Test drop_namespace for root (should fail)
429435
request = DropNamespaceRequest(id=[])
430436
with pytest.raises(ValueError, match="Cannot drop root namespace"):
431-
hive_namespace.drop_namespace(request)
437+
hive_namespace.drop_namespace(request)
438+
439+
def test_pickle_support(self):
440+
"""Test that Hive2Namespace can be pickled and unpickled for Ray compatibility."""
441+
import pickle
442+
443+
with patch("lance_namespace.hive.HIVE_AVAILABLE", True):
444+
with patch("lance_namespace.hive.HiveMetastoreClient"):
445+
# Create a Hive2Namespace instance
446+
namespace = connect("hive2", {
447+
"uri": "thrift://localhost:9083",
448+
"root": "/tmp/warehouse",
449+
"ugi": "user:group1,group2",
450+
"client.pool-size": "5",
451+
"storage.access_key_id": "test-key",
452+
"storage.secret_access_key": "test-secret"
453+
})
454+
455+
# Test pickling
456+
pickled = pickle.dumps(namespace)
457+
assert pickled is not None
458+
459+
# Test unpickling
460+
restored = pickle.loads(pickled)
461+
assert isinstance(restored, namespace.__class__)
462+
463+
# Verify configuration is preserved
464+
assert restored.uri == "thrift://localhost:9083"
465+
assert restored.root == "/tmp/warehouse"
466+
assert restored.ugi == "user:group1,group2"
467+
assert restored.pool_size == 5
468+
assert restored.storage_properties["access_key_id"] == "test-key"
469+
assert restored.storage_properties["secret_access_key"] == "test-secret"
470+
471+
# Verify client is None after unpickling (will be lazily initialized)
472+
assert restored._client is None
473+
474+
# Test that client can be re-initialized after unpickling
475+
with patch("lance_namespace.hive.HiveMetastoreClient") as mock_client:
476+
# This will create a new mock client when accessed
477+
client = restored.client
478+
assert client is not None
479+
assert restored._client is not None
480+
mock_client.assert_called_once_with("thrift://localhost:9083", "user:group1,group2")

python/lance_namespace_urllib3_client/README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@ See https://lancedb.github.io/lance-namespace/spec/impls/rest for more details.
1313
This Python package is automatically generated by the [OpenAPI Generator](https://openapi-generator.tech) project:
1414

1515
- API version: 1.0.0
16-
- Package version: 0.0.6
16+
- Package version: 0.0.7
1717
- Generator version: 7.12.0
1818
- Build package: org.openapitools.codegen.languages.PythonClientCodegen
1919

python/lance_namespace_urllib3_client/lance_namespace_urllib3_client/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
""" # noqa: E501
1515

1616

17-
__version__ = "0.0.6"
17+
__version__ = "0.0.7"
1818

1919
# import apis into sdk package
2020
from lance_namespace_urllib3_client.api.data_api import DataApi

0 commit comments

Comments
 (0)