|
11 | 11 | import math |
12 | 12 | import warnings |
13 | 13 | from pathlib import Path |
14 | | -from typing import Callable, Dict, Iterable, List, Literal, Optional, Union |
| 14 | +from typing import Any, Callable, Dict, Iterable, List, Literal, Optional, Union |
15 | 15 |
|
16 | 16 | import pyarrow as pa |
17 | 17 |
|
@@ -182,6 +182,7 @@ def __init__( |
182 | 182 | dataset: Union[torch.utils.data.Dataset, str, Path], |
183 | 183 | batch_size: int, |
184 | 184 | *args, |
| 185 | + dataset_options: Optional[Dict[str, Any]] = None, |
185 | 186 | columns: Optional[Union[List[str], Dict[str, str]]] = None, |
186 | 187 | filter: Optional[str] = None, |
187 | 188 | samples: Optional[int] = 0, |
@@ -237,7 +238,8 @@ def __init__( |
237 | 238 | """ |
238 | 239 | super().__init__() |
239 | 240 | if isinstance(dataset, (str, Path)): |
240 | | - dataset = lance.dataset(dataset) |
| 241 | + dataset_options = dataset_options or {} |
| 242 | + dataset = lance.dataset(dataset, **dataset_options) |
241 | 243 | self.dataset = dataset |
242 | 244 | self.columns = columns |
243 | 245 | self.batch_size = batch_size |
@@ -378,16 +380,18 @@ def _blob_columns(self) -> List[str]: |
378 | 380 |
|
379 | 381 |
|
380 | 382 | class SafeLanceDataset(torch.utils.data.Dataset): |
381 | | - def __init__(self, uri): |
| 383 | + def __init__(self, uri, *, dataset_options=None, **kwargs): |
| 384 | + super().__init__(**kwargs) |
382 | 385 | self.uri = uri |
| 386 | + self.dataset_options = dataset_options or {} |
383 | 387 | self._len = self._safe_preload() |
384 | | - self._ds = None # Deferred initialization |
| 388 | + self._ds = None |
385 | 389 |
|
386 | 390 | def _safe_preload(self): |
387 | 391 | """Main-process safe metadata loading""" |
388 | | - ds = lance.dataset(self.uri) |
| 392 | + ds = lance.dataset(self.uri, **self.dataset_options) |
389 | 393 | length = ds.count_rows() |
390 | | - del ds # Critical: release before spawning |
| 394 | + del ds |
391 | 395 | return length |
392 | 396 |
|
393 | 397 | def __len__(self): |
|
0 commit comments