Skip to content

Commit 1575451

Browse files
committed
feat: support remote storage options
1 parent c32ecae commit 1575451

8 files changed

Lines changed: 327 additions & 29 deletions

File tree

README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Key motivations inspired by the broader Lance roadmap<sup>[1](https://github.com
1717

1818
- Unified schema for agent messages (`ContextRecord`) with optional embeddings and metadata.
1919
- Automatic versioning via Lance manifests with `checkout(version)` support.
20+
- Remote persistence: point the store at `s3://` URIs with either AWS environment variables or explicit credentials/endpoint overrides.
2021
- Python API (`lance_context.api.Context`) aligned with the Rust implementation.
2122
- Integration tests that exercise real persistence, image serialization, and version rollbacks.
2223

@@ -65,6 +66,17 @@ ctx.add("assistant", "Let me fetch suggestions…")
6566
ctx.checkout(first_version)
6667

6768
print("Entries after checkout:", ctx.entries())
69+
70+
# Store context in S3 (e.g., for MinIO/moto test endpoints)
71+
ctx = Context.create(
72+
"s3://my-bucket/context.lance",
73+
aws_access_key_id="minioadmin",
74+
aws_secret_access_key="minioadmin",
75+
region="us-east-1",
76+
endpoint_url="http://localhost:9000",
77+
allow_http=True,
78+
)
79+
# AWS_* environment variables work too—pass overrides only when you need custom endpoints.
6880
```
6981

7082
### Rust

crates/lance-context-core/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@ mod store;
77

88
pub use context::{Context, ContextEntry, Snapshot};
99
pub use record::{ContextRecord, SearchResult, StateMetadata};
10-
pub use store::ContextStore;
10+
pub use store::{ContextStore, ContextStoreOptions};

crates/lance-context-core/src/store.rs

Lines changed: 65 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::collections::HashMap;
12
use std::sync::Arc;
23

34
use arrow_array::builder::{
@@ -13,7 +14,8 @@ use arrow_array::{
1314
use arrow_schema::{ArrowError, DataType, Field, FieldRef, Schema, TimeUnit};
1415
use chrono::DateTime;
1516
use futures::TryStreamExt;
16-
use lance::dataset::{Dataset, WriteMode, WriteParams};
17+
use lance::dataset::{builder::DatasetBuilder, Dataset, WriteMode, WriteParams};
18+
use lance::io::ObjectStoreParams;
1719
use lance::{Error as LanceError, Result as LanceResult};
1820

1921
use crate::record::{ContextRecord, SearchResult, StateMetadata};
@@ -28,23 +30,32 @@ pub struct ContextStore {
2830
dataset: Dataset,
2931
}
3032

33+
/// Additional configuration when opening a [`ContextStore`].
34+
#[derive(Debug, Clone, Default)]
35+
pub struct ContextStoreOptions {
36+
pub storage_options: Option<HashMap<String, String>>,
37+
}
38+
39+
impl ContextStoreOptions {
40+
#[must_use]
41+
pub fn storage_options(&self) -> Option<HashMap<String, String>> {
42+
self.storage_options.clone()
43+
}
44+
}
45+
3146
impl ContextStore {
3247
/// Open an existing context dataset or create a new one with the project schema.
3348
pub async fn open(uri: &str) -> LanceResult<Self> {
34-
match Dataset::open(uri).await {
49+
Self::open_with_options(uri, ContextStoreOptions::default()).await
50+
}
51+
52+
/// Open a dataset with explicit object store configuration (e.g. S3 credentials).
53+
pub async fn open_with_options(uri: &str, options: ContextStoreOptions) -> LanceResult<Self> {
54+
let storage_options = options.storage_options();
55+
match Self::load_with_options(uri, storage_options.clone()).await {
3556
Ok(dataset) => Ok(Self { dataset }),
3657
Err(LanceError::DatasetNotFound { .. }) => {
37-
let schema = Arc::new(Self::schema());
38-
let empty_batch = RecordBatch::new_empty(schema.clone());
39-
let batches = RecordBatchIterator::new(
40-
vec![Ok::<RecordBatch, ArrowError>(empty_batch)].into_iter(),
41-
schema.clone(),
42-
);
43-
let params = WriteParams {
44-
mode: WriteMode::Create,
45-
..Default::default()
46-
};
47-
let dataset = Dataset::write(batches, uri, Some(params)).await?;
58+
let dataset = Self::create_with_options(uri, storage_options).await?;
4859
Ok(Self { dataset })
4960
}
5061
Err(err) => Err(err),
@@ -156,6 +167,47 @@ impl ContextStore {
156167
])
157168
}
158169

170+
async fn load_with_options(
171+
uri: &str,
172+
storage_options: Option<HashMap<String, String>>,
173+
) -> LanceResult<Dataset> {
174+
if let Some(options) = storage_options {
175+
DatasetBuilder::from_uri(uri)
176+
.with_storage_options(options)
177+
.load()
178+
.await
179+
} else {
180+
Dataset::open(uri).await
181+
}
182+
}
183+
184+
async fn create_with_options(
185+
uri: &str,
186+
storage_options: Option<HashMap<String, String>>,
187+
) -> LanceResult<Dataset> {
188+
let schema = Arc::new(Self::schema());
189+
let empty_batch = RecordBatch::new_empty(schema.clone());
190+
let batches = RecordBatchIterator::new(
191+
vec![Ok::<RecordBatch, ArrowError>(empty_batch)].into_iter(),
192+
schema.clone(),
193+
);
194+
195+
let mut params = WriteParams {
196+
mode: WriteMode::Create,
197+
..Default::default()
198+
};
199+
200+
if let Some(options) = storage_options {
201+
let store_params = ObjectStoreParams {
202+
storage_options: Some(options),
203+
..Default::default()
204+
};
205+
params.store_params = Some(store_params);
206+
}
207+
208+
Dataset::write(batches, uri, Some(params)).await
209+
}
210+
159211
fn records_to_batch(entries: &[ContextRecord]) -> LanceResult<RecordBatch> {
160212
let mut id_builder = StringBuilder::new();
161213
let mut run_id_builder = StringBuilder::new();

python/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ requires = ["maturin>=1.4"]
4141
build-backend = "maturin"
4242

4343
[project.optional-dependencies]
44-
tests = ["pytest", "ruff"]
44+
tests = ["pytest", "ruff", "moto[s3]", "boto3", "botocore"]
4545
dev = ["ruff", "pyright"]
4646

4747
[tool.ruff]

python/python/lance_context/api.py

Lines changed: 52 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -126,12 +126,60 @@ def _normalize_search_hit(raw: dict[str, Any]) -> dict[str, Any]:
126126

127127

128128
class Context:
129-
def __init__(self, uri: str) -> None:
130-
self._inner = _Context.create(uri)
129+
def __init__(
130+
self,
131+
uri: str,
132+
*,
133+
storage_options: dict[str, Any] | None = None,
134+
aws_access_key_id: str | None = None,
135+
aws_secret_access_key: str | None = None,
136+
aws_session_token: str | None = None,
137+
region: str | None = None,
138+
endpoint_url: str | None = None,
139+
allow_http: bool = False,
140+
) -> None:
141+
options = dict(storage_options or {})
142+
if aws_access_key_id is not None:
143+
options["aws_access_key_id"] = aws_access_key_id
144+
if aws_secret_access_key is not None:
145+
options["aws_secret_access_key"] = aws_secret_access_key
146+
if aws_session_token is not None:
147+
options["aws_session_token"] = aws_session_token
148+
if region is not None:
149+
options["aws_region"] = region
150+
if endpoint_url is not None:
151+
options["aws_endpoint_url"] = endpoint_url
152+
if allow_http:
153+
options["aws_allow_http"] = True
154+
155+
if options:
156+
self._inner = _Context.create(uri, storage_options=options)
157+
else:
158+
self._inner = _Context.create(uri)
131159

132160
@classmethod
133-
def create(cls, uri: str) -> Context:
134-
return cls(uri)
161+
def create(
162+
cls,
163+
uri: str,
164+
*,
165+
storage_options: dict[str, Any] | None = None,
166+
aws_access_key_id: str | None = None,
167+
aws_secret_access_key: str | None = None,
168+
aws_session_token: str | None = None,
169+
region: str | None = None,
170+
endpoint_url: str | None = None,
171+
allow_http: bool = False,
172+
) -> Context:
173+
return cls(
174+
uri,
175+
storage_options=storage_options,
176+
aws_access_key_id=aws_access_key_id,
177+
aws_secret_access_key=aws_secret_access_key,
178+
aws_session_token=aws_session_token,
179+
region=region,
180+
endpoint_url=endpoint_url,
181+
allow_http=allow_http,
182+
)
135183

136184
def uri(self) -> str:
137185
return self._inner.uri()

python/src/lib.rs

Lines changed: 52 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use std::collections::HashMap;
12
use std::sync::Arc;
23

34
use chrono::{SecondsFormat, Utc};
@@ -8,7 +9,9 @@ use pyo3::IntoPyObject;
89
use tokio::runtime::Runtime;
910

1011
use lance_context::serde::CONTENT_TYPE_TEXT;
11-
use lance_context::{Context as RustContext, ContextRecord, ContextStore, SearchResult};
12+
use lance_context::{
13+
Context as RustContext, ContextRecord, ContextStore, ContextStoreOptions, SearchResult,
14+
};
1215

1316
const DEFAULT_BINARY_CONTENT_TYPE: &str = "application/octet-stream";
1417
const BINARY_PLACEHOLDER: &str = "[binary]";
@@ -26,13 +29,59 @@ struct Context {
2629
run_id: String,
2730
}
2831

32+
fn storage_options_from_dict<'py>(
33+
dict: Option<&Bound<'py, PyDict>>,
34+
) -> PyResult<Option<HashMap<String, String>>> {
35+
let Some(dict) = dict else {
36+
return Ok(None);
37+
};
38+
39+
let mut options = HashMap::new();
40+
for (key, value) in dict.iter() {
41+
let key_str = key.extract::<String>()?;
42+
if value.is_none() {
43+
continue;
44+
}
45+
let string_value = if let Ok(boolean) = value.extract::<bool>() {
46+
if boolean {
47+
"true".to_string()
48+
} else {
49+
"false".to_string()
50+
}
51+
} else if let Ok(number) = value.extract::<i64>() {
52+
number.to_string()
53+
} else if let Ok(float_val) = value.extract::<f64>() {
54+
float_val.to_string()
55+
} else {
56+
value.str()?.to_string()
57+
};
58+
options.insert(key_str, string_value);
59+
}
60+
61+
if options.is_empty() {
62+
Ok(None)
63+
} else {
64+
Ok(Some(options))
65+
}
66+
}
67+
2968
#[pymethods]
3069
impl Context {
3170
#[classmethod]
32-
fn create(_cls: &Bound<'_, PyType>, uri: &str) -> PyResult<Self> {
71+
#[pyo3(signature = (uri, *, storage_options=None))]
72+
fn create(
73+
_cls: &Bound<'_, PyType>,
74+
uri: &str,
75+
storage_options: Option<&Bound<'_, PyDict>>,
76+
) -> PyResult<Self> {
3377
let runtime = Arc::new(Runtime::new().map_err(to_py_err)?);
78+
79+
let options = ContextStoreOptions {
80+
storage_options: storage_options_from_dict(storage_options)?,
81+
};
82+
3483
let store = runtime
35-
.block_on(ContextStore::open(uri))
84+
.block_on(ContextStore::open_with_options(uri, options))
3685
.map_err(to_py_err)?;
3786
let run_id = new_run_id();
3887
Ok(Self {

0 commit comments

Comments
 (0)