Skip to content

Commit d248160

Browse files
authored
feat: Update docs, examples, tests to use method-based API (#625)
1 parent c58dd01 commit d248160

15 files changed

Lines changed: 90 additions & 132 deletions

File tree

docs/authentication.md

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -94,28 +94,26 @@ Refer to [`obstore.auth.google`](api/auth/google.md).
9494
You can use the [`AzureCredentialProvider`][obstore.auth.azure.AzureCredentialProvider] to use [`azure.identity`][] to handle credentials.
9595

9696
```py
97-
import obstore as obs
9897
from obstore.auth.azure import AzureCredentialProvider
9998
from obstore.store import AzureStore
10099

101100
credential_provider = AzureCredentialProvider(credential=...)
102101
store = AzureStore("container", credential_provider=credential_provider)
103-
print(obs.list(store).collect())
102+
print(store.list().collect())
104103
```
105104

106105
Alternatively, you can use [`AzureAsyncCredentialProvider`][obstore.auth.azure.AzureAsyncCredentialProvider] with the async API:
107106

108107
```py
109108
import asyncio
110-
import obstore as obs
111109
from obstore.auth.azure import AzureAsyncCredentialProvider
112110
from obstore.store import AzureStore
113111

114112
credential_provider = AzureAsyncCredentialProvider(credential=...)
115113
store = AzureStore("container", credential_provider=credential_provider)
116114

117115
async def fetch_blobs():
118-
blobs = await obs.list(store).collect_async()
116+
blobs = await store.list().collect_async()
119117
print(blobs)
120118

121119
asyncio.run(fetch_blobs())

docs/cookbook.md

Lines changed: 21 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@
55
Use the [`obstore.list`][] method.
66

77
```py
8-
import obstore as obs
9-
108
store = ... # store of your choice
119

1210
# Recursively list all files below the 'data' path.
@@ -15,7 +13,7 @@ store = ... # store of your choice
1513
prefix = "data"
1614

1715
# Get a stream of metadata objects:
18-
list_stream = obs.list(store, prefix)
16+
list_stream = store.list(prefix)
1917

2018
# Print info
2119
for batch in list_stream:
@@ -32,12 +30,10 @@ Instead, you may consider passing `return_arrow=True` to [`obstore.list`][] to r
3230
This Arrow integration requires the [`arro3-core` dependency](https://kylebarron.dev/arro3/latest/), a lightweight Arrow implementation. You can pass the emitted `RecordBatch` to [`pyarrow`](https://arrow.apache.org/docs/python/index.html) (zero-copy) by passing it to [`pyarrow.record_batch`][] or to [`polars`](https://pola.rs/) (also zero-copy) by passing it to `polars.DataFrame`.
3331

3432
```py
35-
import obstore as obs
36-
3733
store = ... # store of your choice
3834

3935
# Get a stream of Arrow RecordBatches of metadata
40-
list_stream = obs.list(store, prefix="data", return_arrow=True)
36+
list_stream = store.list(prefix="data", return_arrow=True)
4137
for record_batch in list_stream:
4238
# Perform zero-copy conversion to your arrow-backed library of choice
4339
#
@@ -58,13 +54,12 @@ for record_batch in list_stream:
5854
Here's a working example with the [`sentinel-cogs` bucket](https://registry.opendata.aws/sentinel-2-l2a-cogs/) in AWS Open Data:
5955

6056
```py
61-
import obstore as obs
6257
import pandas as pd
6358
import pyarrow as pa
6459
from obstore.store import S3Store
6560

6661
store = S3Store("sentinel-cogs", region="us-west-2", skip_signature=True)
67-
stream = obs.list(store, chunk_size=20, return_arrow=True)
62+
stream = store.list(chunk_size=20, return_arrow=True)
6863

6964
for record_batch in stream:
7065
# Convert to pyarrow (zero-copy), then to pandas for easy export to a
@@ -86,30 +81,28 @@ The Arrow record batch looks like the following:
8681

8782
## Fetch objects
8883

89-
Use the [`obstore.get`][] function to fetch data bytes from remote storage or files in the local filesystem.
84+
Use the `get` method to fetch data bytes from remote storage or files in the local filesystem.
9085

9186
```py
92-
import obstore as obs
93-
9487
store = ... # store of your choice
9588

9689
# Retrieve a specific file
9790
path = "data/file01.parquet"
9891

9992
# Fetch just the file metadata
100-
meta = obs.head(store, path)
93+
meta = store.head(path)
10194
print(meta)
10295

10396
# Fetch the object including metadata
104-
result = obs.get(store, path)
97+
result = store.get(path)
10598
assert result.meta == meta
10699

107100
# Buffer the entire object in memory
108101
buffer = result.bytes()
109102
assert len(buffer) == meta.size
110103

111104
# Alternatively stream the bytes from object storage
112-
stream = obs.get(store, path).stream()
105+
stream = store.get(path).stream()
113106

114107
# We can now iterate over the stream
115108
total_buffer_len = 0
@@ -125,9 +118,7 @@ Using the response as an iterator ensures that we don't buffer the entire file
125118
into memory.
126119

127120
```py
128-
import obstore as obs
129-
130-
resp = obs.get(store, path)
121+
resp = store.get(path)
131122

132123
with open("output/file", "wb") as f:
133124
for chunk in resp:
@@ -139,65 +130,56 @@ with open("output/file", "wb") as f:
139130
Use the [`obstore.put`][] function to atomically write data. `obstore.put` will automatically use [multipart uploads](https://docs.aws.amazon.com/AmazonS3/latest/userguide/mpuoverview.html) for large input data.
140131

141132
```py
142-
import obstore as obs
143-
144133
store = ... # store of your choice
145134
path = "data/file1"
146135
content = b"hello"
147-
obs.put(store, path, content)
136+
store.put(path, content)
148137
```
149138

150139
You can also upload local files:
151140

152141
```py
153142
from pathlib import Path
154-
import obstore as obs
155143

156144
store = ... # store of your choice
157145
path = "data/file1"
158146
content = Path("path/to/local/file")
159-
obs.put(store, path, content)
147+
store.put(path, content)
160148
```
161149

162150
Or file-like objects:
163151

164152
```py
165-
import obstore as obs
166-
167153
store = ... # store of your choice
168154
path = "data/file1"
169155
with open("path/to/local/file", "rb") as content:
170-
obs.put(store, path, content)
156+
store.put(path, content)
171157
```
172158

173159
Or iterables:
174160

175161
```py
176-
import obstore as obs
177-
178162
def bytes_iter():
179163
for i in range(5):
180164
yield b"foo"
181165

182166
store = ... # store of your choice
183167
path = "data/file1"
184168
content = bytes_iter()
185-
obs.put(store, path, content)
169+
store.put(path, content)
186170
```
187171

188172
Or async iterables:
189173

190174
```py
191-
import obstore as obs
192-
193175
async def bytes_stream():
194176
for i in range(5):
195177
yield b"foo"
196178

197179
store = ... # store of your choice
198180
path = "data/file1"
199181
content = bytes_stream()
200-
obs.put(store, path, content)
182+
store.put(path, content)
201183
```
202184

203185
## Copy objects from one store to another
@@ -209,16 +191,14 @@ Perhaps you have data in one store, say AWS S3, that you need to copy to another
209191
Download the file, collect its bytes in memory, then upload it. Note that this will materialize the entire file in memory.
210192

211193
```py
212-
import obstore as obs
213-
214194
store1 = ... # store of your choice
215195
store2 = ... # store of your choice
216196

217197
path1 = "data/file1"
218198
path2 = "data/file2"
219199

220-
buffer = obs.get(store1, path1).bytes()
221-
obs.put(store2, path2, buffer)
200+
buffer = store1.get(path1).bytes()
201+
store2.put(path2, buffer)
222202
```
223203

224204
### Local file
@@ -227,22 +207,21 @@ First download the file to disk, then upload it.
227207

228208
```py
229209
from pathlib import Path
230-
import obstore as obs
231210

232211
store1 = ... # store of your choice
233212
store2 = ... # store of your choice
234213

235214
path1 = "data/file1"
236215
path2 = "data/file2"
237216

238-
resp = obs.get(store1, path1)
217+
resp = store1.get(path1)
239218

240219
with open("temporary_file", "wb") as f:
241220
for chunk in resp:
242221
f.write(chunk)
243222

244223
# Upload the path
245-
obs.put(store2, path2, Path("temporary_file"))
224+
store2.put(path2, Path("temporary_file"))
246225
```
247226

248227
### Streaming
@@ -254,30 +233,27 @@ It's easy to **stream** a download from one store directly as the upload to anot
254233
Using the async API is currently required to use streaming copies.
255234

256235
```py
257-
import obstore as obs
258-
259236
store1 = ... # store of your choice
260237
store2 = ... # store of your choice
261238

262239
path1 = "data/file1"
263240
path2 = "data/file2"
264241

265242
# This only constructs the stream, it doesn't materialize the data in memory
266-
resp = await obs.get_async(store1, path1)
243+
resp = await store1.get_async(path1)
267244
# A streaming upload is created to copy the file to path2
268-
await obs.put_async(store2, path2, resp)
245+
await store2.put_async(path2, resp)
269246
```
270247

271248
Or, by customizing the chunk size and the upload concurrency you can control memory overhead.
272249

273250
```py
274-
resp = await obs.get_async(store1, path1)
251+
resp = await store1.get_async(path1)
275252
chunk_size = 5 * 1024 * 1024 # 5MB
276253
stream = resp.stream(min_chunk_size=chunk_size)
277254

278255
# A streaming upload is created to copy the file to path2
279-
await obs.put_async(
280-
store2,
256+
await store2.put_async(
281257
path2,
282258
stream,
283259
chunk_size=chunk_size,

docs/examples/fastapi.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ First, import `fastapi` and `obstore` and create the FastAPI application.
1818
from fastapi import FastAPI
1919
from fastapi.responses import StreamingResponse
2020

21-
import obstore as obs
2221
from obstore.store import HTTPStore, S3Store
2322

2423
app = FastAPI()
@@ -39,7 +38,7 @@ async def download_example() -> StreamingResponse:
3938

4039
# Make the request. This only begins the download; it does not wait for the
4140
# download to finish.
42-
resp = await obs.get_async(store, path)
41+
resp = await store.get_async(path)
4342
return StreamingResponse(resp)
4443
```
4544

@@ -56,7 +55,7 @@ async def large_example() -> StreamingResponse:
5655

5756
# Note: for large file downloads you may need to increase the timeout in
5857
# the client configuration
59-
resp = await obs.get_async(store, path)
58+
resp = await store.get_async(path)
6059

6160
# Example: Ensure the stream returns at least 5MB of data in each chunk.
6261
return StreamingResponse(resp.stream(min_chunk_size=5 * 1024 * 1024))

docs/examples/minio.md

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ docker run -p 9000:9000 -p 9001:9001 \
2020
Now we can create an `S3Store` to interact with minio:
2121

2222
```py
23-
import obstore as obs
2423
from obstore.store import S3Store
2524

2625
store = S3Store(
@@ -33,20 +32,20 @@ store = S3Store(
3332
)
3433

3534
# Add files
36-
obs.put(store, "a.txt", b"foo")
37-
obs.put(store, "b.txt", b"bar")
38-
obs.put(store, "c/d.txt", b"baz")
35+
store.put("a.txt", b"foo")
36+
store.put("b.txt", b"bar")
37+
store.put("c/d.txt", b"baz")
3938

4039
# List files
41-
files = obs.list(store).collect()
40+
files = store.list().collect()
4241
print(files)
4342

4443
# Download a file
45-
resp = obs.get(store, "a.txt")
44+
resp = store.get("a.txt")
4645
print(resp.bytes())
4746

4847
# Delete a file
49-
obs.delete(store, "a.txt")
48+
store.delete("a.txt")
5049
```
5150

5251
There's a [full example](https://github.com/developmentseed/obstore/tree/main/examples/minio) in the obstore repository.

docs/getting-started.md

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,12 @@ File-like object support is also provided:
7676
### Example
7777

7878
```py
79-
import obstore as obs
79+
from obstore.store import MemoryStore
8080

81-
store = obs.store.MemoryStore()
81+
store = MemoryStore()
8282

83-
obs.put(store, "file.txt", b"hello world!")
84-
response = obs.get(store, "file.txt")
83+
store.put("file.txt", b"hello world!")
84+
response = store.get("file.txt")
8585
response.meta
8686
# {'path': 'file.txt',
8787
# 'last_modified': datetime.datetime(2024, 10, 21, 16, 19, 45, 102620, tzinfo=datetime.timezone.utc),
@@ -90,22 +90,22 @@ response.meta
9090
# 'version': None}
9191
assert response.bytes() == b"hello world!"
9292

93-
byte_range = obs.get_range(store, "file.txt", start=0, end=5)
93+
byte_range = store.get_range("file.txt", start=0, end=5)
9494
assert byte_range == b"hello"
9595

96-
obs.copy(store, "file.txt", "other.txt")
97-
assert obs.get(store, "other.txt").bytes() == b"hello world!"
96+
store.copy("file.txt", "other.txt")
97+
assert store.get("other.txt").bytes() == b"hello world!"
9898
```
9999

100100
All of these methods also have `async` counterparts, suffixed with `_async`.
101101

102102
```py
103-
import obstore as obs
103+
from obstore.store import MemoryStore
104104

105-
store = obs.store.MemoryStore()
105+
store = MemoryStore()
106106

107-
await obs.put_async(store, "file.txt", b"hello world!")
108-
response = await obs.get_async(store, "file.txt")
107+
await store.put_async("file.txt", b"hello world!")
108+
response = await store.get_async("file.txt")
109109
response.meta
110110
# {'path': 'file.txt',
111111
# 'last_modified': datetime.datetime(2024, 10, 21, 16, 20, 36, 477418, tzinfo=datetime.timezone.utc),
@@ -114,10 +114,10 @@ response.meta
114114
# 'version': None}
115115
assert await response.bytes_async() == b"hello world!"
116116

117-
byte_range = await obs.get_range_async(store, "file.txt", start=0, end=5)
117+
byte_range = await store.get_range_async("file.txt", start=0, end=5)
118118
assert byte_range == b"hello"
119119

120-
await obs.copy_async(store, "file.txt", "other.txt")
121-
resp = await obs.get_async(store, "other.txt")
120+
await store.copy_async("file.txt", "other.txt")
121+
resp = await store.get_async("other.txt")
122122
assert await resp.bytes_async() == b"hello world!"
123123
```

0 commit comments

Comments
 (0)