-
Notifications
You must be signed in to change notification settings - Fork 11
Expand file tree
/
Copy pathcollection.py
More file actions
267 lines (210 loc) · 8.52 KB
/
collection.py
File metadata and controls
267 lines (210 loc) · 8.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
from __future__ import annotations
from typing import (
Any,
Iterator,
Sequence,
Tuple,
Type,
TypeVar,
overload,
)
import pyarrow as pa
from typing_extensions import Protocol, Self, runtime_checkable
from . import base
from . import data
from . import options
# Type variable for the kind of object a collection holds. The bound
# restricts it to ``base.SOMAObject`` and its subtypes.
_Elem = TypeVar("_Elem", bound=base.SOMAObject)
"""Element Type for a SOMA collection."""
# Type variable standing for a collection class. ``add_new_collection``
# uses it so that the returned type follows the ``kind`` argument.
_CT = TypeVar("_CT", bound="BaseCollection")
"""Any implementation of a Collection."""
@runtime_checkable
class BaseCollection(base.SOMAObject, Protocol[_Elem]):
    """A string-keyed container whose values are :class:`base.SOMAObject`s.

    The type parameter states which kind of SOMA object the collection may
    hold. Left fully generic, any SOMA object is allowed; a subclass may
    narrow this to one specific kind of SOMA object.

    Lifecycle: maturing
    """

    # ------------------------------------------------------------------
    # MutableMapping interface methods
    # (``__setitem__`` is declared further down, next to :meth:`set`.)
    # ------------------------------------------------------------------

    def __getitem__(self, key: str) -> _Elem:
        ...

    def __delitem__(self, key: str) -> None:
        ...

    def __iter__(self) -> Iterator[str]:
        ...

    def __len__(self) -> int:
        ...

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    @classmethod
    def create(
        cls,
        uri: str,
        *,
        platform_config: options.PlatformConfig | None = None,
        context: Any | None = None,
    ) -> Self:
        """Makes a brand-new collection of this type at ``uri``.

        Args:
            uri: Where the new collection is to be created.
            platform_config: Optional platform-specific configuration
                options to apply when creating the collection.
            context: Optional, untyped (``Any``) value; its meaning is not
                specified by this protocol -- presumably
                implementation-defined, confirm with the implementation.

        Returns:
            The newly created collection, opened for writing.

        Lifecycle: maturing
        """
        ...

    # The two overloads below exist purely so that type inference gives:
    #
    #     some_coll.add_new_collection("key")              # -> Collection
    #     some_coll.add_new_collection("key", Experiment)  # -> Experiment

    @overload
    def add_new_collection(
        self,
        key: str,
        kind: None = None,
        *,
        uri: str | None = ...,
        platform_config: options.PlatformConfig | None = ...,
    ) -> "Collection": ...

    @overload
    def add_new_collection(
        self,
        key: str,
        kind: Type[_CT],
        *,
        uri: str | None = ...,
        platform_config: options.PlatformConfig | None = ...,
    ) -> _CT: ...

    def add_new_collection(
        self,
        key: str,
        kind: Type["BaseCollection"] | None = None,
        *,
        uri: str | None = None,
        platform_config: options.PlatformConfig | None = None,
    ) -> "BaseCollection":
        """Creates a fresh sub-collection and adds it to this collection.

        This is for *new* children only. To attach a collection that already
        exists, use :meth:`set` or indexed assignment
        (``coll[name] = value``) instead.

        ``kind`` selects the type whose skeleton is created, in the same way
        as :meth:`create`. When it is omitted a basic collection is made::

            # Create a child Measurement object at the key "density"
            # with default settings.
            density = the_collection.add_new_collection("density", somacore.Measurement)

            # This will create a basic Collection as a child at the location
            # storage://authority/path/to/sub-child.
            sub_child = density.add_new_collection(
                "sub_child", uri="storage://authority/path/to/sub-child")

        ``uri`` may be absolute or relative. When no child URI is given, the
        collection should derive a default one from the key of the new
        entry, preferring a relative URI where that is possible::

            # coll URI is "file:///path/to/coll"
            coll.add_new_collection("new child!")
            # The URI of the child collection might be:
            # "file:///path/to/coll/new_child"

            # flat_ns_coll URI is "flat://authority/key" in a flat namespace
            # where relative paths are unsupported.
            flat_ns_coll.add_new_collection("flat child")
            # The URI of the child collection might be:
            # "flat://authority/flat_child"

        How the default URI is built is deliberately left unspecified, so
        each implementation can pick what suits it. Callers who need the
        child's URI should read it from ``new_child.uri``; they should never
        assume what it will be.

        Args:
            key: The key the child will live at
                (i.e., it will be accessed via ``the_collection[key]``).
            kind: The type of child that should be added.
            uri: If provided, replaces the default URI that would otherwise
                be used to create this object. May be absolute or relative.
            platform_config: Platform-specific configuration options used
                when creating the child.

        Returns:
            The newly created collection, opened for writing.

        Lifecycle: maturing
        """
        ...

    def add_new_dataframe(
        self,
        key: str,
        *,
        uri: str | None = None,
        schema: pa.Schema,
        index_column_names: Sequence[str] = (options.SOMA_JOINID,),
        domain: Sequence[Tuple[Any, Any] | None] | None = None,
        platform_config: options.PlatformConfig | None = None,
    ) -> data.DataFrame:
        """Creates a new DataFrame and adds it as a child of this collection.

        The parameters have the same meaning as in
        :meth:`data.DataFrame.create`. For how child URIs are handled, see
        :meth:`add_new_collection`.

        Returns:
            The newly created DataFrame, opened for writing.

        Lifecycle: maturing
        """
        ...

    def add_new_dense_ndarray(
        self,
        key: str,
        *,
        uri: str | None = None,
        type: pa.DataType,
        shape: Sequence[int],
        platform_config: options.PlatformConfig | None = None,
    ) -> data.DenseNDArray:
        """Creates a new dense NDArray and adds it as a child of this collection.

        The parameters have the same meaning as in
        :meth:`data.DenseNDArray.create`. For how child URIs are handled,
        see :meth:`add_new_collection`.

        Returns:
            The newly created dense NDArray, opened for writing.

        Lifecycle: maturing
        """
        ...

    def add_new_sparse_ndarray(
        self,
        key: str,
        *,
        uri: str | None = None,
        type: pa.DataType,
        shape: Sequence[int],
        platform_config: options.PlatformConfig | None = None,
    ) -> data.SparseNDArray:
        """Creates a new sparse NDArray and adds it as a child of this collection.

        The parameters have the same meaning as in
        :meth:`data.SparseNDArray.create`. For details about child creation,
        see :meth:`add_new_collection`.

        Returns:
            The newly created sparse NDArray, opened for writing.

        Lifecycle: maturing
        """
        ...

    def __setitem__(self, key: str, value: _Elem) -> None:
        """Stores ``value`` under ``key``; see :meth:`set` for the details."""
        # Delegates to ``set`` with its default URI handling; the ``self``
        # that ``set`` returns is intentionally discarded.
        self.set(key, value)

    def set(
        self,
        key: str,
        value: _Elem,
        *,
        use_relative_uri: bool | None = None,
    ) -> Self:
        """Sets an entry of this collection.

        Important note: a parent may have to share implementation-internal
        state with its children. For that reason, the SOMAObject instance
        you later read back from the collection is not guaranteed to be the
        very instance you stored::

            some_collection["thing"] = my_soma_object
            added_soma_object = some_collection["thing"]
            my_soma_object is added_soma_object  # could be False

        The two objects *will* refer to the same stored data.

        Args:
            key: The string key to set.
            value: The SOMA object to insert into the collection.
            use_relative_uri: Controls whether the entry is stored with a
                relative URI (provided the storage engine supports it).

                If ``None`` (the default), the choice between an absolute
                and a relative URI is made automatically, based on the
                relative location of the two objects.

                If ``True``, a relative URI is always used. If the new
                child does not share a relative URI base, or relative URIs
                are not possible at all, the collection should raise an
                error.

                If ``False``, an absolute URI is always used.

        Returns:
            ``self``, to enable method chaining.

        Lifecycle: maturing
        """
        ...
@runtime_checkable
class Collection(BaseCollection[base.SOMAObject], Protocol):
    """A SOMA collection that attaches no semantics to the values it holds.

    It is :class:`BaseCollection` with the element type fixed to
    :class:`base.SOMAObject`.

    Lifecycle: maturing
    """

    ...