Skip to content

Commit 3f78571

Browse files
authored
Merge branch 'dev/1.37' into feat/tokenizer-endpoint
2 parents 61665e7 + b9a7c69 commit 3f78571

19 files changed

Lines changed: 780 additions & 4 deletions

File tree

.github/workflows/main.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ env:
2828
WEAVIATE_134: 1.34.19
2929
WEAVIATE_135: 1.35.16-efdedfa
3030
WEAVIATE_136: 1.36.9-d905e6c
31-
WEAVIATE_137: 1.37.0-rc.1-578c4eb
31+
WEAVIATE_137: 1.37.0-rc.1-bc3891e
3232

3333
jobs:
3434
lint-and-format:

ci/docker-compose.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ services:
3131
DISABLE_LAZY_LOAD_SHARDS: 'true'
3232
GRPC_MAX_MESSAGE_SIZE: 100000000 # 100mb
3333
OBJECTS_TTL_DELETE_SCHEDULE: "@every 12h" # for objectTTL tests to work
34+
EXPORT_ENABLED: 'true'
35+
EXPORT_DEFAULT_PATH: "/var/lib/weaviate/exports"
3436

3537
contextionary:
3638
environment:

integration/test_export.py

Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
import time
2+
import uuid
3+
from typing import Generator, List, Union
4+
5+
import pytest
6+
from _pytest.fixtures import SubRequest
7+
8+
import weaviate
9+
from weaviate.collections.classes.config import DataType, Property
10+
from weaviate.exceptions import UnexpectedStatusCodeException
11+
from weaviate.export.export import (
12+
ExportFileFormat,
13+
ExportStatus,
14+
ExportStorage,
15+
)
16+
17+
from .conftest import _sanitize_collection_name
18+
19+
# Tag every test in this module with the "export" xdist group.
pytestmark = pytest.mark.xdist_group(name="export")

# Storage backend used by all export calls in this module.
BACKEND = ExportStorage.FILESYSTEM

# Name of the collection created by the module-scoped `client` fixture.
COLLECTION_NAME = "ExportTestCollection"

# Properties of the five seed objects; `count` mirrors the object's index.
OBJECT_PROPS = [dict(title=f"object {idx}", count=idx) for idx in range(5)]

# Fixed UUIDs for the seed objects, paired positionally with OBJECT_PROPS.
OBJECT_IDS = [
    "fd34ccf4-1a2a-47ad-8446-231839366c3f",
    "2653442b-05d8-4fa3-b46a-d4a152eb63bc",
    "55374edb-17de-487f-86cb-9a9fbc30823f",
    "124ff6aa-597f-44d0-8c13-62fbb1e66888",
    "f787386e-7d1c-481f-b8c3-3dbfd8bbad85",
]
34+
35+
36+
@pytest.fixture(scope="module")
def client() -> Generator[weaviate.WeaviateClient, None, None]:
    """Yield a local client with a freshly seeded export test collection.

    The whole module is skipped when the server version is lower than 1.37.0.
    Otherwise the collection named COLLECTION_NAME is deleted, recreated with
    ``title`` (text) and ``count`` (int) properties, and filled with one object
    per OBJECT_PROPS / OBJECT_IDS pair. The collection is deleted again after
    the tests have run.

    The connection is closed in a ``finally`` block so it is released on every
    path: skip, setup failure, and normal teardown.
    """
    client = weaviate.connect_to_local()
    try:
        if client._connection._weaviate_version.is_lower_than(1, 37, 0):
            pytest.skip("Collection export is not supported in versions lower than 1.37.0")

        # Remove any collection with this name before recreating it.
        client.collections.delete(COLLECTION_NAME)

        col = client.collections.create(
            name=COLLECTION_NAME,
            properties=[
                Property(name="title", data_type=DataType.TEXT),
                Property(name="count", data_type=DataType.INT),
            ],
        )
        for props, object_id in zip(OBJECT_PROPS, OBJECT_IDS):
            col.data.insert(properties=props, uuid=object_id)

        yield client
        client.collections.delete(COLLECTION_NAME)
    finally:
        client.close()
57+
58+
59+
def unique_export_id(name: str) -> str:
    """Return the sanitized test name followed by 12 random hex characters."""
    # uuid4().hex is the dash-free 32-character form of the UUID.
    suffix = uuid.uuid4().hex[:12]
    return _sanitize_collection_name(name) + suffix
64+
65+
66+
def test_create_export_with_waiting(client: weaviate.WeaviateClient, request: SubRequest) -> None:
    """A blocking export (wait_for_completion=True) returns SUCCESS and lists the collection."""
    new_export_id = unique_export_id(request.node.name)

    result = client.export.create(
        export_id=new_export_id,
        backend=BACKEND,
        file_format=ExportFileFormat.PARQUET,
        include_collections=[COLLECTION_NAME],
        wait_for_completion=True,
    )

    assert result.status == ExportStatus.SUCCESS
    assert COLLECTION_NAME in result.collections
79+
80+
81+
def test_create_export_without_waiting(
    client: weaviate.WeaviateClient, request: SubRequest
) -> None:
    """Create an export without waiting, then poll its status until it succeeds.

    Polling is bounded by a deadline so that an export stuck in STARTED or
    TRANSFERRING fails the test instead of hanging the whole test run.
    """
    export_id = unique_export_id(request.node.name)
    # Statuses a healthy export may report while it is running or once it is done.
    acceptable = [ExportStatus.STARTED, ExportStatus.TRANSFERRING, ExportStatus.SUCCESS]

    resp = client.export.create(
        export_id=export_id,
        backend=BACKEND,
        file_format=ExportFileFormat.PARQUET,
        include_collections=[COLLECTION_NAME],
    )
    assert resp.status in acceptable

    # poll until done, but never longer than the timeout
    timeout_s = 60
    deadline = time.monotonic() + timeout_s
    while True:
        status = client.export.get_status(export_id=export_id, backend=BACKEND)
        assert status.status in acceptable
        if status.status == ExportStatus.SUCCESS:
            break
        assert time.monotonic() < deadline, (
            f"export {export_id} did not reach SUCCESS within {timeout_s}s "
            f"(last status: {status.status})"
        )
        time.sleep(0.1)

    assert status.export_id.lower() == export_id.lower()
108+
109+
110+
def test_get_export_status(client: weaviate.WeaviateClient, request: SubRequest) -> None:
    """get_status on a finished export reports SUCCESS plus the matching id and backend."""
    finished_id = unique_export_id(request.node.name)

    # Run the export to completion first; its return value is not needed here.
    client.export.create(
        export_id=finished_id,
        backend=BACKEND,
        file_format=ExportFileFormat.PARQUET,
        include_collections=[COLLECTION_NAME],
        wait_for_completion=True,
    )

    reported = client.export.get_status(export_id=finished_id, backend=BACKEND)

    assert reported.status == ExportStatus.SUCCESS
    # The id is compared case-insensitively.
    assert reported.export_id.lower() == finished_id.lower()
    assert reported.backend == BACKEND.value
126+
127+
128+
def test_create_export_with_parquet_format(
    client: weaviate.WeaviateClient, request: SubRequest
) -> None:
    """A blocking export with file_format=PARQUET finishes with SUCCESS."""
    parquet_export_id = unique_export_id(request.node.name)

    outcome = client.export.create(
        export_id=parquet_export_id,
        backend=BACKEND,
        file_format=ExportFileFormat.PARQUET,
        include_collections=[COLLECTION_NAME],
        wait_for_completion=True,
    )

    assert outcome.status == ExportStatus.SUCCESS
142+
143+
144+
@pytest.mark.parametrize("include", [[COLLECTION_NAME], COLLECTION_NAME])
def test_create_export_include_as_str_and_list(
    client: weaviate.WeaviateClient, include: Union[str, List[str]], request: SubRequest
) -> None:
    """include_collections works both as a single name and as a list of names."""
    case_export_id = unique_export_id(request.node.name)

    outcome = client.export.create(
        export_id=case_export_id,
        backend=BACKEND,
        file_format=ExportFileFormat.PARQUET,
        include_collections=include,
        wait_for_completion=True,
    )

    assert outcome.status == ExportStatus.SUCCESS
    assert COLLECTION_NAME in outcome.collections
160+
161+
162+
def test_cancel_export(client: weaviate.WeaviateClient, request: SubRequest) -> None:
    """Cancel a running export and check it ends up CANCELED or SUCCESS.

    SUCCESS is accepted because the export may complete before the cancel
    request takes effect. The wait is measured with a monotonic clock so that
    wall-clock adjustments cannot shorten or extend the 5 second window.
    """
    export_id = unique_export_id(request.node.name)
    terminal = [ExportStatus.CANCELED, ExportStatus.SUCCESS]

    resp = client.export.create(
        export_id=export_id,
        backend=BACKEND,
        file_format=ExportFileFormat.PARQUET,
        include_collections=[COLLECTION_NAME],
    )
    assert resp.status in [ExportStatus.STARTED, ExportStatus.TRANSFERRING, ExportStatus.SUCCESS]

    client.export.cancel(export_id=export_id, backend=BACKEND)

    # verify it's cancelled or already completed (race condition)
    deadline = time.monotonic() + 5
    while True:
        status = client.export.get_status(export_id=export_id, backend=BACKEND)
        if status.status in terminal or time.monotonic() >= deadline:
            break
        time.sleep(0.1)
    assert status.status in terminal, (
        f"export {export_id} still reports {status.status} 5s after the cancel request"
    )
184+
185+
186+
def test_fail_on_non_existing_collection(
    client: weaviate.WeaviateClient, request: SubRequest
) -> None:
    """Exporting a collection that does not exist raises UnexpectedStatusCodeException."""
    rejected_id = unique_export_id(request.node.name)
    unknown_collections = ["NonExistingCollection"]

    with pytest.raises(UnexpectedStatusCodeException):
        client.export.create(
            export_id=rejected_id,
            backend=BACKEND,
            file_format=ExportFileFormat.PARQUET,
            include_collections=unknown_collections,
            wait_for_completion=True,
        )
199+
200+
201+
def test_fail_on_both_include_and_exclude(
    client: weaviate.WeaviateClient, request: SubRequest
) -> None:
    """Passing include_collections together with exclude_collections raises ValueError."""
    conflicting_id = unique_export_id(request.node.name)

    with pytest.raises(ValueError):
        client.export.create(
            export_id=conflicting_id,
            backend=BACKEND,
            file_format=ExportFileFormat.PARQUET,
            include_collections=COLLECTION_NAME,
            exclude_collections="SomeOther",
        )
214+
215+
216+
def test_fail_status_for_non_existing_export(
    client: weaviate.WeaviateClient, request: SubRequest
) -> None:
    """Asking for the status of an export that was never created raises an error."""
    # A fresh id that no export.create call in this test has used.
    never_created_id = unique_export_id(request.node.name)

    with pytest.raises(UnexpectedStatusCodeException):
        client.export.get_status(backend=BACKEND, export_id=never_created_id)

weaviate/classes/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
batch,
66
config,
77
data,
8+
export,
89
generate,
910
generics,
1011
init,
@@ -22,6 +23,7 @@
2223
"config",
2324
"ConsistencyLevel",
2425
"data",
26+
"export",
2527
"generate",
2628
"generics",
2729
"init",

weaviate/classes/export.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from weaviate.export.export import (
2+
ExportFileFormat,
3+
ExportStorage,
4+
)
5+
6+
__all__ = [
7+
"ExportFileFormat",
8+
"ExportStorage",
9+
]

weaviate/client.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from .connect.v4 import ConnectionAsync, ConnectionSync
2121
from .debug import _Debug, _DebugAsync
2222
from .embedded import EmbeddedOptions
23+
from .export import _Export, _ExportAsync
2324
from .groups import _Groups, _GroupsAsync
2425
from .rbac import _Roles, _RolesAsync
2526
from .tokenization import _Tokenization, _TokenizationAsync
@@ -77,6 +78,7 @@ def __init__(
7778
)
7879
self.alias = _AliasAsync(self._connection)
7980
self.backup = _BackupAsync(self._connection)
81+
self.export = _ExportAsync(self._connection)
8082
self.batch = _BatchClientWrapperAsync(self._connection)
8183
self.cluster = _ClusterAsync(self._connection)
8284
self.collections = _CollectionsAsync(self._connection)
@@ -154,6 +156,7 @@ def __init__(
154156
consistency_level=None,
155157
)
156158
self.backup = _Backup(self._connection)
159+
self.export = _Export(self._connection)
157160
self.cluster = _Cluster(self._connection)
158161
self.collections = collections
159162
self.debug = _Debug(self._connection)

weaviate/client.pyi

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ from .backup import _Backup, _BackupAsync
2020
from .cluster import _Cluster, _ClusterAsync
2121
from .collections.batch.client import _BatchClientWrapper, _BatchClientWrapperAsync
2222
from .debug import _Debug, _DebugAsync
23+
from .export import _Export, _ExportAsync
2324
from .rbac import _Roles, _RolesAsync
2425
from .tokenization import _Tokenization, _TokenizationAsync
2526
from .types import NUMBER
@@ -30,6 +31,7 @@ class WeaviateAsyncClient(_WeaviateClientExecutor[ConnectionAsync]):
3031
_connection: ConnectionAsync
3132
alias: _AliasAsync
3233
backup: _BackupAsync
34+
export: _ExportAsync
3335
batch: _BatchClientWrapperAsync
3436
collections: _CollectionsAsync
3537
cluster: _ClusterAsync
@@ -54,6 +56,7 @@ class WeaviateClient(_WeaviateClientExecutor[ConnectionSync]):
5456
_connection: ConnectionSync
5557
alias: _Alias
5658
backup: _Backup
59+
export: _Export
5760
batch: _BatchClientWrapper
5861
collections: _Collections
5962
cluster: _Cluster

weaviate/collections/collections/executor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,13 @@
4444
_check_references_generic,
4545
)
4646
from weaviate.collections.collection import Collection, CollectionAsync
47+
from weaviate.collections.config.executor import _any_property_has_text_analyzer
4748
from weaviate.connect import executor
4849
from weaviate.connect.v4 import (
4950
ConnectionAsync,
5051
ConnectionType,
5152
_ExpectedStatusCodes,
5253
)
53-
from weaviate.collections.config.executor import _any_property_has_text_analyzer
5454
from weaviate.exceptions import WeaviateInvalidInputError, WeaviateUnsupportedFeatureError
5555
from weaviate.util import _capitalize_first_letter, _decode_json_response_dict
5656
from weaviate.validator import _validate_input, _ValidateArgument

weaviate/collections/config/executor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
Generic,
66
List,
77
Literal,
8-
Sequence,
98
Optional,
9+
Sequence,
1010
Tuple,
1111
Union,
1212
cast,

weaviate/exceptions.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,14 @@ class BackupCanceledError(WeaviateBaseError):
141141
"""Backup canceled Exception."""
142142

143143

144+
class ExportFailedError(WeaviateBaseError):
    """Export Failed Exception.

    NOTE(review): presumably raised by the export client when an export ends
    in a failed state; the raising code is not visible here, so confirm.
    """
146+
147+
148+
class ExportCanceledError(WeaviateBaseError):
    """Export Canceled Exception.

    NOTE(review): presumably raised by the export client when an export is
    canceled while being waited on; the raising code is not visible here, so confirm.
    """
150+
151+
144152
class EmptyResponseError(WeaviateBaseError):
145153
"""Occurs when an HTTP request unexpectedly returns an empty response."""
146154

0 commit comments

Comments
 (0)