Skip to content

Commit 081aaef

Browse files
committed
Move property back to tokenization
1 parent 220e839 commit 081aaef

7 files changed

Lines changed: 54 additions & 52 deletions

File tree

integration/test_tokenize.py

Lines changed: 10 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -288,7 +288,9 @@ def test_property_and_generic_endpoints_agree(
288288
stopwords = config.inverted_index_config.stopwords
289289

290290
text = "the quick brown fox"
291-
via_property = recipe_collection.config.tokenize_property(property_name="recipe", text=text)
291+
via_property = client.tokenization.for_property(
292+
collection=recipe_collection.name, property_name="recipe", text=text
293+
)
292294
via_generic = client.tokenization.text(
293295
text=text,
294296
tokenization=recipe.tokenization,
@@ -325,8 +327,9 @@ def test_property_result_shape(self, client: weaviate.WeaviateClient) -> None:
325327
],
326328
}
327329
)
328-
col = client.collections.get("TestDeserPropTypes")
329-
result = col.config.tokenize_property(property_name="tag", text=" Hello World ")
330+
result = client.tokenization.for_property(
331+
collection="TestDeserPropTypes", property_name="tag", text=" Hello World "
332+
)
330333
assert isinstance(result, TokenizeResult)
331334
assert result.indexed == ["Hello World"]
332335
finally:
@@ -442,9 +445,8 @@ def test_text_raises_on_old_server(self, client: weaviate.WeaviateClient) -> Non
442445
def test_tokenize_property_raises_on_old_server(self, client: weaviate.WeaviateClient) -> None:
443446
if client._connection._weaviate_version.is_at_least(1, 37, 0):
444447
pytest.skip("Version gate only applies to Weaviate < 1.37.0")
445-
col = client.collections.get("Any")
446448
with pytest.raises(WeaviateUnsupportedFeatureError):
447-
col.config.tokenize_property(property_name="title", text="hello")
449+
client.tokenization.for_property(collection="Any", property_name="title", text="hello")
448450

449451

450452
# ---------------------------------------------------------------------------
@@ -454,7 +456,7 @@ def test_tokenize_property_raises_on_old_server(self, client: weaviate.WeaviateC
454456

455457
@pytest.mark.usefixtures("require_1_37")
456458
class TestAsyncClient:
457-
"""Verify text() and tokenize_property() work through the async client."""
459+
"""Verify tokenization.text() and tokenization.for_property() work through the async client."""
458460

459461
@pytest.mark.asyncio
460462
async def test_text_tokenize(self, async_client: weaviate.WeaviateAsyncClient) -> None:
@@ -498,8 +500,8 @@ async def test_property_tokenize(self, async_client: weaviate.WeaviateAsyncClien
498500
],
499501
}
500502
)
501-
col = async_client.collections.get("TestAsyncPropTokenize")
502-
result = await col.config.tokenize_property(
503+
result = await async_client.tokenization.for_property(
504+
collection="TestAsyncPropTokenize",
503505
property_name="title",
504506
text="The quick brown fox",
505507
)

weaviate/collections/config/async_.pyi

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@ from weaviate.collections.classes.config import (
2727
from weaviate.collections.classes.config_object_ttl import _ObjectTTLConfigUpdate
2828
from weaviate.collections.classes.config_vector_index import _VectorIndexConfigDynamicUpdate
2929
from weaviate.connect.v4 import ConnectionAsync
30-
from weaviate.tokenization.models import TokenizeResult
3130

3231
from .executor import _ConfigCollectionExecutor
3332

@@ -91,4 +90,3 @@ class _ConfigCollectionAsync(_ConfigCollectionExecutor[ConnectionAsync]):
9190
self, *, vector_config: Union[_VectorConfigCreate, List[_VectorConfigCreate]]
9291
) -> None: ...
9392
async def delete_property_index(self, property_name: str, index_name: IndexName) -> bool: ...
94-
async def tokenize_property(self, property_name: str, text: str) -> TokenizeResult: ...

weaviate/collections/config/executor.py

Lines changed: 0 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,6 @@
5656
WeaviateInvalidInputError,
5757
WeaviateUnsupportedFeatureError,
5858
)
59-
from weaviate.tokenization.models import TokenizeResult
6059
from weaviate.util import (
6160
_capitalize_first_letter,
6261
_decode_json_response_dict,
@@ -667,42 +666,3 @@ def resp(res: Response) -> bool:
667666
error_msg="Property may not exist",
668667
status_codes=_ExpectedStatusCodes(ok_in=[200], error="property exists"),
669668
)
670-
671-
def tokenize_property(
672-
self,
673-
property_name: str,
674-
text: str,
675-
) -> executor.Result[TokenizeResult]:
676-
"""Tokenize text using a property's configured tokenization settings.
677-
678-
Args:
679-
property_name: The property name whose tokenization config to use.
680-
text: The text to tokenize.
681-
682-
Returns:
683-
A TokenizeResult with indexed and query token lists.
684-
685-
Raises:
686-
WeaviateUnsupportedFeatureError: If the server version is below 1.37.0.
687-
"""
688-
if self._connection._weaviate_version.is_lower_than(1, 37, 0):
689-
raise WeaviateUnsupportedFeatureError(
690-
"Tokenization",
691-
str(self._connection._weaviate_version),
692-
"1.37.0",
693-
)
694-
695-
path = f"/schema/{self._name}/properties/{property_name}/tokenize"
696-
payload: Dict[str, Any] = {"text": text}
697-
698-
def resp(response: Response) -> TokenizeResult:
699-
return TokenizeResult.model_validate(response.json())
700-
701-
return executor.execute(
702-
response_callback=resp,
703-
method=self._connection.post,
704-
path=path,
705-
weaviate_object=payload,
706-
error_msg="Property tokenization failed",
707-
status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize property text"),
708-
)

weaviate/collections/config/sync.pyi

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,6 @@ from weaviate.collections.classes.config import (
2727
from weaviate.collections.classes.config_object_ttl import _ObjectTTLConfigUpdate
2828
from weaviate.collections.classes.config_vector_index import _VectorIndexConfigDynamicUpdate
2929
from weaviate.connect.v4 import ConnectionSync
30-
from weaviate.tokenization.models import TokenizeResult
3130

3231
from .executor import _ConfigCollectionExecutor
3332

@@ -89,4 +88,3 @@ class _ConfigCollection(_ConfigCollectionExecutor[ConnectionSync]):
8988
self, *, vector_config: Union[_VectorConfigCreate, List[_VectorConfigCreate]]
9089
) -> None: ...
9190
def delete_property_index(self, property_name: str, index_name: IndexName) -> bool: ...
92-
def tokenize_property(self, property_name: str, text: str) -> TokenizeResult: ...

weaviate/tokenization/async_.pyi

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,3 +30,6 @@ class _TokenizationAsync(_TokenizationExecutor[ConnectionAsync]):
3030
analyzer_config: Optional[TextAnalyzerConfigCreate] = ...,
3131
stopword_presets: Optional[Dict[str, List[str]]] = ...,
3232
) -> TokenizeResult: ...
33+
async def for_property(
34+
self, collection: str, property_name: str, text: str
35+
) -> TokenizeResult: ...

weaviate/tokenization/executor.py

Lines changed: 40 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
from weaviate.connect.v4 import ConnectionType, _ExpectedStatusCodes
1515
from weaviate.exceptions import WeaviateUnsupportedFeatureError
1616
from weaviate.tokenization.models import TokenizeResult
17+
from weaviate.util import _capitalize_first_letter
1718

1819

1920
class _TokenizationExecutor(Generic[ConnectionType]):
@@ -189,3 +190,42 @@ def resp(response: Response) -> TokenizeResult:
189190
error_msg="Tokenization failed",
190191
status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize text"),
191192
)
193+
194+
def for_property(
195+
self,
196+
collection: str,
197+
property_name: str,
198+
text: str,
199+
) -> executor.Result[TokenizeResult]:
200+
"""Tokenize text using a property's configured tokenization settings.
201+
202+
The server resolves the tokenization and analyzer configuration from
203+
the property's schema, so callers only supply the text.
204+
205+
Args:
206+
collection: The collection that owns the property.
207+
property_name: The property name whose tokenization config to use.
208+
text: The text to tokenize.
209+
210+
Returns:
211+
A TokenizeResult with indexed and query token lists.
212+
213+
Raises:
214+
WeaviateUnsupportedFeatureError: If the server version is below 1.37.0.
215+
"""
216+
self.__check_version()
217+
218+
path = f"/schema/{_capitalize_first_letter(collection)}/properties/{property_name}/tokenize"
219+
payload: Dict[str, Any] = {"text": text}
220+
221+
def resp(response: Response) -> TokenizeResult:
222+
return TokenizeResult.model_validate(response.json())
223+
224+
return executor.execute(
225+
response_callback=resp,
226+
method=self._connection.post,
227+
path=path,
228+
weaviate_object=payload,
229+
error_msg="Property tokenization failed",
230+
status_codes=_ExpectedStatusCodes(ok_in=[200], error="tokenize property text"),
231+
)

weaviate/tokenization/sync.pyi

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,3 +30,4 @@ class _Tokenization(_TokenizationExecutor[ConnectionSync]):
3030
analyzer_config: Optional[TextAnalyzerConfigCreate] = ...,
3131
stopword_presets: Optional[Dict[str, List[str]]] = ...,
3232
) -> TokenizeResult: ...
33+
def for_property(self, collection: str, property_name: str, text: str) -> TokenizeResult: ...

0 commit comments

Comments
 (0)