1+ import base64
12from dataclasses import dataclass
23from typing import ClassVar , Literal , Self , TypeGuard , overload
34
45import numpy as np
56
67from zarr .core .common import JSON , ZarrFormat
7- from zarr .core .dtype .common import HasObjectCodec
8- from zarr .core .dtype .wrapper import TBaseDType , ZDType
8+ from zarr .core .dtype .common import HasObjectCodec , v3_unstable_dtype_warning
9+ from zarr .core .dtype .npy .common import check_json_str
10+ from zarr .core .dtype .wrapper import DTypeJSON_V2 , DTypeJSON_V3 , TBaseDType , ZDType
911
1012
1113@dataclass (frozen = True , kw_only = True )
12- class VariableLengthString (ZDType [np .dtypes .ObjectDType , str ], HasObjectCodec ): # type: ignore[no-redef]
14+ class VariableLengthBytes (ZDType [np .dtypes .ObjectDType , bytes ], HasObjectCodec ):
1315 dtype_cls = np .dtypes .ObjectDType
1416 _zarr_v3_name : ClassVar [Literal ["variable_length_bytes" ]] = "variable_length_bytes"
1517 object_codec_id = "vlen-bytes"
@@ -39,12 +41,13 @@ def check_json_v3(cls, data: JSON) -> TypeGuard[Literal["variable_length_utf8"]]
3941 def to_json (self , zarr_format : Literal [2 ]) -> Literal ["|O" ]: ...
4042
4143 @overload
42- def to_json (self , zarr_format : Literal [3 ]) -> Literal ["variable_length_utf8 " ]: ...
44+ def to_json (self , zarr_format : Literal [3 ]) -> Literal ["variable_length_bytes " ]: ...
4345
44- def to_json (self , zarr_format : ZarrFormat ) -> Literal ["|O" , "variable_length_utf8 " ]:
46+ def to_json (self , zarr_format : ZarrFormat ) -> Literal ["|O" , "variable_length_bytes " ]:
4547 if zarr_format == 2 :
4648 return "|O"
4749 elif zarr_format == 3 :
50+ v3_unstable_dtype_warning (self )
4851 return self ._zarr_v3_name
4952 raise ValueError (f"zarr_format must be 2 or 3, got { zarr_format } " ) # pragma: no cover
5053
@@ -54,22 +57,19 @@ def _from_json_unchecked(
5457 ) -> Self :
5558 return cls ()
5659
def default_scalar(self) -> bytes:
    """Return the default scalar for this data type: an empty bytes object."""
    return bytes()
5962
def to_json_scalar(self, data: object, *, zarr_format: ZarrFormat) -> str:
    """
    Encode a scalar as a base64 string suitable for JSON.

    Parameters
    ----------
    data
        The scalar to encode. Bytes-like values are encoded directly; ``str``
        values are first encoded as UTF-8.
    zarr_format
        The Zarr format version. It is not used by this method.

    Returns
    -------
    The standard base64 encoding of ``data`` as ASCII text.
    """
    # check_scalar accepts str, but base64 only takes bytes-like input,
    # so a str is converted to bytes before encoding.
    raw = data.encode("utf-8") if isinstance(data, str) else data
    return base64.standard_b64encode(raw).decode("ascii")  # type: ignore[arg-type]
6265
def from_json_scalar(self, data: JSON, *, zarr_format: ZarrFormat) -> bytes:
    """
    Decode a base64-encoded JSON string into bytes.

    Parameters
    ----------
    data
        The JSON value to decode. It must pass ``check_json_str``.
    zarr_format
        The Zarr format version. It is not used by this method.

    Returns
    -------
    The bytes obtained by base64-decoding the ASCII encoding of ``data``.

    Raises
    ------
    TypeError
        If ``data`` does not pass ``check_json_str``.
    """
    if not check_json_str(data):
        raise TypeError(f"Invalid type: {data}. Expected a string.")  # pragma: no cover
    ascii_payload = data.encode("ascii")
    return base64.standard_b64decode(ascii_payload)
7070
def check_scalar(self, data: object) -> bool:
    """Return True if ``data`` is a ``bytes`` or ``str`` instance, else False."""
    accepted = (bytes, str)
    return isinstance(data, accepted)
7373
74- def _cast_scalar_unchecked (self , data : object ) -> str :
75- return str (data )
74+ def _cast_scalar_unchecked (self , data : object ) -> bytes :
75+ return bytes (data ) # type: ignore[no-any-return, call-overload]
0 commit comments