Skip to content

Commit 541e17f

Browse files
Kobzol authored and spirali committed
[python] add Arrow serialization content_type
1 parent ab99354 commit 541e17f

5 files changed

Lines changed: 14 additions & 3 deletions

File tree

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@ RUN apt-get update && \
88
curl https://sh.rustup.rs -sSf | sh -s -- -y && \
99
. $HOME/.cargo/env && \
1010
cargo install capnpc && \
11-
pip3 install pycapnp cloudpickle pytest pytest-timeout cbor pytest-timeout && \
11+
pip3 install pycapnp cloudpickle pytest pytest-timeout cbor pyarrow && \
1212
cargo build --all-features --release --verbose && \
1313
cd python && \
1414
python3 setup.py install

docs/guide/user.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -180,6 +180,7 @@ Recognized content types:
180180
* cloudpickle - Serialized Python object via Cloudpickle
181181
* json - Object serialized into JSON
182182
* cbor - Object serialized into CBOR
183+
* arrow - Object serialized with Apache Arrow
183184
* text - UTF-8 string.
184185
* text:<ENCODING> - Text with specified encoding
185186
* mime:<MIME> - Content type defined as MIME type

python/rain/common/content_type.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55

66
def check_content_type(name):
7-
if name in [None, "pickle", "json", "dir", "text", "cbor",
7+
if name in [None, "pickle", "json", "dir", "text", "cbor", "arrow",
88
"protobuf", "cloudpickle"]:
99
return True
1010
if (name.startswith("text:") or
@@ -64,6 +64,9 @@ def encode_value(val, content_type):
6464
elif content_type == "cbor":
6565
import cbor
6666
d = cbor.dumps(val)
67+
elif content_type == "arrow":
68+
import pyarrow
69+
d = pyarrow.serialize(val).to_buffer().to_pybytes()
6770
elif content_type.startswith("text"):
6871
if not isinstance(val, str):
6972
raise RainException("Encoding {!r} can only encode `str` objects."
@@ -102,6 +105,9 @@ def decode_value(data, content_type):
102105
elif content_type == "cbor":
103106
import cbor
104107
return cbor.loads(data)
108+
elif content_type == "arrow":
109+
import pyarrow
110+
return pyarrow.deserialize(data)
105111
elif content_type.startswith("text"):
106112
if content_type == "text":
107113
enc = "utf-8"

python/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
11
cbor
22
cloudpickle
3+
pyarrow
34
pycapnp

tests/pytests/test_python.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -479,6 +479,7 @@ def test_gen(ctx, in1, in2, in3, in4, in5, *args, ina="bar", **kwargs):
479479
assert kwargs['kwA'] == ["A"]
480480
assert kwargs['kwB'] == ["B"]
481481
assert kwargs['kwC'] == ["C"]
482+
assert kwargs['kwD'] == ["D"]
482483

483484
@remote(inputs={'in1': Input(content_type='json'),
484485
'in2': Input(content_type='pickle', load=True),
@@ -526,7 +527,9 @@ def copied(ctx, obj):
526527
ina=blob("barbar", encode='cbor'),
527528
kwA=pickled(["A"]),
528529
kwB=blob(["B"], encode="json"),
529-
kwC=blob(["C"], encode="cbor"))
530+
kwC=blob(["C"], encode="cbor"),
531+
kwD=blob(["D"], encode="arrow")
532+
)
530533
s.submit()
531534
t1.wait()
532535

0 commit comments

Comments
 (0)