Skip to content

Commit d6ed76b

Browse files
Add support for XXHash algos (#719)
1 parent 32d7a14 commit d6ed76b

8 files changed

Lines changed: 401 additions & 1 deletion

File tree

NOTICE

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,30 @@
11
AWS Crt Python
22
Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
33
SPDX-License-Identifier: Apache-2.0.
4+
5+
** XXHash - https://xxhash.com/
6+
Copyright (c) 2012-2021 Yann Collet
7+
All rights reserved.
8+
9+
BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
10+
11+
Redistribution and use in source and binary forms, with or without modification,
12+
are permitted provided that the following conditions are met:
13+
14+
* Redistributions of source code must retain the above copyright notice, this
15+
list of conditions and the following disclaimer.
16+
17+
* Redistributions in binary form must reproduce the above copyright notice, this
18+
list of conditions and the following disclaimer in the documentation and/or
19+
other materials provided with the distribution.
20+
21+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
22+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
25+
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
28+
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,3 +147,6 @@ AWS_EXTRA_LIB_DIR=C:\path\to\libs;D:\another\path python3 -m pip install .
147147
### Windows SDK Version
148148

149149
aws-crt-python builds against Windows SDK version `10.0.17763.0`. This is the minimum version required for TLS 1.3 support on Windows. If you need a different Windows SDK version, you can set the environment variable `AWS_CRT_WINDOWS_SDK_VERSION=<version>` while building from source:
150+
151+
### Attribution
152+
This library exposes the native XXHash implementation (https://github.com/Cyan4973/xxHash).

awscrt/checksums.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
# SPDX-License-Identifier: Apache-2.0.
33

44
import _awscrt
5+
from awscrt import NativeResource
6+
from typing import Union
57

68

79
def crc32(input: bytes, previous_crc32: int = 0) -> int:
@@ -111,3 +113,63 @@ def combine_crc64nvme(crc64nvme_result1: int, crc64nvme_result2: int, data_lengt
111113
The combined CRC64-NVME checksum as if computed over the concatenated data
112114
"""
113115
return _awscrt.checksums_crc64nvme_combine(crc64nvme_result1, crc64nvme_result2, data_length2)
116+
117+
118+
class XXHash(NativeResource):
    """
    Streaming xxHash hasher backed by a native hash state.

    Instances are normally created through one of the ``new_*`` factory
    methods, fed with :meth:`update`, and read out with :meth:`finalize`.
    The ``compute_*`` static methods hash a single buffer in one call
    without creating an instance.
    """

    def __init__(self, binding):
        super().__init__()
        # Opaque native handle produced by one of the `_awscrt.xxhash*_new` calls.
        self._binding = binding

    @staticmethod
    def new_xxhash64(seed: int = 0) -> 'XXHash':
        """
        Create a streaming hasher for the XXHash64 algorithm.

        Args:
            seed: Seed forwarded to the native hash state.

        Returns:
            A new XXHash instance wrapping the native XXHash64 state.
        """
        native_state = _awscrt.xxhash64_new(seed)
        return XXHash(binding=native_state)

    @staticmethod
    def new_xxhash3_64(seed: int = 0) -> 'XXHash':
        """
        Create a streaming hasher for the XXHash3_64 algorithm.

        Args:
            seed: Seed forwarded to the native hash state.

        Returns:
            A new XXHash instance wrapping the native XXHash3_64 state.
        """
        native_state = _awscrt.xxhash3_64_new(seed)
        return XXHash(binding=native_state)

    @staticmethod
    def new_xxhash3_128(seed: int = 0) -> 'XXHash':
        """
        Create a streaming hasher for the XXHash3_128 algorithm.

        Args:
            seed: Seed forwarded to the native hash state.

        Returns:
            A new XXHash instance wrapping the native XXHash3_128 state.
        """
        native_state = _awscrt.xxhash3_128_new(seed)
        return XXHash(binding=native_state)

    @staticmethod
    def compute_xxhash64(input: Union[bytes, bytearray, memoryview], seed: int = 0) -> bytes:
        """
        Hash a single buffer with XXHash64 in one call.

        Args:
            input: Data to hash.
            seed: Seed forwarded to the native implementation.

        Returns:
            The digest as bytes.
        """
        digest = _awscrt.xxhash64_compute(input, seed)
        return digest

    @staticmethod
    def compute_xxhash3_64(input: Union[bytes, bytearray, memoryview], seed: int = 0) -> bytes:
        """
        Hash a single buffer with XXHash3_64 in one call.

        Args:
            input: Data to hash.
            seed: Seed forwarded to the native implementation.

        Returns:
            The digest as bytes.
        """
        digest = _awscrt.xxhash3_64_compute(input, seed)
        return digest

    @staticmethod
    def compute_xxhash3_128(input: Union[bytes, bytearray, memoryview], seed: int = 0) -> bytes:
        """
        Hash a single buffer with XXHash3_128 in one call.

        Args:
            input: Data to hash.
            seed: Seed forwarded to the native implementation.

        Returns:
            The digest as bytes.
        """
        digest = _awscrt.xxhash3_128_compute(input, seed)
        return digest

    def update(self, input: Union[bytes, bytearray, memoryview]):
        """
        Feed more data into this hasher.

        Args:
            input: Next chunk of data to include in the hash.
        """
        _awscrt.xxhash_update(self._binding, input)

    def finalize(self) -> bytes:
        """
        Finish hashing and return the digest.

        Returns:
            The digest of all data passed to :meth:`update`, as bytes.
        """
        digest = _awscrt.xxhash_finalize(self._binding)
        return digest

source/checksums.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,13 @@ PyObject *aws_py_checksums_crc32_combine(PyObject *self, PyObject *args);
1313
PyObject *aws_py_checksums_crc32c_combine(PyObject *self, PyObject *args);
1414
PyObject *aws_py_checksums_crc64nvme_combine(PyObject *self, PyObject *args);
1515

16+
PyObject *aws_py_xxhash64_new(PyObject *self, PyObject *args);
17+
PyObject *aws_py_xxhash3_64_new(PyObject *self, PyObject *args);
18+
PyObject *aws_py_xxhash3_128_new(PyObject *self, PyObject *args);
19+
PyObject *aws_py_xxhash64_compute(PyObject *self, PyObject *args);
20+
PyObject *aws_py_xxhash3_64_compute(PyObject *self, PyObject *args);
21+
PyObject *aws_py_xxhash3_128_compute(PyObject *self, PyObject *args);
22+
PyObject *aws_py_xxhash_update(PyObject *self, PyObject *args);
23+
PyObject *aws_py_xxhash_finalize(PyObject *self, PyObject *args);
24+
1625
#endif /* AWS_CRT_PYTHON_CHECKSUMS_H */

source/module.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -863,6 +863,16 @@ static PyMethodDef s_module_methods[] = {
863863
AWS_PY_METHOD_DEF(checksums_crc32c_combine, METH_VARARGS),
864864
AWS_PY_METHOD_DEF(checksums_crc64nvme_combine, METH_VARARGS),
865865

866+
/* XXHash Checksum primitives */
867+
AWS_PY_METHOD_DEF(xxhash64_new, METH_VARARGS),
868+
AWS_PY_METHOD_DEF(xxhash3_64_new, METH_VARARGS),
869+
AWS_PY_METHOD_DEF(xxhash3_128_new, METH_VARARGS),
870+
AWS_PY_METHOD_DEF(xxhash64_compute, METH_VARARGS),
871+
AWS_PY_METHOD_DEF(xxhash3_64_compute, METH_VARARGS),
872+
AWS_PY_METHOD_DEF(xxhash3_128_compute, METH_VARARGS),
873+
AWS_PY_METHOD_DEF(xxhash_update, METH_VARARGS),
874+
AWS_PY_METHOD_DEF(xxhash_finalize, METH_VARARGS),
875+
866876
/* HTTP */
867877
AWS_PY_METHOD_DEF(http_connection_close, METH_VARARGS),
868878
AWS_PY_METHOD_DEF(http_connection_is_open, METH_VARARGS),

source/xxhash.c

Lines changed: 243 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,243 @@
1+
/**
2+
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
* SPDX-License-Identifier: Apache-2.0.
4+
*/
5+
6+
#include "checksums.h"
7+
8+
#include "aws/checksums/xxhash.h"
9+
10+
/* Name tag attached to every PyCapsule that wraps a struct aws_xxhash.
 * File-local: `static` keeps the symbol out of the extension's exported names. */
static const char *s_capsule_name_xxhash = "aws_xxhash";
11+
12+
/**
 * PyCapsule destructor: destroys the native hash state held by the capsule.
 */
static void s_xxhash_destructor(PyObject *xxhash_capsule) {
    struct aws_xxhash *native_hash = PyCapsule_GetPointer(xxhash_capsule, s_capsule_name_xxhash);
    assert(native_hash != NULL);

    aws_xxhash_destroy(native_hash);
}
18+
19+
/**
 * Create a streaming XXHash64 state and hand it to Python inside a PyCapsule.
 *
 * Python args: (seed) - an int convertible to an unsigned 64-bit value.
 * Returns a new capsule, or NULL with a Python exception set.
 */
PyObject *aws_py_xxhash64_new(PyObject *self, PyObject *args) {
    (void)self;

    PyObject *seed_obj;
    if (!PyArg_ParseTuple(args, "O", &seed_obj)) {
        return NULL;
    }

    /* (uint64_t)-1 is also a legal seed, so it only signals failure when an exception is pending. */
    uint64_t seed_value = PyLong_AsUnsignedLongLong(seed_obj);
    if (seed_value == (uint64_t)-1 && PyErr_Occurred()) {
        return NULL;
    }

    struct aws_xxhash *native_hash = aws_xxhash64_new(aws_py_get_allocator(), seed_value);
    if (!native_hash) {
        return PyErr_AwsLastError();
    }

    PyObject *hash_capsule = PyCapsule_New(native_hash, s_capsule_name_xxhash, s_xxhash_destructor);
    if (!hash_capsule) {
        /* No capsule was created, so the destructor will never run; free the state here. */
        aws_xxhash_destroy(native_hash);
        return NULL;
    }

    return hash_capsule;
}
50+
51+
/**
 * Create a streaming XXHash3_64 state and hand it to Python inside a PyCapsule.
 *
 * Python args: (seed) - an int convertible to an unsigned 64-bit value.
 * Returns a new capsule, or NULL with a Python exception set.
 */
PyObject *aws_py_xxhash3_64_new(PyObject *self, PyObject *args) {
    (void)self;

    PyObject *seed_obj;
    if (!PyArg_ParseTuple(args, "O", &seed_obj)) {
        return NULL;
    }

    /* (uint64_t)-1 is also a legal seed, so it only signals failure when an exception is pending. */
    uint64_t seed_value = PyLong_AsUnsignedLongLong(seed_obj);
    if (seed_value == (uint64_t)-1 && PyErr_Occurred()) {
        return NULL;
    }

    struct aws_xxhash *native_hash = aws_xxhash3_64_new(aws_py_get_allocator(), seed_value);
    if (!native_hash) {
        return PyErr_AwsLastError();
    }

    PyObject *hash_capsule = PyCapsule_New(native_hash, s_capsule_name_xxhash, s_xxhash_destructor);
    if (!hash_capsule) {
        /* No capsule was created, so the destructor will never run; free the state here. */
        aws_xxhash_destroy(native_hash);
        return NULL;
    }

    return hash_capsule;
}
82+
83+
/**
 * Create a streaming XXHash3_128 state and hand it to Python inside a PyCapsule.
 *
 * Python args: (seed) - an int convertible to an unsigned 64-bit value.
 * Returns a new capsule, or NULL with a Python exception set.
 */
PyObject *aws_py_xxhash3_128_new(PyObject *self, PyObject *args) {
    (void)self;

    PyObject *seed_obj;
    if (!PyArg_ParseTuple(args, "O", &seed_obj)) {
        return NULL;
    }

    /* (uint64_t)-1 is also a legal seed, so it only signals failure when an exception is pending. */
    uint64_t seed_value = PyLong_AsUnsignedLongLong(seed_obj);
    if (seed_value == (uint64_t)-1 && PyErr_Occurred()) {
        return NULL;
    }

    struct aws_xxhash *native_hash = aws_xxhash3_128_new(aws_py_get_allocator(), seed_value);
    if (!native_hash) {
        return PyErr_AwsLastError();
    }

    PyObject *hash_capsule = PyCapsule_New(native_hash, s_capsule_name_xxhash, s_xxhash_destructor);
    if (!hash_capsule) {
        /* No capsule was created, so the destructor will never run; free the state here. */
        aws_xxhash_destroy(native_hash);
        return NULL;
    }

    return hash_capsule;
}
114+
115+
/**
 * One-shot XXHash64 over a bytes-like object.
 *
 * Python args: (input, seed) - input is any contiguous bytes-like object,
 * seed is an int convertible to an unsigned 64-bit value.
 * Returns a bytes object holding the digest, or NULL with a Python exception set.
 */
PyObject *aws_py_xxhash64_compute(PyObject *self, PyObject *args) {
    (void)self;

    /* "y*" (Py_buffer) is used instead of "y#": "y#" rejects bytearray and memoryview,
     * and it stores the length as Py_ssize_t, which must not be written through a size_t*. */
    Py_buffer py_input;
    PyObject *py_seed = NULL;
    if (!PyArg_ParseTuple(args, "y*O", &py_input, &py_seed)) {
        return NULL;
    }

    PyObject *ret = NULL;

    /* (uint64_t)-1 is also a legal seed, so it only signals failure when an exception is pending. */
    uint64_t seed = PyLong_AsUnsignedLongLong(py_seed);
    if (seed == (uint64_t)-1 && PyErr_Occurred()) {
        goto release_input;
    }

    struct aws_byte_cursor input;
    input.ptr = py_input.buf;
    input.len = (size_t)py_input.len;

    /* 8 bytes of capacity for the 64-bit digest. */
    struct aws_byte_buf buf;
    if (aws_byte_buf_init(&buf, aws_py_get_allocator(), 8)) {
        PyErr_AwsLastError();
        goto release_input;
    }

    if (aws_xxhash64_compute(seed, input, &buf)) {
        PyErr_AwsLastError();
    } else {
        ret = PyBytes_FromStringAndSize((const char *)buf.buffer, buf.len);
    }
    aws_byte_buf_clean_up_secure(&buf);

release_input:
    PyBuffer_Release(&py_input);
    return ret;
}
142+
143+
/**
 * One-shot XXHash3_64 over a bytes-like object.
 *
 * Python args: (input, seed) - input is any contiguous bytes-like object,
 * seed is an int convertible to an unsigned 64-bit value.
 * Returns a bytes object holding the digest, or NULL with a Python exception set.
 */
PyObject *aws_py_xxhash3_64_compute(PyObject *self, PyObject *args) {
    (void)self;

    /* "y*" (Py_buffer) is used instead of "y#": "y#" rejects bytearray and memoryview,
     * and it stores the length as Py_ssize_t, which must not be written through a size_t*. */
    Py_buffer py_input;
    PyObject *py_seed = NULL;
    if (!PyArg_ParseTuple(args, "y*O", &py_input, &py_seed)) {
        return NULL;
    }

    PyObject *ret = NULL;

    /* (uint64_t)-1 is also a legal seed, so it only signals failure when an exception is pending. */
    uint64_t seed = PyLong_AsUnsignedLongLong(py_seed);
    if (seed == (uint64_t)-1 && PyErr_Occurred()) {
        goto release_input;
    }

    struct aws_byte_cursor input;
    input.ptr = py_input.buf;
    input.len = (size_t)py_input.len;

    /* 8 bytes of capacity for the 64-bit digest. */
    struct aws_byte_buf buf;
    if (aws_byte_buf_init(&buf, aws_py_get_allocator(), 8)) {
        PyErr_AwsLastError();
        goto release_input;
    }

    if (aws_xxhash3_64_compute(seed, input, &buf)) {
        PyErr_AwsLastError();
    } else {
        ret = PyBytes_FromStringAndSize((const char *)buf.buffer, buf.len);
    }
    aws_byte_buf_clean_up_secure(&buf);

release_input:
    PyBuffer_Release(&py_input);
    return ret;
}
170+
171+
/**
 * One-shot XXHash3_128 over a bytes-like object.
 *
 * Python args: (input, seed) - input is any contiguous bytes-like object,
 * seed is an int convertible to an unsigned 64-bit value.
 * Returns a bytes object holding the digest, or NULL with a Python exception set.
 */
PyObject *aws_py_xxhash3_128_compute(PyObject *self, PyObject *args) {
    (void)self;

    /* "y*" (Py_buffer) is used instead of "y#": "y#" rejects bytearray and memoryview,
     * and it stores the length as Py_ssize_t, which must not be written through a size_t*. */
    Py_buffer py_input;
    PyObject *py_seed = NULL;
    if (!PyArg_ParseTuple(args, "y*O", &py_input, &py_seed)) {
        return NULL;
    }

    PyObject *ret = NULL;

    /* (uint64_t)-1 is also a legal seed, so it only signals failure when an exception is pending. */
    uint64_t seed = PyLong_AsUnsignedLongLong(py_seed);
    if (seed == (uint64_t)-1 && PyErr_Occurred()) {
        goto release_input;
    }

    struct aws_byte_cursor input;
    input.ptr = py_input.buf;
    input.len = (size_t)py_input.len;

    /* 16 bytes of capacity for the 128-bit digest. */
    struct aws_byte_buf buf;
    if (aws_byte_buf_init(&buf, aws_py_get_allocator(), 16)) {
        PyErr_AwsLastError();
        goto release_input;
    }

    if (aws_xxhash3_128_compute(seed, input, &buf)) {
        PyErr_AwsLastError();
    } else {
        ret = PyBytes_FromStringAndSize((const char *)buf.buffer, buf.len);
    }
    aws_byte_buf_clean_up_secure(&buf);

release_input:
    PyBuffer_Release(&py_input);
    return ret;
}
198+
199+
/**
 * Feed more data into a streaming hash state.
 *
 * Python args: (capsule, input) - capsule wraps a struct aws_xxhash,
 * input is any contiguous bytes-like object.
 * Returns None, or NULL with a Python exception set.
 */
PyObject *aws_py_xxhash_update(PyObject *self, PyObject *args) {
    (void)self;

    /* "y*" (Py_buffer) is used instead of "y#": "y#" rejects bytearray and memoryview,
     * and it stores the length as Py_ssize_t, which must not be written through a size_t*. */
    PyObject *xxhash_capsule = NULL;
    Py_buffer py_input;
    if (!PyArg_ParseTuple(args, "Oy*", &xxhash_capsule, &py_input)) {
        return NULL;
    }

    struct aws_xxhash *hash = PyCapsule_GetPointer(xxhash_capsule, s_capsule_name_xxhash);
    if (hash == NULL) {
        /* PyCapsule_GetPointer has already set the exception. */
        PyBuffer_Release(&py_input);
        return NULL;
    }

    struct aws_byte_cursor input;
    input.ptr = py_input.buf;
    input.len = (size_t)py_input.len;

    int update_failed = aws_xxhash_update(hash, input);

    /* The view is no longer needed once the native call has returned. */
    PyBuffer_Release(&py_input);

    if (update_failed) {
        return PyErr_AwsLastError();
    }

    Py_RETURN_NONE;
}
218+
219+
/**
 * Finish a streaming hash and return its digest.
 *
 * Python args: (capsule) - capsule wraps a struct aws_xxhash.
 * Returns a bytes object holding the digest, or NULL with a Python exception set.
 */
PyObject *aws_py_xxhash_finalize(PyObject *self, PyObject *args) {
    (void)self;

    PyObject *xxhash_capsule = NULL;
    if (!PyArg_ParseTuple(args, "O", &xxhash_capsule)) {
        return NULL;
    }

    struct aws_xxhash *hash = PyCapsule_GetPointer(xxhash_capsule, s_capsule_name_xxhash);
    if (hash == NULL) {
        /* PyCapsule_GetPointer has already set the exception. */
        return NULL;
    }

    /* 16 bytes of capacity: the same size the 128-bit one-shot variant uses, so any
     * variant behind this capsule presumably fits - confirm against aws_xxhash_finalize. */
    struct aws_byte_buf buf;
    if (aws_byte_buf_init(&buf, aws_py_get_allocator(), 16)) {
        /* Do not pass an uninitialized buffer on to the finalize call. */
        return PyErr_AwsLastError();
    }

    PyObject *ret = NULL;
    if (aws_xxhash_finalize(hash, &buf)) {
        PyErr_AwsLastError();
    } else {
        ret = PyBytes_FromStringAndSize((const char *)buf.buffer, buf.len);
    }

    aws_byte_buf_clean_up_secure(&buf);
    return ret;
}

0 commit comments

Comments
 (0)