-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathpyfastpfor.cc
More file actions
244 lines (211 loc) · 7.72 KB
/
pyfastpfor.cc
File metadata and controls
244 lines (211 loc) · 7.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
/**
* PyFastPFOR
*
* Python bindings for the FastPFOR library:
* https://github.com/lemire/FastPFor
*
* This code is released under the
* Apache License Version 2.0 http://www.apache.org/licenses/.
*
*/
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>

#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>
#include <fastpfor/codecfactory.h>
#include <fastpfor/deltautil.h>
// Short alias for the pybind11 namespace, used throughout this file.
namespace py = pybind11;
// Forward declaration: registers the codec wrapper class on the given module
// (the definition is at the bottom of this file).
void exportCodecs(py::module& m);
// Makes FastPForLib names available without qualification.
// NOTE(review): presumably this is what brings CODECFactory, IntegerCODEC and
// Delta into scope -- confirm against the FastPFor headers.
using namespace FastPForLib;
// Module name handed to py::module in the legacy PYBIND11_PLUGIN code path.
const char * module_name = "pyfastpfor";
struct IntegerCODECWrapper {
public:
IntegerCODECWrapper(const std::string& codecName) {
codec_ = CODECFactory::getFromName(codecName).get();
}
size_t encodeArray(
py::array_t<uint32_t, py::array::c_style> input, size_t inputSize,
py::array_t<uint32_t, py::array::c_style> output, size_t outputSize) {
py::gil_scoped_release l;
const uint32_t* inpBuff = input.data();
uint32_t* outBuff = output.mutable_data();
size_t compSize = outputSize;
codec_->encodeArray(inpBuff, inputSize,
outBuff, compSize);
return compSize;
}
size_t decodeArray(
py::array_t<uint32_t, py::array::c_style> input, size_t inputSize,
py::array_t<uint32_t, py::array::c_style> output, size_t outputSize) {
py::gil_scoped_release l;
const uint32_t* inpBuff = input.data();
uint32_t* outBuff = output.mutable_data();
size_t uncompSize = outputSize;
codec_->decodeArray(inpBuff, inputSize, outBuff, uncompSize);
return uncompSize;
}
private:
IntegerCODEC* codec_;
};
/*
* PYBIND11_MODULE is a replacement for PYBIND11_PLUGIN
* introduced in Pybind 2.2. However, we don't require
* Pybind to be >= 2.0 so we attempt to support older
* Pybind versions as well.
*/
#ifdef PYBIND11_MODULE
PYBIND11_MODULE(pyfastpfor, m) {
m.doc() = "Python Bindings for FastPFor library (fast integer compression).";
#else
PYBIND11_PLUGIN(pyfastpfor) {
py::module m(module_name, "Python Bindings for FastPFor library (fast integer compression).");
#endif
#ifdef VERSION_INFO
m.attr("__version__") = VERSION_INFO;
#else
m.attr("__version__") = "dev";
#endif
py::module codecModule = m.def_submodule("codecs", "Codecs class wrapper.");
exportCodecs(codecModule);
m.def("getCodec",
[](const std::string & codecName) {
// We know that FastPFor will keep this shared pointer alive forever
// so it is safe just to reference codec
return py::cast(new IntegerCODECWrapper(codecName),
py::return_value_policy::take_ownership);
},
py::arg("codecName"),
"This is a codec-factory method.\n\n"
"Parameters\n"
"----------\n"
"codecName: str\n"
" A name of the codec, e.g., simdfastpfor256\n"
"\n"
"Returns\n"
"----------\n"
" A reference to the codec object");
m.def("getCodecList", []() {
py::list ret;
for (const string& codecId : CODECFactory::allNames()) {
ret.append(codecId);
}
return ret;
},
"Return a list of available codecs.\n\n"
"Returns\n"
"----------\n"
"A list with codec names");
m.def("delta1", [](py::array_t<uint32_t, py::array::c_style> input, size_t inputSize) -> void {
uint32_t* buff = input.mutable_data();
py::gil_scoped_release l;
Delta::fastDelta(buff, inputSize);
}, py::arg("input"), py::arg("inputSize"),
"In-place computation of differences between adjacent numbers.\n\n"
"Parameters\n"
"----------\n"
"input: input numpy C-style contiguous array to be uncompressed, e.g.:\n"
" input = numpy.array(range(256), dtype = np.uint32).ravel()\n"
"inputSize: a number of integers to process\n"
"\n"
"Returns\n"
"----------\n"
" None"
);
m.def("delta4",
[](py::array_t<uint32_t, py::array::c_style> input, size_t inputSize) -> void {
uint32_t* buff = input.mutable_data();
py::gil_scoped_release l;
Delta::deltaSIMD(buff, inputSize);
}, py::arg("input"), py::arg("inputSize"),
"In-place computation of differences between numbers that are 4 indices apart.\n"
"Using delta4 and prefixSum4 increases space usage, but processing is faster.\n\n"
"Parameters\n"
"----------\n"
"input: input numpy C-style contiguous array to be uncompressed, e.g.:\n"
" input = numpy.array(range(256), dtype = np.uint32).ravel()\n"
"inputSize: a number of integers to process\n"
"\n"
"Returns\n"
"----------\n"
" None"
)
;
m.def("prefixSum1", [](py::array_t<uint32_t, py::array::c_style> input, size_t inputSize) -> void {
uint32_t* buff = input.mutable_data();
py::gil_scoped_release l;
Delta::fastinverseDelta2(buff, inputSize);
}, py::arg("input"), py::arg("inputSize"),
"In-place inversion of delta1.\n\n"
"Parameters\n"
"----------\n"
"input: input numpy C-style contiguous array to be uncompressed, e.g.:\n"
" input = numpy.array(range(256), dtype = np.uint32).ravel()\n"
"inputSize: a number of integers to process\n"
"\n"
"Returns\n"
"----------\n"
" None"
);
m.def("prefixSum4",
[](py::array_t<uint32_t, py::array::c_style> input, size_t inputSize) -> void {
uint32_t* buff = input.mutable_data();
py::gil_scoped_release l;
Delta::inverseDeltaSIMD(buff, inputSize);
}, py::arg("input"), py::arg("inputSize"),
"In-place computation inversion of delta4.\n"
"Using delta4 and prefixSum4 increases space usage, but processing is faster.\n\n"
"Parameters\n"
"----------\n"
"input: input numpy C-style contiguous array to be uncompressed, e.g.:\n"
" input = numpy.array(range(256), dtype = np.uint32).ravel()\n"
"inputSize: a number of integers to process\n"
"\n"
"Returns\n"
"----------\n"
" None"
);
#ifndef PYBIND11_MODULE
return m.ptr();
#endif
}
/*
 * Registers IntegerCODECWrapper on module `m` as the Python class
 * "IntegerCODEC", exposing its encodeArray and decodeArray methods
 * together with their Python-level documentation.
 */
void exportCodecs(py::module& m) {
  py::class_<IntegerCODECWrapper>(m, "IntegerCODEC")
    .def("encodeArray", &IntegerCODECWrapper::encodeArray,
         py::arg("input"), py::arg("inputSize"),
         py::arg("output"), py::arg("outputSize"),
         "Compress input array.\n\n"
         "Parameters\n"
         "----------\n"
         "input: numpy C-style contiguous array to be compressed, e.g.:\n"
         "       input = numpy.array(range(256), dtype = np.uint32).ravel()\n"
         "inputSize: a number of integers to compress: it can be less than\n"
         "           the total number of integers in the numpy array.\n"
         "output: numpy C-style contiguous array with compressed data, e.g.:\n"
         "       output = np.zeros(buffSize, dtype = np.uint32).ravel()\n"
         "outputSize: a capacity of the output buffer: it can be less than\n"
         "            the total number of integers in the numpy array.\n"
         "\n"
         "Returns\n"
         "----------\n"
         "    A number of integers in the compressed output.")
    .def("decodeArray", &IntegerCODECWrapper::decodeArray,
         py::arg("input"), py::arg("inputSize"),
         py::arg("output"), py::arg("outputSize"),
         "Uncompress input array.\n\n"
         "Parameters\n"
         "----------\n"
         "input: numpy C-style contiguous array to be uncompressed, e.g.:\n"
         "       an array previously filled by encodeArray\n"
         "inputSize: a number of compressed integers to uncompress: it can be less\n"
         "           than the total number of integers in the numpy array.\n"
         "output: numpy C-style contiguous array to receive uncompressed data, e.g.:\n"
         "       output = np.zeros(buffSize, dtype = np.uint32).ravel()\n"
         "outputSize: a capacity of the output buffer: it can be less than\n"
         "            the total number of integers in the numpy array.\n"
         "\n"
         "Returns\n"
         "----------\n"
         "    A number of integers in the decompressed output."
    )
    ;
}