Skip to content

Commit 38c3fcb

Browse files
authored
[mypyc] Use memory copy for vec init and extend using buffer protocol (#21349)
For vecs with value item types, use the buffer protocol for fast bulk item access during construction and `extend()`. Previously each item had to be boxed and then unboxed, which was inefficient. This works with `bytes` and `array.array`, for example, and should also work with NumPy arrays (though I didn't test with NumPy). Examples where this matters include `v = vec[u8](b"foo")` and `v = extend(v, b"bar")`. Since the `bool` item type uses a non-standard encoding, it's not supported. We can make `bytes` even faster by adding further specialization. I'll do this in a follow-up PR. I used a coding agent for assistance, working in small incremental changes.
1 parent be46935 commit 38c3fcb

12 files changed

Lines changed: 394 additions & 47 deletions

File tree

mypyc/irbuild/expression.py

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,10 @@
6262
from mypyc.ir.class_ir import ClassIR
6363
from mypyc.ir.func_ir import FUNC_CLASSMETHOD, FUNC_STATICMETHOD
6464
from mypyc.ir.ops import (
65+
ERR_MAGIC,
6566
Assign,
6667
BasicBlock,
68+
CallC,
6769
ComparisonOp,
6870
Integer,
6971
LoadAddress,
@@ -80,8 +82,11 @@
8082
RTuple,
8183
RVec,
8284
bool_rprimitive,
85+
int64_rprimitive,
8386
int_rprimitive,
8487
is_any_int,
88+
is_bytearray_rprimitive,
89+
is_bytes_rprimitive,
8590
is_fixed_width_rtype,
8691
is_int64_rprimitive,
8792
is_int_rprimitive,
@@ -90,6 +95,7 @@
9095
is_object_rprimitive,
9196
object_rprimitive,
9297
set_rprimitive,
98+
vec_api_by_item_type,
9399
)
94100
from mypyc.irbuild.ast_helpers import is_borrow_friendly_expr, process_conditional
95101
from mypyc.irbuild.builder import IRBuilder, int_borrow_friendly_op
@@ -116,6 +122,7 @@
116122
translate_object_setattr,
117123
)
118124
from mypyc.irbuild.vec import (
125+
as_platform_int,
119126
vec_append,
120127
vec_create,
121128
vec_create_from_values,
@@ -620,9 +627,37 @@ def vec_from_iterable(
620627
capacity: Value | None = None,
621628
) -> Value:
622629
"""Construct a vec from an arbitrary iterable."""
623-
# Translate it as a vec comprehension vec[t]([<name> for <name> in
624-
# iterable]). This way we can use various special casing supported
625-
# by for loops and comprehensions.
630+
item_type = vec_type.item_type
631+
api_name = vec_api_by_item_type.get(item_type)
632+
iterable_rtype = builder.node_type(iterable)
633+
if api_name is not None and (
634+
is_object_rprimitive(iterable_rtype)
635+
or is_bytes_rprimitive(iterable_rtype)
636+
or is_bytearray_rprimitive(iterable_rtype)
637+
):
638+
# For generic iterables (typed as object) and bytes/bytearray
639+
# (which support the buffer protocol for fast memcpy), call the
640+
# C-level from_iterable. For concrete types like range, list,
641+
# vec, etc., the for-loop desugaring below produces better IR.
642+
iterable_val = builder.accept(iterable)
643+
cap = (
644+
as_platform_int(builder.builder, capacity, line)
645+
if capacity is not None
646+
else Integer(0, int64_rprimitive)
647+
)
648+
call = CallC(
649+
f"{api_name}.from_iterable",
650+
[iterable_val, cap],
651+
vec_type,
652+
steals=[False, False],
653+
is_borrowed=False,
654+
error_kind=ERR_MAGIC,
655+
line=line,
656+
)
657+
return builder.add(call)
658+
659+
# Use a for loop with vec_append. The comprehension helper
660+
# special-cases range, list, vec, etc. for efficient iteration.
626661
vec = Register(vec_type)
627662
builder.assign(vec, vec_create(builder.builder, vec_type, 0, line, capacity=capacity), line)
628663
name = f"___tmp_{line}"

mypyc/lib-rt/vecs/librt_vecs.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,7 @@ typedef struct _VecI64API {
278278
VecI64 (*remove)(VecI64, int64_t);
279279
// TODO: Py_ssize_t
280280
VecI64 (*slice)(VecI64, int64_t, int64_t);
281+
VecI64 (*from_iterable)(PyObject *, int64_t);
281282
VecI64 (*extend)(VecI64, PyObject *);
282283
VecI64 (*extend_vec)(VecI64, VecI64);
283284
} VecI64API;
@@ -295,6 +296,7 @@ typedef struct _VecI32API {
295296
VecI32 (*remove)(VecI32, int32_t);
296297
// TODO: Py_ssize_t
297298
VecI32 (*slice)(VecI32, int64_t, int64_t);
299+
VecI32 (*from_iterable)(PyObject *, int64_t);
298300
VecI32 (*extend)(VecI32, PyObject *);
299301
VecI32 (*extend_vec)(VecI32, VecI32);
300302
} VecI32API;
@@ -312,6 +314,7 @@ typedef struct _VecI16API {
312314
VecI16 (*remove)(VecI16, int16_t);
313315
// TODO: Py_ssize_t
314316
VecI16 (*slice)(VecI16, int64_t, int64_t);
317+
VecI16 (*from_iterable)(PyObject *, int64_t);
315318
VecI16 (*extend)(VecI16, PyObject *);
316319
VecI16 (*extend_vec)(VecI16, VecI16);
317320
} VecI16API;
@@ -329,6 +332,7 @@ typedef struct _VecU8API {
329332
VecU8 (*remove)(VecU8, uint8_t);
330333
// TODO: Py_ssize_t
331334
VecU8 (*slice)(VecU8, int64_t, int64_t);
335+
VecU8 (*from_iterable)(PyObject *, int64_t);
332336
VecU8 (*extend)(VecU8, PyObject *);
333337
VecU8 (*extend_vec)(VecU8, VecU8);
334338
} VecU8API;
@@ -346,6 +350,7 @@ typedef struct _VecFloatAPI {
346350
VecFloat (*remove)(VecFloat, double);
347351
// TODO: Py_ssize_t
348352
VecFloat (*slice)(VecFloat, int64_t, int64_t);
353+
VecFloat (*from_iterable)(PyObject *, int64_t);
349354
VecFloat (*extend)(VecFloat, PyObject *);
350355
VecFloat (*extend_vec)(VecFloat, VecFloat);
351356
} VecFloatAPI;
@@ -363,6 +368,7 @@ typedef struct _VecBoolAPI {
363368
VecBool (*remove)(VecBool, char);
364369
// TODO: Py_ssize_t
365370
VecBool (*slice)(VecBool, int64_t, int64_t);
371+
VecBool (*from_iterable)(PyObject *, int64_t);
366372
VecBool (*extend)(VecBool, PyObject *);
367373
VecBool (*extend_vec)(VecBool, VecBool);
368374
} VecBoolAPI;

mypyc/lib-rt/vecs/vec_float.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#define BOX_ITEM VecFloat_BoxItem
1515
#define UNBOX_ITEM VecFloat_UnboxItem
1616
#define IS_UNBOX_ERROR VecFloat_IsUnboxError
17+
#define BUFFER_FORMAT_CHAR_OK(c) ((c) == 'd')
1718

1819
#include "vec_template.c"
1920

mypyc/lib-rt/vecs/vec_i16.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#define BOX_ITEM VecI16_BoxItem
1515
#define UNBOX_ITEM VecI16_UnboxItem
1616
#define IS_UNBOX_ERROR VecI16_IsUnboxError
17+
#define BUFFER_FORMAT_CHAR_OK(c) ((c) == 'h')
1718

1819
#include "vec_template.c"
1920

mypyc/lib-rt/vecs/vec_i32.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#define BOX_ITEM VecI32_BoxItem
1515
#define UNBOX_ITEM VecI32_UnboxItem
1616
#define IS_UNBOX_ERROR VecI32_IsUnboxError
17+
#define BUFFER_FORMAT_CHAR_OK(c) ((c) == 'i' || ((c) == 'l' && sizeof(long) == 4))
1718

1819
#include "vec_template.c"
1920

mypyc/lib-rt/vecs/vec_i64.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#define BOX_ITEM VecI64_BoxItem
1515
#define UNBOX_ITEM VecI64_UnboxItem
1616
#define IS_UNBOX_ERROR VecI64_IsUnboxError
17+
#define BUFFER_FORMAT_CHAR_OK(c) ((c) == 'q' || ((c) == 'l' && sizeof(long) == 8))
1718

1819
#include "vec_template.c"
1920

mypyc/lib-rt/vecs/vec_template.c

Lines changed: 130 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -100,10 +100,72 @@ VEC FUNC(New)(Py_ssize_t size, Py_ssize_t cap) {
100100
return vec;
101101
}
102102

103-
PyObject *FUNC(FromIterable)(PyObject *iterable, int64_t cap) {
103+
#ifdef BUFFER_FORMAT_CHAR_OK
104+
inline static int buffer_format_matches(const char *fmt) {
105+
char c = *fmt;
106+
if (c == '@' || c == '=') {
107+
c = fmt[1];
108+
}
109+
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
110+
else if (c == '<') { c = fmt[1]; }
111+
else if (c == '>' || c == '!') { return 0; }
112+
#else
113+
else if (c == '>') { c = fmt[1]; }
114+
else if (c == '<' || c == '!') { return 0; }
115+
#endif
116+
return c != '\0' && BUFFER_FORMAT_CHAR_OK(c);
117+
}
118+
119+
// Try to get a compatible buffer view from 'obj'. Return 1 if successful
120+
// (view is filled and caller must call PyBuffer_Release), 0 if the object
121+
// doesn't support buffer protocol or the format doesn't match (no cleanup
122+
// needed). Errors from PyObject_GetBuffer are cleared and reported as 0; -1 is currently never returned.
123+
inline static int vec_get_buffer(PyObject *obj, Py_buffer *view) {
124+
if (PyObject_GetBuffer(obj, view, PyBUF_C_CONTIGUOUS | PyBUF_FORMAT) != 0) {
125+
PyErr_Clear();
126+
return 0;
127+
}
128+
if (view->ndim == 1
129+
&& view->itemsize == sizeof(ITEM_C_TYPE)
130+
&& buffer_format_matches(view->format)) {
131+
return 1;
132+
}
133+
PyBuffer_Release(view);
134+
return 0;
135+
}
136+
#endif
137+
138+
VEC FUNC(FromIterable)(PyObject *iterable, int64_t cap) {
139+
if (cap < 0) {
140+
PyErr_SetString(PyExc_ValueError, "capacity must not be negative");
141+
return vec_error();
142+
}
143+
144+
#ifdef BUFFER_FORMAT_CHAR_OK
145+
Py_buffer view;
146+
int buf_ok = vec_get_buffer(iterable, &view);
147+
if (buf_ok < 0)
148+
return vec_error();
149+
if (buf_ok) {
150+
Py_ssize_t n = view.len / (Py_ssize_t)sizeof(ITEM_C_TYPE);
151+
Py_ssize_t alloc_size = n > cap ? n : cap;
152+
VEC v = vec_alloc(alloc_size);
153+
if (VEC_IS_ERROR(v)) {
154+
PyBuffer_Release(&view);
155+
return vec_error();
156+
}
157+
if (n > 0) {
158+
memcpy(v.buf->items, view.buf, n * sizeof(ITEM_C_TYPE));
159+
}
160+
v.len = n;
161+
PyBuffer_Release(&view);
162+
return v;
163+
}
164+
#endif
165+
104166
VEC v = vec_alloc(cap);
105167
if (VEC_IS_ERROR(v))
106-
return NULL;
168+
return vec_error();
107169
if (cap > 0) {
108170
memset(v.buf->items, 0, sizeof(ITEM_C_TYPE) * cap);
109171
}
@@ -112,7 +174,7 @@ PyObject *FUNC(FromIterable)(PyObject *iterable, int64_t cap) {
112174
PyObject *iter = PyObject_GetIter(iterable);
113175
if (iter == NULL) {
114176
VEC_DECREF(v);
115-
return NULL;
177+
return vec_error();
116178
}
117179
PyObject *item;
118180
while ((item = PyIter_Next(iter)) != NULL) {
@@ -121,21 +183,21 @@ PyObject *FUNC(FromIterable)(PyObject *iterable, int64_t cap) {
121183
if (IS_UNBOX_ERROR(x)) {
122184
Py_DECREF(iter);
123185
VEC_DECREF(v);
124-
return NULL;
186+
return vec_error();
125187
}
126188
v = FUNC(Append)(v, x);
127189
if (VEC_IS_ERROR(v)) {
128190
Py_DECREF(iter);
129191
VEC_DECREF(v);
130-
return NULL;
192+
return vec_error();
131193
}
132194
}
133195
Py_DECREF(iter);
134196
if (PyErr_Occurred()) {
135197
VEC_DECREF(v);
136-
return NULL;
198+
return vec_error();
137199
}
138-
return FUNC(Box)(v);
200+
return v;
139201
}
140202

141203
static PyObject *vec_new(PyTypeObject *self, PyObject *args, PyObject *kw) {
@@ -152,7 +214,10 @@ static PyObject *vec_new(PyTypeObject *self, PyObject *args, PyObject *kw) {
152214
if (init == NULL) {
153215
return FUNC(Box)(FUNC(New)(0, cap));
154216
} else {
155-
return (PyObject *)FUNC(FromIterable)(init, cap);
217+
VEC v = FUNC(FromIterable)(init, cap);
218+
if (VEC_IS_ERROR(v))
219+
return NULL;
220+
return FUNC(Box)(v);
156221
}
157222
}
158223

@@ -332,13 +397,68 @@ VEC FUNC(Append)(VEC vec, ITEM_C_TYPE x) {
332397
}
333398
}
334399

400+
// Extend 'dst' by appending 'n' items from 'items', stealing 'dst'.
401+
// Caller guarantees n > 0 and that 'items' remains valid for the call.
402+
// If force_alloc is true, always allocate a new buffer even when dst has capacity.
403+
inline static VEC vec_extend_items(
404+
VEC dst, const ITEM_C_TYPE *items, Py_ssize_t n, int force_alloc
405+
) {
406+
if (unlikely(n > PY_SSIZE_T_MAX - dst.len)) {
407+
PyErr_NoMemory();
408+
VEC_DECREF(dst);
409+
return vec_error();
410+
}
411+
Py_ssize_t new_len = dst.len + n;
412+
Py_ssize_t cap = dst.buf ? VEC_CAP(dst) : 0;
413+
if (!force_alloc && new_len <= cap) {
414+
memcpy(dst.buf->items + dst.len, items, sizeof(ITEM_C_TYPE) * n);
415+
dst.len = new_len;
416+
return dst;
417+
}
418+
Py_ssize_t new_cap = cap;
419+
while (new_cap < new_len) {
420+
if (unlikely(new_cap > (PY_SSIZE_T_MAX - 1) / 2)) {
421+
new_cap = new_len;
422+
break;
423+
}
424+
new_cap = 2 * new_cap + 1;
425+
}
426+
VEC new = vec_alloc(new_cap);
427+
if (VEC_IS_ERROR(new)) {
428+
VEC_DECREF(dst);
429+
return vec_error();
430+
}
431+
if (dst.len > 0)
432+
memcpy(new.buf->items, dst.buf->items, sizeof(ITEM_C_TYPE) * dst.len);
433+
memcpy(new.buf->items + dst.len, items, sizeof(ITEM_C_TYPE) * n);
434+
new.len = new_len;
435+
Py_XDECREF(dst.buf);
436+
return new;
437+
}
438+
335439
// Extend 'vec' with items from 'iterable', stealing 'vec'.
336440
// Return extended 'vec', or error vec on failure.
337441
VEC FUNC(Extend)(VEC vec, PyObject *iterable) {
338442
if (Py_TYPE(iterable) == &VEC_TYPE) {
339443
return FUNC(ExtendVec)(vec, ((VEC_OBJECT *)iterable)->vec);
340444
}
341445

446+
#ifdef BUFFER_FORMAT_CHAR_OK
447+
Py_buffer view;
448+
int buf_ok = vec_get_buffer(iterable, &view);
449+
if (buf_ok < 0) {
450+
VEC_DECREF(vec);
451+
return vec_error();
452+
}
453+
if (buf_ok) {
454+
Py_ssize_t n = view.len / (Py_ssize_t)sizeof(ITEM_C_TYPE);
455+
if (n > 0)
456+
vec = vec_extend_items(vec, (const ITEM_C_TYPE *)view.buf, n, 0);
457+
PyBuffer_Release(&view);
458+
return vec;
459+
}
460+
#endif
461+
342462
PyObject *iter = PyObject_GetIter(iterable);
343463
if (iter == NULL) {
344464
VEC_DECREF(vec);
@@ -372,39 +492,7 @@ VEC FUNC(Extend)(VEC vec, PyObject *iterable) {
372492
VEC FUNC(ExtendVec)(VEC dst, VEC src) {
373493
if (src.len == 0)
374494
return dst;
375-
if (unlikely(src.len > PY_SSIZE_T_MAX - dst.len)) {
376-
PyErr_NoMemory();
377-
VEC_DECREF(dst);
378-
return vec_error();
379-
}
380-
Py_ssize_t new_len = dst.len + src.len;
381-
Py_ssize_t cap = dst.buf ? VEC_CAP(dst) : 0;
382-
if (new_len <= cap && dst.buf != src.buf) {
383-
// Fast path: enough capacity and no aliasing
384-
memcpy(dst.buf->items + dst.len, src.buf->items, sizeof(ITEM_C_TYPE) * src.len);
385-
dst.len = new_len;
386-
return dst;
387-
}
388-
// Need to reallocate (or dst and src share a buffer)
389-
Py_ssize_t new_cap = cap;
390-
while (new_cap < new_len) {
391-
if (unlikely(new_cap > (PY_SSIZE_T_MAX - 1) / 2)) {
392-
new_cap = new_len;
393-
break;
394-
}
395-
new_cap = 2 * new_cap + 1;
396-
}
397-
VEC new = vec_alloc(new_cap);
398-
if (VEC_IS_ERROR(new)) {
399-
VEC_DECREF(dst);
400-
return vec_error();
401-
}
402-
if (dst.len > 0)
403-
memcpy(new.buf->items, dst.buf->items, sizeof(ITEM_C_TYPE) * dst.len);
404-
memcpy(new.buf->items + dst.len, src.buf->items, sizeof(ITEM_C_TYPE) * src.len);
405-
new.len = new_len;
406-
Py_XDECREF(dst.buf);
407-
return new;
495+
return vec_extend_items(dst, src.buf->items, src.len, dst.buf == src.buf);
408496
}
409497

410498
// Remove item from 'vec', stealing 'vec'. Return 'vec' with item removed.
@@ -575,6 +663,7 @@ NAME(API) FEATURES = {
575663
FUNC(Pop),
576664
FUNC(Remove),
577665
FUNC(Slice),
666+
FUNC(FromIterable),
578667
FUNC(Extend),
579668
FUNC(ExtendVec),
580669
};

0 commit comments

Comments
 (0)