Skip to content

Commit e2923a1

Browse files
authored
[mypyc] Add i32 and i64 read/write functions to librt.strings (#20757)
Also fix a bug related to overlapping error values. Refactor code and tests. Use memcpy() for unaligned access instead of accessing individual bytes, since this apparently compiles to efficient code with more C compilers. I'll add big-endian variants in a follow-up PR. I used a coding agent for assistance, but only to make small, incremental changes. Follow-up to #20745.
1 parent 525c6d1 commit e2923a1

8 files changed

Lines changed: 614 additions & 84 deletions

File tree

mypy/typeshed/stubs/librt/librt/strings.pyi

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,7 @@ class StringWriter:
2222

2323
def write_i16_le(b: BytesWriter, n: i16, /) -> None: ...
2424
def read_i16_le(b: bytes, index: i64, /) -> i16: ...
25+
def write_i32_le(b: BytesWriter, n: i32, /) -> None: ...
26+
def read_i32_le(b: bytes, index: i64, /) -> i32: ...
27+
def write_i64_le(b: BytesWriter, n: i64, /) -> None: ...
28+
def read_i64_le(b: bytes, index: i64, /) -> i64: ...

mypyc/lib-rt/byteswriter_extra_ops.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,15 +32,14 @@ char CPyBytesWriter_Write(PyObject *obj, PyObject *value) {
3232
return CPY_NONE;
3333
}
3434

35-
int16_t CPyBytes_ReadError(int64_t index, Py_ssize_t size) {
35+
void CPyBytes_ReadError(int64_t index, Py_ssize_t size) {
3636
if (index < 0) {
3737
PyErr_SetString(PyExc_ValueError, "index must be non-negative");
3838
} else {
3939
PyErr_Format(PyExc_IndexError,
4040
"index %lld out of range for bytes of length %zd",
4141
(long long)index, size);
4242
}
43-
return CPY_LL_INT_ERROR;
4443
}
4544

4645
#endif // MYPYC_EXPERIMENTAL

mypyc/lib-rt/byteswriter_extra_ops.h

Lines changed: 64 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
#include "strings/librt_strings.h"
1010
#include "strings/librt_strings_common.h"
1111

12+
// BytesWriter: Length and capacity
13+
1214
static inline CPyTagged
1315
CPyBytesWriter_Len(PyObject *obj) {
1416
return (CPyTagged)((BytesWriterObject *)obj)->len << 1;
@@ -23,6 +25,8 @@ CPyBytesWriter_EnsureSize(BytesWriterObject *data, Py_ssize_t n) {
2325
}
2426
}
2527

28+
// BytesWriter: Basic write operations
29+
2630
static inline char
2731
CPyBytesWriter_Append(PyObject *obj, uint8_t value) {
2832
BytesWriterObject *self = (BytesWriterObject *)obj;
@@ -35,19 +39,9 @@ CPyBytesWriter_Append(PyObject *obj, uint8_t value) {
3539
return CPY_NONE;
3640
}
3741

38-
static inline char
39-
CPyBytesWriter_WriteI16LE(PyObject *obj, int16_t value) {
40-
BytesWriterObject *self = (BytesWriterObject *)obj;
41-
if (!CPyBytesWriter_EnsureSize(self, 2))
42-
return CPY_NONE_ERROR;
43-
BytesWriter_write_i16_le_unchecked(self, value);
44-
return CPY_NONE;
45-
}
46-
4742
char CPyBytesWriter_Write(PyObject *obj, PyObject *value);
4843

49-
// Helper function for bytes read error handling (negative index or out of range)
50-
int16_t CPyBytes_ReadError(int64_t index, Py_ssize_t size);
44+
// BytesWriter: Indexing operations
5145

5246
// If index is negative, convert to non-negative index (no range checking)
5347
static inline int64_t CPyBytesWriter_AdjustIndex(PyObject *obj, int64_t index) {
@@ -69,6 +63,40 @@ static inline void CPyBytesWriter_SetItem(PyObject *obj, int64_t index, uint8_t
6963
(((BytesWriterObject *)obj)->buf)[index] = x;
7064
}
7165

66+
// BytesWriter: Write integer operations (little-endian)
67+
68+
static inline char
69+
CPyBytesWriter_WriteI16LE(PyObject *obj, int16_t value) {
70+
BytesWriterObject *self = (BytesWriterObject *)obj;
71+
if (!CPyBytesWriter_EnsureSize(self, 2))
72+
return CPY_NONE_ERROR;
73+
BytesWriter_WriteI16LEUnsafe(self, value);
74+
return CPY_NONE;
75+
}
76+
77+
static inline char
78+
CPyBytesWriter_WriteI32LE(PyObject *obj, int32_t value) {
79+
BytesWriterObject *self = (BytesWriterObject *)obj;
80+
if (!CPyBytesWriter_EnsureSize(self, 4))
81+
return CPY_NONE_ERROR;
82+
BytesWriter_WriteI32LEUnsafe(self, value);
83+
return CPY_NONE;
84+
}
85+
86+
static inline char
87+
CPyBytesWriter_WriteI64LE(PyObject *obj, int64_t value) {
88+
BytesWriterObject *self = (BytesWriterObject *)obj;
89+
if (!CPyBytesWriter_EnsureSize(self, 8))
90+
return CPY_NONE_ERROR;
91+
BytesWriter_WriteI64LEUnsafe(self, value);
92+
return CPY_NONE;
93+
}
94+
95+
// Bytes: Read integer operations (little-endian)
96+
97+
// Helper function for bytes read error handling (negative index or out of range)
98+
void CPyBytes_ReadError(int64_t index, Py_ssize_t size);
99+
72100
static inline int16_t
73101
CPyBytes_ReadI16LE(PyObject *bytes_obj, int64_t index) {
74102
// bytes_obj type is enforced by mypyc
@@ -78,7 +106,31 @@ CPyBytes_ReadI16LE(PyObject *bytes_obj, int64_t index) {
78106
return CPY_LL_INT_ERROR;
79107
}
80108
const unsigned char *data = (const unsigned char *)PyBytes_AS_STRING(bytes_obj);
81-
return read_i16_le_unchecked(data + index);
109+
return CPyBytes_ReadI16LEUnsafe(data + index);
110+
}
111+
112+
static inline int32_t
113+
CPyBytes_ReadI32LE(PyObject *bytes_obj, int64_t index) {
114+
// bytes_obj type is enforced by mypyc
115+
Py_ssize_t size = PyBytes_GET_SIZE(bytes_obj);
116+
if (unlikely(index < 0 || index > size - 4)) {
117+
CPyBytes_ReadError(index, size);
118+
return CPY_LL_INT_ERROR;
119+
}
120+
const unsigned char *data = (const unsigned char *)PyBytes_AS_STRING(bytes_obj);
121+
return CPyBytes_ReadI32LEUnsafe(data + index);
122+
}
123+
124+
static inline int64_t
125+
CPyBytes_ReadI64LE(PyObject *bytes_obj, int64_t index) {
126+
// bytes_obj type is enforced by mypyc
127+
Py_ssize_t size = PyBytes_GET_SIZE(bytes_obj);
128+
if (unlikely(index < 0 || index > size - 8)) {
129+
CPyBytes_ReadError(index, size);
130+
return CPY_LL_INT_ERROR;
131+
}
132+
const unsigned char *data = (const unsigned char *)PyBytes_AS_STRING(bytes_obj);
133+
return CPyBytes_ReadI64LEUnsafe(data + index);
82134
}
83135

84136
#endif // MYPYC_EXPERIMENTAL

mypyc/lib-rt/strings/librt_strings.c

Lines changed: 97 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -831,46 +831,37 @@ StringWriter_len_internal(PyObject *self) {
831831

832832
// End of StringWriter
833833

834-
static PyObject*
835-
write_i16_le(PyObject *module, PyObject *const *args, size_t nargs) {
834+
// Helper for write_i*_le/be functions - validates args and returns BytesWriter
835+
static inline BytesWriterObject *
836+
parse_write_int_args(PyObject *const *args, size_t nargs, const char *func_name) {
836837
if (unlikely(nargs != 2)) {
837838
PyErr_Format(PyExc_TypeError,
838-
"write_i16_le() takes exactly 2 arguments (%zu given)", nargs);
839+
"%s() takes exactly 2 arguments (%zu given)", func_name, nargs);
839840
return NULL;
840841
}
841842
PyObject *writer = args[0];
842843
if (!check_bytes_writer(writer)) {
843844
return NULL;
844845
}
845-
PyObject *value = args[1];
846-
int16_t unboxed = CPyLong_AsInt16(value);
847-
if (unlikely(unboxed == CPY_LL_INT_ERROR && PyErr_Occurred())) {
848-
// Error already set by CPyLong_AsInt16 (ValueError for overflow, TypeError for wrong type)
849-
return NULL;
850-
}
851-
BytesWriterObject *bw = (BytesWriterObject *)writer;
852-
if (unlikely(!ensure_bytes_writer_size(bw, 2))) {
853-
return NULL;
854-
}
855-
BytesWriter_write_i16_le_unchecked(bw, unboxed);
856-
Py_INCREF(Py_None);
857-
return Py_None;
846+
return (BytesWriterObject *)writer;
858847
}
859848

860-
static PyObject*
861-
read_i16_le(PyObject *module, PyObject *const *args, size_t nargs) {
849+
// Helper for read_i*_le/be functions - validates args and returns data pointer
850+
// Returns NULL on error, sets *out_index to the validated index on success
851+
static inline const unsigned char *
852+
parse_read_int_args(PyObject *const *args, size_t nargs, const char *func_name,
853+
Py_ssize_t num_bytes, int64_t *out_index) {
862854
if (unlikely(nargs != 2)) {
863855
PyErr_Format(PyExc_TypeError,
864-
"read_i16_le() takes exactly 2 arguments (%zu given)", nargs);
856+
"%s() takes exactly 2 arguments (%zu given)", func_name, nargs);
865857
return NULL;
866858
}
867859
PyObject *bytes_obj = args[0];
868860
if (unlikely(!PyBytes_Check(bytes_obj))) {
869-
PyErr_SetString(PyExc_TypeError, "read_i16_le() argument 1 must be bytes");
861+
PyErr_Format(PyExc_TypeError, "%s() argument 1 must be bytes", func_name);
870862
return NULL;
871863
}
872-
PyObject *index_obj = args[1];
873-
int64_t index = CPyLong_AsInt64(index_obj);
864+
int64_t index = CPyLong_AsInt64(args[1]);
874865
if (unlikely(index == CPY_LL_INT_ERROR && PyErr_Occurred())) {
875866
return NULL;
876867
}
@@ -879,15 +870,83 @@ read_i16_le(PyObject *module, PyObject *const *args, size_t nargs) {
879870
return NULL;
880871
}
881872
Py_ssize_t size = PyBytes_GET_SIZE(bytes_obj);
882-
if (unlikely(index > size - 2)) {
873+
if (unlikely(index > size - num_bytes)) {
883874
PyErr_Format(PyExc_IndexError,
884875
"index %lld out of range for bytes of length %zd",
885876
(long long)index, size);
886877
return NULL;
887878
}
888-
const unsigned char *data = (const unsigned char *)PyBytes_AS_STRING(bytes_obj);
889-
int16_t value = read_i16_le_unchecked(data + index);
890-
return PyLong_FromLong(value);
879+
*out_index = index;
880+
return (const unsigned char *)PyBytes_AS_STRING(bytes_obj);
881+
}
882+
883+
static PyObject*
884+
write_i16_le(PyObject *module, PyObject *const *args, size_t nargs) {
885+
BytesWriterObject *bw = parse_write_int_args(args, nargs, "write_i16_le");
886+
if (bw == NULL)
887+
return NULL;
888+
int16_t unboxed = CPyLong_AsInt16(args[1]);
889+
if (unlikely(unboxed == CPY_LL_INT_ERROR && PyErr_Occurred()))
890+
return NULL;
891+
if (unlikely(!ensure_bytes_writer_size(bw, 2)))
892+
return NULL;
893+
BytesWriter_WriteI16LEUnsafe(bw, unboxed);
894+
Py_RETURN_NONE;
895+
}
896+
897+
static PyObject*
898+
read_i16_le(PyObject *module, PyObject *const *args, size_t nargs) {
899+
int64_t index;
900+
const unsigned char *data = parse_read_int_args(args, nargs, "read_i16_le", 2, &index);
901+
if (data == NULL)
902+
return NULL;
903+
return PyLong_FromLong(CPyBytes_ReadI16LEUnsafe(data + index));
904+
}
905+
906+
static PyObject*
907+
write_i32_le(PyObject *module, PyObject *const *args, size_t nargs) {
908+
BytesWriterObject *bw = parse_write_int_args(args, nargs, "write_i32_le");
909+
if (bw == NULL)
910+
return NULL;
911+
int32_t unboxed = CPyLong_AsInt32(args[1]);
912+
if (unlikely(unboxed == CPY_LL_INT_ERROR && PyErr_Occurred()))
913+
return NULL;
914+
if (unlikely(!ensure_bytes_writer_size(bw, 4)))
915+
return NULL;
916+
BytesWriter_WriteI32LEUnsafe(bw, unboxed);
917+
Py_RETURN_NONE;
918+
}
919+
920+
static PyObject*
921+
read_i32_le(PyObject *module, PyObject *const *args, size_t nargs) {
922+
int64_t index;
923+
const unsigned char *data = parse_read_int_args(args, nargs, "read_i32_le", 4, &index);
924+
if (data == NULL)
925+
return NULL;
926+
return PyLong_FromLong(CPyBytes_ReadI32LEUnsafe(data + index));
927+
}
928+
929+
static PyObject*
930+
write_i64_le(PyObject *module, PyObject *const *args, size_t nargs) {
931+
BytesWriterObject *bw = parse_write_int_args(args, nargs, "write_i64_le");
932+
if (bw == NULL)
933+
return NULL;
934+
int64_t unboxed = CPyLong_AsInt64(args[1]);
935+
if (unlikely(unboxed == CPY_LL_INT_ERROR && PyErr_Occurred()))
936+
return NULL;
937+
if (unlikely(!ensure_bytes_writer_size(bw, 8)))
938+
return NULL;
939+
BytesWriter_WriteI64LEUnsafe(bw, unboxed);
940+
Py_RETURN_NONE;
941+
}
942+
943+
static PyObject*
944+
read_i64_le(PyObject *module, PyObject *const *args, size_t nargs) {
945+
int64_t index;
946+
const unsigned char *data = parse_read_int_args(args, nargs, "read_i64_le", 8, &index);
947+
if (data == NULL)
948+
return NULL;
949+
return PyLong_FromLongLong(CPyBytes_ReadI64LEUnsafe(data + index));
891950
}
892951

893952
#endif
@@ -900,6 +959,18 @@ static PyMethodDef librt_strings_module_methods[] = {
900959
{"read_i16_le", (PyCFunction) read_i16_le, METH_FASTCALL,
901960
PyDoc_STR("Read a 16-bit signed integer from bytes in little-endian format")
902961
},
962+
{"write_i32_le", (PyCFunction) write_i32_le, METH_FASTCALL,
963+
PyDoc_STR("Write a 32-bit signed integer to BytesWriter in little-endian format")
964+
},
965+
{"read_i32_le", (PyCFunction) read_i32_le, METH_FASTCALL,
966+
PyDoc_STR("Read a 32-bit signed integer from bytes in little-endian format")
967+
},
968+
{"write_i64_le", (PyCFunction) write_i64_le, METH_FASTCALL,
969+
PyDoc_STR("Write a 64-bit signed integer to BytesWriter in little-endian format")
970+
},
971+
{"read_i64_le", (PyCFunction) read_i64_le, METH_FASTCALL,
972+
PyDoc_STR("Read a 64-bit signed integer from bytes in little-endian format")
973+
},
903974
#endif
904975
{NULL, NULL, 0, NULL}
905976
};

0 commit comments

Comments
 (0)