@@ -62,8 +62,7 @@ cdef class DesDecimalType(Deserializer):
6262
6363 # Create a view of the remaining bytes (after the 4-byte scale)
6464 cdef Buffer varint_buf
65- varint_buf.ptr = buf.ptr + 4
66- varint_buf.size = buf.size - 4
65+ from_ptr_and_size(buf.ptr + 4 , buf.size - 4 , & varint_buf)
6766 unscaled = varint_unpack(& varint_buf)
6867
6968 return Decimal(' %d e%d ' % (unscaled, - scale))
@@ -183,6 +182,7 @@ cdef class DesVarcharType(DesUTF8Type):
183182 pass
184183
185184
185+
186186cdef class _DesParameterizedType(Deserializer):
187187
188188 cdef object subtypes
@@ -249,22 +249,40 @@ cdef inline int subelem(
249249 Read the next element from the buffer: first read the size (in bytes) of the
250250 element, then fill elem_buf with a newly sliced buffer of this size (and the
251251 right offset).
252+
253+ Protocol: n >= 0: n bytes follow
254+ n == -1: NULL value
255+ n == -2: not set value
256+ n < -2: invalid
252257 """
253258 cdef int32_t elemlen
254259
255260 _unpack_len(buf, offset[0 ], & elemlen)
256261 offset[0 ] += sizeof(int32_t)
257- # Direct pointer assignment instead of slice_buffer
258- elem_buf.ptr = buf.ptr + offset[0 ]
259- elem_buf.size = elemlen
260- offset[0 ] += elemlen
261- return 0
262+
263+ # Happy path: non-negative length element that fits in buffer
264+ if elemlen >= 0 :
265+ if offset[0 ] + elemlen <= buf.size:
266+ from_ptr_and_size(buf.ptr + offset[0 ], elemlen, elem_buf)
267+ offset[0 ] += elemlen
268+ return 0
269+ raise IndexError (" Element length %d at offset %d exceeds buffer size %d " % (elemlen, offset[0 ], buf.size))
270+ # NULL value (-1) or not set value (-2)
271+ elif elemlen == - 1 or elemlen == - 2 :
272+ from_ptr_and_size(NULL , elemlen, elem_buf)
273+ return 0
274+ # Invalid value (n < -2)
275+ else :
276+ raise ValueError (" Invalid element length %d at offset %d " % (elemlen, offset[0 ]))
262277
263278
cdef inline int _unpack_len(Buffer *buf, int offset, int32_t *output) except -1:
    """
    Read a big-endian int32 length field located at `offset` in `buf` and
    store it in `output[0]`.

    The four bytes are copied out with memcpy instead of being read through a
    cast pointer, so the read does not require `buf.ptr + offset` to be
    4-byte aligned.

    Returns 0 on success.
    Raises IndexError when `offset` is negative or fewer than four bytes are
    available in `buf` starting at `offset`.
    """
    cdef uint32_t temp
    cdef Py_ssize_t field_size = <Py_ssize_t> sizeof(int32_t)

    # Keep the bounds check in signed arithmetic. `offset + sizeof(int32_t)`
    # is a size_t in C, so comparing it with buf.size would convert a negative
    # size (NULL / not-set buffers carry size -1 / -2) to a huge unsigned
    # value and let the check pass.
    # NOTE(review): assumes Buffer.size is a signed Py_ssize_t - confirm
    # against the Buffer declaration.
    # The subtraction form below cannot overflow because buf.size >= field_size
    # has already been established when it is evaluated.
    if offset < 0 or buf.size < field_size or offset > buf.size - field_size:
        raise IndexError(" Cannot read length field: offset %d + 4 exceeds buffer size %d " % (offset, buf.size))

    memcpy(&temp, buf.ptr + offset, sizeof(uint32_t))
    # The length is transmitted in network (big-endian) byte order.
    output[0] = <int32_t> ntohl(temp)
    return 0
269287
270288# --------------------------------------------------------------------------
@@ -322,6 +340,7 @@ cdef class DesTupleType(_DesParameterizedType):
322340 cdef deserialize(self , Buffer * buf, int protocol_version):
323341 cdef Py_ssize_t i, p
324342 cdef int32_t itemlen
343+ cdef uint32_t _tuple_tmp
325344 cdef tuple res = tuple_new(self .subtypes_len)
326345 cdef Buffer item_buf
327346 cdef Deserializer deserializer
@@ -334,18 +353,25 @@ cdef class DesTupleType(_DesParameterizedType):
334353 values = []
335354 for i in range (self .subtypes_len):
336355 item = None
337- if p < buf.size:
338- # Read itemlen directly using ntohl instead of slice_buffer
339- itemlen = < int32_t> ntohl((< uint32_t* > (buf.ptr + p))[0 ])
356+ if p + 4 <= buf.size:
357+ # Read itemlen using memcpy for alignment safety
358+ memcpy(& _tuple_tmp, buf.ptr + p, 4 )
359+ itemlen = < int32_t> ntohl(_tuple_tmp)
340360 p += 4
341- if itemlen >= 0 :
342- # Direct pointer assignment instead of slice_buffer
343- item_buf.ptr = buf.ptr + p
344- item_buf.size = itemlen
361+
362+ if itemlen >= 0 and p + itemlen <= buf.size:
363+ from_ptr_and_size(buf.ptr + p, itemlen, & item_buf)
345364 p += itemlen
346365
347366 deserializer = self .deserializers[i]
348367 item = from_binary(deserializer, & item_buf, protocol_version)
368+ elif itemlen < 0 :
369+ # NULL value, item stays None
370+ pass
371+ else :
372+ raise IndexError (" Tuple item length %d at offset %d exceeds buffer size %d " % (itemlen, p, buf.size))
373+ elif p < buf.size:
374+ raise IndexError (" Cannot read tuple item length at offset %d : only %d bytes remain" % (p, buf.size - p))
349375
350376 tuple_set(res, i, item)
351377
@@ -387,19 +413,23 @@ cdef class DesCompositeType(_DesParameterizedType):
387413 break
388414
389415 element_length = unpack_num[uint16_t](buf)
390- # Direct pointer assignment instead of slice_buffer
391- elem_buf.ptr = buf.ptr + 2
392- elem_buf.size = element_length
393-
394- deserializer = self .deserializers[i]
395- item = from_binary(deserializer, & elem_buf, protocol_version)
396- tuple_set(res, i, item)
397416
398- # skip element length, element, and the EOC (one byte)
399- # Advance buffer in-place with direct assignment
400- start = 2 + element_length + 1
401- buf.ptr = buf.ptr + start
402- buf.size = buf.size - start
417+ # Validate that we have enough data for the element and EOC byte (happy path check)
418+ if 2 + element_length + 1 <= buf.size:
419+ from_ptr_and_size(buf.ptr + 2 , element_length, & elem_buf)
420+
421+ deserializer = self .deserializers[i]
422+ item = from_binary(deserializer, & elem_buf, protocol_version)
423+ tuple_set(res, i, item)
424+
425+ # skip element length, element, and the EOC (one byte)
426+ # Advance buffer in-place with direct assignment
427+ start = 2 + element_length + 1
428+ buf.ptr = buf.ptr + start
429+ buf.size = buf.size - start
430+ else :
431+ raise IndexError (" Composite element length %d requires %d bytes but only %d remain" %
432+ (element_length, 2 + element_length + 1 , buf.size))
403433
404434 return res
405435
0 commit comments