@@ -24,6 +24,8 @@ def read_value(data, offset_bits, num_bits, is_signed):
2424
2525 if num_bits < 64 :
2626 result = result & ((1 << num_bits ) - 1 )
27+ elif offset_extra_bits > 0 :
28+ result = result & ((1 << num_bits ) - 1 )
2729
2830 if not is_signed :
2931 return result
@@ -75,21 +77,38 @@ def read_field_vectorized(raw_bytes_2d, field_offset_bits, field_width_bits, is_
7577 :param is_signed: whether to sign-extend the result
7678 :return: numpy array of field values
7779 """
80+ if field_width_bits == 1 :
81+ byte_idx = field_offset_bits // 8
82+ bit_idx = field_offset_bits % 8
83+ return ((raw_bytes_2d [:, byte_idx ].astype (np .uint64 ) >> np .uint64 (bit_idx )) &
84+ np .uint64 (1 ))
85+
7886 byte_start = field_offset_bits // 8
7987 bit_shift = field_offset_bits % 8
8088 bytes_needed = (bit_shift + field_width_bits + 7 ) // 8
8189
90+ # Accumulate at most 8 bytes into the uint64 result so that every left
91+ # shift stays below 64 bits; a ninth byte, if needed, is merged afterwards.
8292 result = np .zeros (raw_bytes_2d .shape [0 ], dtype = np .uint64 )
83- for b in range (bytes_needed ):
93+ for b in range (min ( bytes_needed , 8 ) ):
8494 result |= raw_bytes_2d [:, byte_start + b ].astype (np .uint64 ) << np .uint64 (b * 8 )
8595 result >>= np .uint64 (bit_shift )
8696
97+ # If the field spans more than 8 bytes (bit_shift + field_width_bits > 64), merge the ninth byte.
98+ bits_so_far = 8 * min (bytes_needed , 8 ) - bit_shift
99+ if bits_so_far < field_width_bits and bytes_needed > 8 :
100+ extra = raw_bytes_2d [:, byte_start + 8 ].astype (np .uint64 )
101+ result |= extra << np .uint64 (bits_so_far )
102+
87103 if field_width_bits < 64 :
88104 result &= np .uint64 ((1 << field_width_bits ) - 1 )
89105
90106 if is_signed :
107+ if field_width_bits == 64 :
108+ return result .view (np .int64 )
91109 sign_bit = np .uint64 (1 << (field_width_bits - 1 ))
92- signed = result .astype (np .int64 ) - np .int64 (1 << field_width_bits )
110+ offset = - (1 << field_width_bits )
111+ signed = result .astype (np .int64 ) + np .int64 (offset )
93112 result = np .where (result & sign_bit , signed , result .astype (np .int64 ))
94113
95114 return result
0 commit comments