Skip to content

Commit a6f9c38

Browse files
committed
Code review changes
1 parent fec7fb6 commit a6f9c38

3 files changed

Lines changed: 48 additions & 5 deletions

File tree

flatdata-py/flatdata/lib/data_access.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ def read_value(data, offset_bits, num_bits, is_signed):
2424

2525
if num_bits < 64:
2626
result = result & ((1 << num_bits) - 1)
27+
elif offset_extra_bits > 0:
28+
result = result & ((1 << num_bits) - 1)
2729

2830
if not is_signed:
2931
return result
@@ -75,21 +77,38 @@ def read_field_vectorized(raw_bytes_2d, field_offset_bits, field_width_bits, is_
7577
:param is_signed: whether to sign-extend the result
7678
:return: numpy array of field values
7779
"""
80+
if field_width_bits == 1:
81+
byte_idx = field_offset_bits // 8
82+
bit_idx = field_offset_bits % 8
83+
return ((raw_bytes_2d[:, byte_idx].astype(np.uint64) >> np.uint64(bit_idx)) &
84+
np.uint64(1))
85+
7886
byte_start = field_offset_bits // 8
7987
bit_shift = field_offset_bits % 8
8088
bytes_needed = (bit_shift + field_width_bits + 7) // 8
8189

90+
# Use Python int arithmetic for the shift to avoid numpy overflow,
91+
# then broadcast back to the array.
8292
result = np.zeros(raw_bytes_2d.shape[0], dtype=np.uint64)
83-
for b in range(bytes_needed):
93+
for b in range(min(bytes_needed, 8)):
8494
result |= raw_bytes_2d[:, byte_start + b].astype(np.uint64) << np.uint64(b * 8)
8595
result >>= np.uint64(bit_shift)
8696

97+
# If the field spans more than 8 bytes (unaligned 64-bit field), merge the extra byte.
98+
bits_so_far = 8 * min(bytes_needed, 8) - bit_shift
99+
if bits_so_far < field_width_bits and bytes_needed > 8:
100+
extra = raw_bytes_2d[:, byte_start + 8].astype(np.uint64)
101+
result |= extra << np.uint64(bits_so_far)
102+
87103
if field_width_bits < 64:
88104
result &= np.uint64((1 << field_width_bits) - 1)
89105

90106
if is_signed:
107+
if field_width_bits == 64:
108+
return result.view(np.int64)
91109
sign_bit = np.uint64(1 << (field_width_bits - 1))
92-
signed = result.astype(np.int64) - np.int64(1 << field_width_bits)
110+
offset = -(1 << field_width_bits)
111+
signed = result.astype(np.int64) + np.int64(offset)
93112
result = np.where(result & sign_bit, signed, result.astype(np.int64))
94113

95114
return result

flatdata-py/flatdata/lib/resources.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,6 @@ def __init__(self, s, sequence):
7676

7777
def to_numpy(self, limit=None):
7878
raw_2d = self._sequence._as_numpy_2d()
79-
indices = self._slice.indices(len(self._sequence))
8079
sliced = raw_2d[self._slice]
8180
if limit is not None:
8281
sliced = sliced[:limit]
@@ -98,8 +97,11 @@ def __iter__(self):
9897
yield self._sequence[i]
9998

10099
def __getattr__(self, name):
100+
try:
101+
field = self._sequence._element_type._FIELDS[name]
102+
except KeyError:
103+
raise AttributeError("Field %s not found in structure" % name)
101104
raw_2d = self._sequence._as_numpy_2d()[self._slice]
102-
field = self._sequence._element_type._FIELDS[name]
103105
values = read_field_vectorized(raw_2d, field.offset, field.width, field.is_signed)
104106
return pd.DataFrame(data=values, columns=[name])
105107

@@ -148,8 +150,11 @@ def __iter__(self):
148150
yield element_type(mem, SIZE_OFFSET_IN_BYTES + size_bytes * i)
149151

150152
def __getattr__(self, name):
153+
try:
154+
field = self._element_type._FIELDS[name]
155+
except KeyError:
156+
raise AttributeError("Field %s not found in structure" % name)
151157
raw_2d = self._as_numpy_2d()
152-
field = self._element_type._FIELDS[name]
153158
values = read_field_vectorized(raw_2d, field.offset, field.width, field.is_signed)
154159
return pd.DataFrame(data=values, columns=[name])
155160

flatdata-py/tests/test_data_access.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1124,6 +1124,25 @@ def _test_reader(buffer, offset, num_bits, is_signed, expected):
11241124
_test_reader(b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", 3, 2, True, 0)
11251125

11261126

1127+
def test_reader_unaligned_64bit():
1128+
"""read_value must return at most 64 bits for a 64-bit field at a non-byte-aligned offset."""
1129+
data = b"\xff" * 9
1130+
# 64 bits starting at bit 1, all set → 0xFFFFFFFFFFFFFFFF
1131+
result = read_value(data, 1, 64, False)
1132+
assert result == 0xFFFFFFFFFFFFFFFF, \
1133+
f"Expected 0xFFFFFFFFFFFFFFFF, got {result:#x} ({result.bit_length()} bits)"
1134+
1135+
# Signed: all 1s → -1
1136+
result_signed = read_value(data, 1, 64, True)
1137+
assert result_signed == -1
1138+
1139+
# Other non-byte-aligned offsets
1140+
for offset in range(1, 8):
1141+
result = read_value(data, offset, 64, False)
1142+
assert result == 0xFFFFFFFFFFFFFFFF, \
1143+
f"offset={offset}: expected 0xFFFFFFFFFFFFFFFF, got {result:#x}"
1144+
1145+
11271146
def test_writer():
11281147
"""
11291148
Following tests were generated from C++ counterparts. Reasoning: python implementation lacks

0 commit comments

Comments
 (0)