Skip to content

Commit cbd8389

Browse files
committed
Tests
1 parent a6f9c38 commit cbd8389

1 file changed

Lines changed: 232 additions & 0 deletions

File tree

Lines changed: 232 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,232 @@
1+
"""Tests for vectorized numpy access paths."""
2+
3+
import numpy as np
4+
import pytest
5+
6+
from flatdata.generator.engine import Engine
7+
from flatdata.lib.data_access import read_field_vectorized
8+
from common import (
9+
DictResourceStorage,
10+
ARCHIVE_SIGNATURE_PAYLOAD,
11+
VECTOR_TEST_SCHEMA,
12+
RESOURCE_VECTOR_PAYLOAD,
13+
)
14+
15+
16+
def _make_vector_archive():
    """Build an in-memory test archive holding a vector of SignedStructs.

    The Python module is rendered from ``VECTOR_TEST_SCHEMA`` and the archive
    is backed by a ``DictResourceStorage`` filled with the shared payloads.

    Returns:
        A ``(archive, module)`` tuple: the opened archive and the generated
        module it was instantiated from.
    """
    module = Engine(VECTOR_TEST_SCHEMA).render_python_module()
    archive_cls = module.backward_compatibility_Archive
    storage = DictResourceStorage({
        "Archive.archive": ARCHIVE_SIGNATURE_PAYLOAD,
        "Archive.archive.schema": archive_cls.schema().encode(),
        "resource": RESOURCE_VECTOR_PAYLOAD,
        "resource.schema": archive_cls.resource_schema('resource').encode(),
    })
    return archive_cls(storage), module
27+
28+
29+
class TestReadFieldVectorized:
    """Tests for the read_field_vectorized function."""

    @staticmethod
    def _read_column(raw_2d, field):
        """Call read_field_vectorized with the field's offset, width and sign."""
        return read_field_vectorized(
            raw_2d, field.offset, field.width, field.is_signed
        )

    def test_all_fields_match_element_access(self):
        """Vectorized values of every field equal per-element attribute reads."""
        archive, _ = _make_vector_archive()
        vector = archive.resource
        raw_2d = vector._as_numpy_2d()

        for name, field in vector._element_type._FIELDS.items():
            values = self._read_column(raw_2d, field)
            # Index access is deliberate: vector[i] is the scalar path that
            # the vectorized result is being compared against.
            for i in range(len(vector)):
                expected = getattr(vector[i], name)
                actual = int(values[i])
                assert expected == actual, \
                    f"Mismatch in {name}[{i}]: expected={expected}, actual={actual}"

    def test_signed_fields_read_correctly(self):
        """Signed bit-fields come back as negative integers."""
        archive, _ = _make_vector_archive()
        vector = archive.resource
        raw_2d = vector._as_numpy_2d()
        fields = vector._element_type._FIELDS

        # Field 'a' is i16:5 (signed, 5 bits), expected value: -1
        values_a = self._read_column(raw_2d, fields['a'])
        assert int(values_a[0]) == -1
        assert int(values_a[1]) == -1

        # Field 'c' is i32:7 (signed, 7 bits), expected value: -0x28 = -40
        values_c = self._read_column(raw_2d, fields['c'])
        assert int(values_c[0]) == -0x28
        assert int(values_c[1]) == -0x28
67+
68+
69+
class TestVectorToNumpy:
70+
"""Tests for vectorized Vector.to_numpy()."""
71+
72+
def test_to_numpy_matches_element_access(self):
73+
archive, module = _make_vector_archive()
74+
vector = archive.resource
75+
arr = vector.to_numpy()
76+
77+
assert len(arr) == len(vector)
78+
for name in vector._element_type._FIELDS:
79+
for i in range(len(vector)):
80+
expected = getattr(vector[i], name)
81+
actual = int(arr[name][i])
82+
assert expected == actual
83+
84+
def test_to_numpy_dtype(self):
85+
archive, module = _make_vector_archive()
86+
vector = archive.resource
87+
arr = vector.to_numpy()
88+
assert arr.dtype == np.dtype(vector._element_type.dtype())
89+
90+
def test_to_data_frame(self):
91+
archive, module = _make_vector_archive()
92+
vector = archive.resource
93+
df = vector.to_data_frame()
94+
assert len(df) == len(vector)
95+
assert list(df.columns) == list(vector._element_type._FIELDS.keys())
96+
97+
98+
class TestVectorSliceToNumpy:
99+
"""Tests for vectorized _VectorSlice.to_numpy()."""
100+
101+
def test_slice_to_numpy(self):
102+
archive, module = _make_vector_archive()
103+
vector = archive.resource
104+
s = vector[0:1]
105+
arr = s.to_numpy()
106+
107+
assert len(arr) == 1
108+
for name in vector._element_type._FIELDS:
109+
expected = getattr(vector[0], name)
110+
actual = int(arr[name][0])
111+
assert expected == actual
112+
113+
def test_slice_to_data_frame(self):
114+
archive, module = _make_vector_archive()
115+
vector = archive.resource
116+
df = vector[0:2].to_data_frame()
117+
assert len(df) == 2
118+
119+
120+
class TestVectorColumnAccess:
    """Tests for vectorized Vector.__getattr__ column access."""

    def test_column_access_returns_dataframe(self):
        """Accessing a field name on the vector yields a frame with that column."""
        archive, _ = _make_vector_archive()
        vector = archive.resource
        frame = vector.a
        assert len(frame) == len(vector)
        assert 'a' in frame.columns

    def test_column_values_match(self):
        """Column 'b' holds the same values as per-element access to 'b'."""
        archive, _ = _make_vector_archive()
        vector = archive.resource
        frame = vector.b
        for idx in range(len(vector)):
            from_element = vector[idx].b
            from_column = int(frame['b'].iloc[idx])
            assert from_element == from_column
138+
139+
140+
class TestNumpyCache:
    """Tests for the _as_numpy_2d() cache."""

    def test_cache_returns_same_object(self):
        """Two consecutive calls hand back the very same array object."""
        archive, _ = _make_vector_archive()
        vector = archive.resource
        first = vector._as_numpy_2d()
        second = vector._as_numpy_2d()
        assert first is second

    def test_shape(self):
        """The raw view is (element count, element size in bytes) of uint8."""
        archive, _ = _make_vector_archive()
        vector = archive.resource
        raw = vector._as_numpy_2d()
        assert raw.shape == (len(vector), vector._element_type._SIZE_IN_BYTES)
        assert raw.dtype == np.uint8
156+
157+
158+
class TestStructureSlots:
    """Tests that Structure uses __slots__."""

    def test_has_slots(self):
        """Structure declares __slots__ containing '_mem' and '_pos'."""
        from flatdata.lib.structure import Structure
        assert hasattr(Structure, '__slots__')
        for slot_name in ('_mem', '_pos'):
            assert slot_name in Structure.__slots__
166+
167+
168+
class TestReadFieldVectorizedEdgeCases:
    """Tests for boundary conditions in vectorized field reading."""

    def test_1bit_unsigned(self):
        """A 1-bit unsigned field at offset 0 yields the lowest bit per row."""
        rows = np.array([[0x01], [0x00], [0x03]], dtype=np.uint8)
        bits = read_field_vectorized(rows, 0, 1, False)
        assert list(bits) == [1, 0, 1]

    def test_1bit_signed_matches_scalar(self):
        """1-bit signed fields should return 0 or 1, matching read_value behavior."""
        from flatdata.lib.data_access import read_value
        rows = np.array([[0x01], [0x00]], dtype=np.uint8)
        bits = read_field_vectorized(rows, 0, 1, True)
        assert int(bits[0]) == read_value(b'\x01', 0, 1, True)
        assert int(bits[1]) == read_value(b'\x00', 0, 1, True)

    def test_64bit_unsigned(self):
        """Eight 0xFF bytes read as the maximum unsigned 64-bit value."""
        rows = np.full((1, 8), 0xFF, dtype=np.uint8)
        decoded = read_field_vectorized(rows, 0, 64, False)
        assert int(decoded[0]) == 0xFFFFFFFFFFFFFFFF

    def test_64bit_signed_negative(self):
        """Eight 0xFF bytes read as -1 when the 64-bit field is signed."""
        rows = np.full((1, 8), 0xFF, dtype=np.uint8)
        decoded = read_field_vectorized(rows, 0, 64, True)
        assert int(decoded[0]) == -1

    def test_64bit_signed_positive(self):
        """A lone 0x01 first byte reads as +1 for a signed 64-bit field."""
        rows = np.array([[0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]], dtype=np.uint8)
        decoded = read_field_vectorized(rows, 0, 64, True)
        assert int(decoded[0]) == 1

    def test_63bit_signed(self):
        """All-ones input reads as -1 for a signed 63-bit field."""
        rows = np.full((1, 8), 0xFF, dtype=np.uint8)
        decoded = read_field_vectorized(rows, 0, 63, True)
        assert int(decoded[0]) == -1

    def test_unaligned_large_field(self):
        """Fields where offset%8 + width > 64 require extra byte merge."""
        rows = np.full((1, 9), 0xFF, dtype=np.uint8)
        # 64 bits starting at bit 1, all set → should be 0xFFFFFFFFFFFFFFFF
        decoded = read_field_vectorized(rows, 1, 64, False)
        assert int(decoded[0]) == 0xFFFFFFFFFFFFFFFF

    def test_empty_vector(self):
        """A zero-row input produces a zero-length result."""
        rows = np.zeros((0, 8), dtype=np.uint8)
        decoded = read_field_vectorized(rows, 0, 32, False)
        assert len(decoded) == 0
215+
216+
217+
class TestAttributeErrorContract:
    """Vector/slice __getattr__ must raise AttributeError for unknown fields."""

    def test_vector_unknown_field_raises_attribute_error(self):
        """Looking up an unknown name on the vector raises AttributeError."""
        archive, _ = _make_vector_archive()
        with pytest.raises(AttributeError):
            getattr(archive.resource, "nonexistent_field")

    def test_vector_hasattr_returns_false(self):
        """hasattr() reports False for an unknown name on the vector."""
        archive, _ = _make_vector_archive()
        assert not hasattr(archive.resource, "nonexistent_field")

    def test_slice_unknown_field_raises_attribute_error(self):
        """Looking up an unknown name on a vector slice raises AttributeError."""
        archive, _ = _make_vector_archive()
        with pytest.raises(AttributeError):
            getattr(archive.resource[0:1], "nonexistent_field")

0 commit comments

Comments
 (0)