|
| 1 | +"""Tests for vectorized numpy access paths.""" |
| 2 | + |
| 3 | +import numpy as np |
| 4 | +import pytest |
| 5 | + |
| 6 | +from flatdata.generator.engine import Engine |
| 7 | +from flatdata.lib.data_access import read_field_vectorized |
| 8 | +from common import ( |
| 9 | + DictResourceStorage, |
| 10 | + ARCHIVE_SIGNATURE_PAYLOAD, |
| 11 | + VECTOR_TEST_SCHEMA, |
| 12 | + RESOURCE_VECTOR_PAYLOAD, |
| 13 | +) |
| 14 | + |
| 15 | + |
def _make_vector_archive():
    """Build the in-memory archive used by the vector tests.

    The Python module is rendered from ``VECTOR_TEST_SCHEMA`` and its
    ``backward_compatibility_Archive`` class is opened over a
    ``DictResourceStorage`` holding the signature, the vector payload and
    the matching schema entries.

    Returns:
        A ``(archive, module)`` tuple: the opened archive and the rendered
        module it was created from.
    """
    generated = Engine(VECTOR_TEST_SCHEMA).render_python_module()
    archive_cls = generated.backward_compatibility_Archive

    # Each payload entry is accompanied by a "<key>.schema" entry.
    archive_schema = archive_cls.schema().encode()
    resource_schema = archive_cls.resource_schema('resource').encode()
    storage = DictResourceStorage({
        "Archive.archive": ARCHIVE_SIGNATURE_PAYLOAD,
        "Archive.archive.schema": archive_schema,
        "resource": RESOURCE_VECTOR_PAYLOAD,
        "resource.schema": resource_schema,
    })
    return archive_cls(storage), generated
| 27 | + |
| 28 | + |
class TestReadFieldVectorized:
    """Tests for the read_field_vectorized function.

    ``read_field_vectorized`` comes from the module-level import, so the
    tests do not re-import it locally.
    """

    def test_all_fields_match_element_access(self):
        """Each vectorized field column equals the per-element attribute values."""
        archive, _ = _make_vector_archive()
        vector = archive.resource
        raw_2d = vector._as_numpy_2d()

        for name, field in vector._element_type._FIELDS.items():
            values = read_field_vectorized(
                raw_2d, field.offset, field.width, field.is_signed
            )
            # The loop below only indexes up to len(vector); make sure the
            # vectorized result has exactly one value per element as well.
            assert len(values) == len(vector)
            for i in range(len(vector)):
                expected = getattr(vector[i], name)
                actual = int(values[i])
                assert expected == actual, \
                    f"Mismatch in {name}[{i}]: expected={expected}, actual={actual}"

    def test_signed_fields_read_correctly(self):
        """Signed bit fields are decoded to negative values for the first two elements."""
        archive, _ = _make_vector_archive()
        vector = archive.resource
        raw_2d = vector._as_numpy_2d()
        fields = vector._element_type._FIELDS

        # Field 'a' is i16:5 (signed, 5 bits), expected value: -1
        field_a = fields['a']
        values_a = read_field_vectorized(
            raw_2d, field_a.offset, field_a.width, field_a.is_signed
        )
        assert int(values_a[0]) == -1
        assert int(values_a[1]) == -1

        # Field 'c' is i32:7 (signed, 7 bits), expected value: -0x28 = -40
        field_c = fields['c']
        values_c = read_field_vectorized(
            raw_2d, field_c.offset, field_c.width, field_c.is_signed
        )
        assert int(values_c[0]) == -0x28
        assert int(values_c[1]) == -0x28
| 67 | + |
| 68 | + |
class TestVectorToNumpy:
    """Checks for Vector.to_numpy() and Vector.to_data_frame()."""

    def test_to_numpy_matches_element_access(self):
        """The structured array holds the same values as element access."""
        archive, _ = _make_vector_archive()
        vec = archive.resource
        records = vec.to_numpy()

        assert len(records) == len(vec)
        for field_name in vec._element_type._FIELDS:
            for row in range(len(vec)):
                from_element = getattr(vec[row], field_name)
                from_array = int(records[field_name][row])
                assert from_element == from_array

    def test_to_numpy_dtype(self):
        """The array dtype equals the dtype declared by the element type."""
        archive, _ = _make_vector_archive()
        vec = archive.resource
        records = vec.to_numpy()
        declared = np.dtype(vec._element_type.dtype())
        assert records.dtype == declared

    def test_to_data_frame(self):
        """The data frame has one row per element and one column per field."""
        archive, _ = _make_vector_archive()
        vec = archive.resource
        frame = vec.to_data_frame()
        assert len(frame) == len(vec)
        field_names = list(vec._element_type._FIELDS.keys())
        assert list(frame.columns) == field_names
| 96 | + |
| 97 | + |
class TestVectorSliceToNumpy:
    """Checks for to_numpy() / to_data_frame() on a slice of a vector."""

    def test_slice_to_numpy(self):
        """A one-element slice converts to an array equal to element 0."""
        archive, _ = _make_vector_archive()
        vec = archive.resource
        head = vec[0:1]
        records = head.to_numpy()

        assert len(records) == 1
        for field_name in vec._element_type._FIELDS:
            from_element = getattr(vec[0], field_name)
            from_array = int(records[field_name][0])
            assert from_element == from_array

    def test_slice_to_data_frame(self):
        """A two-element slice converts to a data frame with two rows."""
        archive, _ = _make_vector_archive()
        vec = archive.resource
        frame = vec[0:2].to_data_frame()
        assert len(frame) == 2
| 118 | + |
| 119 | + |
class TestVectorColumnAccess:
    """Checks for attribute-style column access on a vector."""

    def test_column_access_returns_dataframe(self):
        """``vector.a`` has one row per element and exposes a column 'a'."""
        archive, _ = _make_vector_archive()
        vec = archive.resource
        column_frame = vec.a
        assert len(column_frame) == len(vec)
        assert 'a' in column_frame.columns

    def test_column_values_match(self):
        """Values in ``vector.b`` equal ``vector[i].b`` for every index."""
        archive, _ = _make_vector_archive()
        vec = archive.resource
        column_frame = vec.b
        for row in range(len(vec)):
            from_element = vec[row].b
            from_frame = int(column_frame['b'].iloc[row])
            assert from_element == from_frame
| 138 | + |
| 139 | + |
class TestNumpyCache:
    """Checks for the array returned by Vector._as_numpy_2d()."""

    def test_cache_returns_same_object(self):
        """Two consecutive calls hand back the very same array object."""
        archive, _ = _make_vector_archive()
        vec = archive.resource
        first_call = vec._as_numpy_2d()
        second_call = vec._as_numpy_2d()
        assert first_call is second_call

    def test_shape(self):
        """The array is uint8 with shape (element count, element size in bytes)."""
        archive, _ = _make_vector_archive()
        vec = archive.resource
        raw = vec._as_numpy_2d()
        expected_shape = (len(vec), vec._element_type._SIZE_IN_BYTES)
        assert raw.shape == expected_shape
        assert raw.dtype == np.uint8
| 156 | + |
| 157 | + |
class TestStructureSlots:
    """Checks that Structure declares __slots__ with its storage attributes."""

    def test_has_slots(self):
        """Structure defines __slots__ containing '_mem' and '_pos'."""
        from flatdata.lib.structure import Structure

        assert hasattr(Structure, '__slots__')
        for attribute in ('_mem', '_pos'):
            assert attribute in Structure.__slots__
| 166 | + |
| 167 | + |
class TestReadFieldVectorizedEdgeCases:
    """Boundary-condition checks for read_field_vectorized."""

    def test_1bit_unsigned(self):
        """Only the lowest bit of each row is returned for a 1-bit field."""
        rows = np.array([[1], [0], [3]], dtype=np.uint8)
        column = read_field_vectorized(rows, 0, 1, False)
        assert list(column) == [1, 0, 1]

    def test_1bit_signed_matches_scalar(self):
        """A signed 1-bit field must agree with the scalar read_value result."""
        from flatdata.lib.data_access import read_value

        rows = np.array([[1], [0]], dtype=np.uint8)
        column = read_field_vectorized(rows, 0, 1, True)
        assert int(column[0]) == read_value(b'\x01', 0, 1, True)
        assert int(column[1]) == read_value(b'\x00', 0, 1, True)

    def test_64bit_unsigned(self):
        """Eight 0xFF bytes read as the maximum unsigned 64-bit value."""
        rows = np.full((1, 8), 0xFF, dtype=np.uint8)
        column = read_field_vectorized(rows, 0, 64, False)
        assert int(column[0]) == 0xFFFFFFFFFFFFFFFF

    def test_64bit_signed_negative(self):
        """Eight 0xFF bytes read as -1 when the field is signed."""
        rows = np.full((1, 8), 0xFF, dtype=np.uint8)
        column = read_field_vectorized(rows, 0, 64, True)
        assert int(column[0]) == -1

    def test_64bit_signed_positive(self):
        """A signed 64-bit field with only the lowest bit set reads as 1."""
        rows = np.array([[1] + [0] * 7], dtype=np.uint8)
        column = read_field_vectorized(rows, 0, 64, True)
        assert int(column[0]) == 1

    def test_63bit_signed(self):
        """A signed 63-bit field with all bits set reads as -1."""
        rows = np.full((1, 8), 0xFF, dtype=np.uint8)
        column = read_field_vectorized(rows, 0, 63, True)
        assert int(column[0]) == -1

    def test_unaligned_large_field(self):
        """Fields where offset%8 + width > 64 span a ninth byte."""
        rows = np.full((1, 9), 0xFF, dtype=np.uint8)
        # 64 bits starting at bit 1, all set → should be 0xFFFFFFFFFFFFFFFF
        column = read_field_vectorized(rows, 1, 64, False)
        assert int(column[0]) == 0xFFFFFFFFFFFFFFFF

    def test_empty_vector(self):
        """An input with zero rows yields an empty result."""
        rows = np.empty((0, 8), dtype=np.uint8)
        column = read_field_vectorized(rows, 0, 32, False)
        assert len(column) == 0
| 215 | + |
| 216 | + |
class TestAttributeErrorContract:
    """Unknown field names on a vector or slice must surface as AttributeError."""

    def test_vector_unknown_field_raises_attribute_error(self):
        """Looking up an unknown attribute on the vector raises AttributeError."""
        archive, _module = _make_vector_archive()
        with pytest.raises(AttributeError):
            getattr(archive.resource, "nonexistent_field")

    def test_vector_hasattr_returns_false(self):
        """hasattr() reports False for an unknown attribute on the vector."""
        archive, _module = _make_vector_archive()
        assert hasattr(archive.resource, "nonexistent_field") is False

    def test_slice_unknown_field_raises_attribute_error(self):
        """Looking up an unknown attribute on a slice raises AttributeError."""
        archive, _module = _make_vector_archive()
        with pytest.raises(AttributeError):
            getattr(archive.resource[0:1], "nonexistent_field")
0 commit comments