Skip to content

Commit 156fc31

Browse files
committed
Phase 2
Signed-off-by: Christian Vetter <christian.vetter@here.com>
1 parent 7a1f0fa commit 156fc31

13 files changed

Lines changed: 185 additions & 162 deletions

flatdata-py/flatdata/lib/archive.py

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
ResourceSignature = namedtuple("ResourceSignature",
1414
["container", "initializer", "schema", "is_optional", "doc"])
1515

16-
def _is_archive_signature(resource_signature):
17-
return resource_signature.container == Archive
16+
def _is_archive_signature(resource_signature: Any) -> bool:
17+
return bool(resource_signature.container == Archive)
1818

1919
_SCHEMA_EXT = ".schema"
2020

@@ -28,34 +28,34 @@ class Archive:
2828
_SCHEMA: str
2929
_RESOURCES: dict[str, Any]
3030

31-
def __init__(self, resource_storage):
31+
def __init__(self, resource_storage: Any) -> None:
3232
"""
3333
Opens archive from a given resource storage.
3434
:raises flatdata.errors.CorruptArchiveError
3535
:raises flatdata.errors.SchemaMismatchError
3636
:param resource_storage: Resource storage to use.
3737
"""
38-
self._resource_storage = resource_storage
39-
self._loaded_resources = {}
38+
self._resource_storage: Any = resource_storage
39+
self._loaded_resources: dict[str, Any] = {}
4040

4141
# Preload resources and check their schemas
4242
for name, _ in sorted(list(self._RESOURCES.items())):
4343
self.__getattr__(name)
4444

45-
def __getattr__(self, name):
45+
def __getattr__(self, name: str) -> Any:
4646
if name not in self._RESOURCES:
4747
raise AttributeError("Resource %s not defined in archive." % name)
4848
if name not in self._loaded_resources:
4949
self._loaded_resources[name] = self._open_resource(name)
5050
return self._loaded_resources[name]
5151

52-
def __dir__(self):
52+
def __dir__(self) -> list[str]:
5353
return list(self._RESOURCES.keys()) + ['schema']
5454

55-
def __repr__(self):
56-
return self.to_data_frame().__repr__()
55+
def __repr__(self) -> str:
56+
return repr(self.to_data_frame())
5757

58-
def to_data_frame(self):
58+
def to_data_frame(self) -> pd.DataFrame:
5959
result = []
6060
for name, signature in self._RESOURCES.items():
6161
resource = self.__getattr__(name)
@@ -66,34 +66,34 @@ def to_data_frame(self):
6666
columns=["Name", "Type", "Optional", "SizeInBytes", "Size"])
6767

6868
@classmethod
69-
def name(cls):
69+
def name(cls) -> str:
7070
return cls._NAME
7171

7272
@classmethod
73-
def schema(cls):
73+
def schema(cls) -> str:
7474
return cls._SCHEMA
7575

7676
@classmethod
77-
def resource_schema(cls, resource):
78-
return cls._RESOURCES[resource].schema
77+
def resource_schema(cls, resource: str) -> str:
78+
return str(cls._RESOURCES[resource].schema)
7979

8080
@classmethod
81-
def open(cls, storage, name, initializer, is_optional=False):
81+
def open(cls, storage: Any, name: str, initializer: Any, is_optional: bool = False) -> Any:
8282
nested_storage = storage.get(name, is_optional)
8383
assert nested_storage is not None or is_optional
8484
if nested_storage is None:
8585
return None
8686
return initializer(nested_storage)
8787

88-
def size_in_bytes(self):
88+
def size_in_bytes(self) -> int:
8989
return sum(resource_value.size_in_bytes() for resource_value in
9090
(self.__getattr__(resource) for resource in self._RESOURCES.keys())
9191
if resource_value)
9292

93-
def __len__(self):
93+
def __len__(self) -> int:
9494
return len(self._RESOURCES)
9595

96-
def _schema_validated_resource_signature(self, name):
96+
def _schema_validated_resource_signature(self, name: str) -> Any:
9797
resource_signature = self._RESOURCES[name]
9898
# We check only schema for non-subarchives, since the subarchives schema is checked,
9999
# when it is initialized.
@@ -107,7 +107,7 @@ def _schema_validated_resource_signature(self, name):
107107
return None
108108
return resource_signature
109109

110-
def _open_resource(self, name):
110+
def _open_resource(self, name: str) -> Any:
111111
resource_signature = self._schema_validated_resource_signature(name)
112112
if resource_signature:
113113
resource = resource_signature.container.open(storage=self._resource_storage,
@@ -120,7 +120,7 @@ def _open_resource(self, name):
120120
return None
121121

122122
@staticmethod
123-
def _check_non_subarchive_schema(name, resource_signature, storage):
123+
def _check_non_subarchive_schema(name: str, resource_signature: Any, storage: Any) -> None:
124124
actual_schema = bytes(storage).decode()
125125
if actual_schema != resource_signature.schema:
126126
raise SchemaMismatchError(

flatdata-py/flatdata/lib/archive_builder.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ class IndexWriter:
2424
IndexWriter class. Only applicable when multivector is present in archive schema.
2525
"""
2626

27-
def __init__(self, name, size, resource_storage):
27+
def __init__(self, name: str, size: int, resource_storage: Any) -> None:
2828
"""
2929
Create IndexWriter class.
3030
@@ -39,15 +39,15 @@ def __init__(self, name, size, resource_storage):
3939
self._index_size = size
4040
self._fout = resource_storage.get(f'{self._name}_index', False)
4141

42-
def add(self, index):
42+
def add(self, index: int) -> None:
4343
"""
4444
Convert index(number) to bytearray and add to in memory store
4545
"""
4646
index_bytes = int(index).to_bytes(self._index_size,
4747
byteorder="little", signed=False)
4848
self._fout.write(index_bytes)
4949

50-
def finish(self):
50+
def finish(self) -> None:
5151
"""
5252
Complete index resource by adding size and padding followed by writing to file
5353
"""
@@ -65,7 +65,7 @@ class ArchiveBuilder:
6565
_SCHEMA: str
6666
_RESOURCES: dict[str, Any]
6767

68-
def __init__(self, resource_storage, path=""):
68+
def __init__(self, resource_storage: Any, path: str = "") -> None:
6969
"""
7070
Opens archive from a given resource writer.
7171
:param resource_storage: storage manager to store and write to disc
@@ -78,16 +78,16 @@ def __init__(self, resource_storage, path=""):
7878
self._resources_written = [f"{self._NAME}.archive"]
7979

8080
@classmethod
81-
def name(cls):
81+
def name(cls) -> str:
8282
'''Returns archive name'''
8383
return cls._NAME
8484

8585
@classmethod
86-
def schema(cls):
86+
def schema(cls) -> str:
8787
'''Returns archive schema'''
8888
return cls._SCHEMA
8989

90-
def _write_raw_data(self, name, data):
90+
def _write_raw_data(self, name: str, data: bytes | bytearray) -> None:
9191
'''
9292
Helper function to write data
9393
@@ -98,7 +98,7 @@ def _write_raw_data(self, name, data):
9898
storage.write(data)
9999
storage.close()
100100

101-
def _write_schema(self, name):
101+
def _write_schema(self, name: str) -> None:
102102
'''
103103
Writes resource schema
104104
@@ -107,20 +107,20 @@ def _write_schema(self, name):
107107
self._write_raw_data(f"{name}.schema", bytes(
108108
self._RESOURCES[name].schema, 'utf-8'))
109109

110-
def _write_archive_signature(self):
110+
def _write_archive_signature(self) -> None:
111111
'''Writes archive's signature'''
112112
self._write_raw_data(f"{self._NAME}.archive", b'\x00' * 16)
113113

114-
def _write_archive_schema(self):
114+
def _write_archive_schema(self) -> None:
115115
'''Writes archive schema'''
116116
self._write_raw_data(
117117
f"{self._NAME}.archive.schema", bytes(self._SCHEMA, 'utf-8'))
118118

119-
def _write_index_schema(self, resource_name, schema):
119+
def _write_index_schema(self, resource_name: str, schema: str) -> None:
120120
self._write_raw_data(
121121
f"{resource_name}_index.schema", bytes(schema, 'utf-8'))
122122

123-
def subarchive(self, name):
123+
def subarchive(self, name: str) -> None:
124124
"""
125125
Returns an archive builder for the sub-archive `name`.
126126
:raises $name_not_subarchive_error
@@ -129,7 +129,7 @@ def subarchive(self, name):
129129
NotImplemented
130130

131131
@classmethod
132-
def __validate_structure_fields(cls, name, struct, initializer):
132+
def __validate_structure_fields(cls, name: str, struct: dict[str, Any], initializer: Any) -> None:
133133
'''
134134
Validates whether passed object has all required fields
135135
@@ -146,7 +146,7 @@ def __validate_structure_fields(cls, name, struct, initializer):
146146
if key not in initializer._FIELD_KEYS:
147147
raise UnknownFieldError(key, name)
148148

149-
def __set_instance(self, storage, name, value):
149+
def __set_instance(self, storage: Any, name: str, value: dict[str, Any]) -> None:
150150
'''
151151
Creates and writes instance type resource
152152
@@ -164,7 +164,7 @@ def __set_instance(self, storage, name, value):
164164

165165
storage.write(bout)
166166

167-
def __set_vector(self, storage, name, vector):
167+
def __set_vector(self, storage: Any, name: str, vector: list[dict[str, Any]]) -> None:
168168
'''
169169
Creates and writes vector resource
170170
@@ -183,7 +183,7 @@ def __set_vector(self, storage, name, vector):
183183
field.is_signed, value[key])
184184
storage.write(bout)
185185

186-
def __set_multivector(self, storage, name, value):
186+
def __set_multivector(self, storage: Any, name: str, value: list[list[dict[str, Any]]]) -> None:
187187
'''
188188
Creates and writes multivector resource
189189
@@ -252,7 +252,7 @@ def validate_fields(_obj):
252252
self._resources_written.append(name)
253253
self._resources_written.append(f'{name}_index')
254254

255-
def set(self, name, value):
255+
def set(self, name: str, value: Any) -> None:
256256
"""
257257
Write a resource for this archive at once.
258258
Can only be done once. `set` and `start` can't be used for the same resource.
@@ -288,7 +288,7 @@ def set(self, name, value):
288288

289289
self._resources_written.append(name)
290290

291-
def finish(self):
291+
def finish(self) -> None:
292292
"""
293293
Closes the storage manager
294294
"""

flatdata-py/flatdata/lib/data_access.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,17 @@
33
See the LICENSE file in the root of this project for license details.
44
'''
55

6+
from collections.abc import Callable
7+
from typing import Any
8+
69
import numpy as np
10+
from numpy.typing import NDArray
711

812
# Sign bits cache for the value reading.
913
_SIGN_BITS = [0] + [(1 << (bits - 1)) for bits in range(1, 65)]
1014

1115

12-
def make_field_reader(offset_bits, num_bits, is_signed):
16+
def make_field_reader(offset_bits: int, num_bits: int, is_signed: bool) -> Callable[[Any, int], int]:
1317
"""Build a specialized closure for reading a single field from a structure.
1418
1519
Returns a function reader(data, pos_bytes) that reads the field value
@@ -76,7 +80,7 @@ def reader(data, pos):
7680
return reader
7781

7882

79-
def read_field_vectorized(raw_bytes_2d, field_offset_bits, field_width_bits, is_signed):
83+
def read_field_vectorized(raw_bytes_2d: NDArray[np.uint8], field_offset_bits: int, field_width_bits: int, is_signed: bool) -> NDArray[Any]:
8084
"""Read a bit-packed field from all elements at once, returning a numpy array.
8185
8286
:param raw_bytes_2d: numpy uint8 array shaped (num_elements, struct_size_bytes)
@@ -122,7 +126,7 @@ def read_field_vectorized(raw_bytes_2d, field_offset_bits, field_width_bits, is_
122126
return result
123127

124128

125-
def read_value(data, offset_bits, num_bits, is_signed):
129+
def read_value(data: Any, offset_bits: int, num_bits: int, is_signed: bool) -> int:
126130
"""Read a bit-packed value from data at the given bit offset.
127131
128132
This is a convenience wrapper around :func:`make_field_reader` for one-off
@@ -133,7 +137,7 @@ def read_value(data, offset_bits, num_bits, is_signed):
133137
return reader(data, 0)
134138

135139

136-
def write_value(data, offset_bits, num_bits, is_signed, value):
140+
def write_value(data: bytearray, offset_bits: int, num_bits: int, is_signed: bool, value: int) -> None:
137141
assert num_bits <= 64, f'Number of bits to write is greater than 64'
138142

139143
offset_bytes, offset_extra_bits = divmod(offset_bits, 8)

0 commit comments

Comments
 (0)