From 81958602397cee39923731f21dc74fabda8545dd Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Tue, 24 Jun 2025 10:57:50 +0200 Subject: [PATCH 01/41] Type stubs for shared.py and abc.py --- abc.py: + removed unused '_ContextPathStr' + Removed unnecessary 'abstractmethod' definitions + '_Action' was renamed to '_ActionLike' + removed '_EnumLike' + '_Switch' was renamed to '_SwitchLike' + Added two new protocols: '_SupportsBits' and '_ContainsBits' + The following constants and methods were moved into shared.py: STRUCT_FIELD -> ATTR_STRUCT, hasstruct, getstruct and typeof --- src/caterpillar/abc.py | 78 +++++------------------------------- src/caterpillar/abc.pyi | 82 ++++++++++++++++++++++++++++++++++++++ src/caterpillar/shared.py | 39 ++++++++++++++++++ src/caterpillar/shared.pyi | 27 +++++++++++++ 4 files changed, 159 insertions(+), 67 deletions(-) mode change 100644 => 100755 src/caterpillar/abc.py create mode 100755 src/caterpillar/abc.pyi create mode 100755 src/caterpillar/shared.pyi diff --git a/src/caterpillar/abc.py b/src/caterpillar/abc.py old mode 100644 new mode 100755 index 00bedbbb..f0a450ed --- a/src/caterpillar/abc.py +++ b/src/caterpillar/abc.py @@ -15,7 +15,7 @@ from abc import abstractmethod from io import IOBase from typing import Protocol, runtime_checkable -from typing import Dict, Any, Optional, Union, Callable +from typing import Any, Optional, Union, Callable #: Type alias for IOBase to indicate a stream type _StreamType = IOBase @@ -23,7 +23,6 @@ _GreedyType = type(...) _PrefixedType = slice -_ContextPathStr = str @runtime_checkable @@ -34,25 +33,20 @@ class _ContextLike(Protocol): It allows direct attribute access and modification. 
""" - @abstractmethod def __context_getattr__(self, path: str) -> Any: pass # TODO - @abstractmethod def __context_setattr__(self, path: str, value: Any) -> None: pass @property - @abstractmethod def _root(self) -> Optional["_ContextLike"]: pass - @abstractmethod def __getitem__(self, key: str) -> Any: pass - @abstractmethod def __setitem__(self, key: str, value: Any) -> None: pass @@ -60,16 +54,16 @@ def __setitem__(self, key: str, value: Any) -> None: @runtime_checkable class _ContextLambda(Protocol): """ - An abstract base class for a lambda function that takes a _ContextLike object as an argument. + An abstract base class for a lambda function that takes a _ContextLike + object as an argument. """ - @abstractmethod def __call__(self, context: _ContextLike) -> Any: pass @runtime_checkable -class _Action(Protocol): +class _ActionLike(Protocol): @abstractmethod def __action_pack__(self, context: _ContextLike) -> None: pass @@ -85,7 +79,6 @@ class _SupportsPack(Protocol): An abstract base class for objects that support packing data into a binary stream. """ - @abstractmethod def __pack__(self, obj: Any, context: _ContextLike) -> None: pass @@ -96,7 +89,6 @@ class _SupportsUnpack(Protocol): An abstract base class for objects that support unpacking data from a binary stream. """ - @abstractmethod def __unpack__(self, context: _ContextLike) -> Any: pass @@ -107,9 +99,8 @@ class _SupportsSize(Protocol): An abstract base class for objects that support determining the size of packed data. """ - @abstractmethod def __size__(self, context: _ContextLike) -> int: - pass + return 0 @runtime_checkable @@ -118,15 +109,12 @@ class _StructLike(Protocol): An abstract base class for struct-like objects that can be packed, unpacked, and have a size. 
""" - @abstractmethod def __size__(self, context: _ContextLike) -> int: - pass + return 0 - @abstractmethod def __unpack__(self, context: _ContextLike) -> Any: pass - @abstractmethod def __pack__(self, obj: Any, context: _ContextLike) -> None: pass @@ -142,19 +130,8 @@ class _ContainsStruct(Protocol): __struct__: _StructLike -class _EnumLike(Protocol): - """ - An abstract base class for enum-like objects with a value, name, and mappings. - """ - - value: Any - name: str - _member_map_: Dict[str, "_EnumLike"] - _value2member_map_: Dict[Any, "_EnumLike"] - - @runtime_checkable -class _Switch(Protocol): +class _SwitchLike(Protocol): """ An abstract base class for a switch-like object that generates a _StructLike based on a value and context. @@ -165,42 +142,9 @@ def __call__(self, value: Any, context: _ContextLike, **kwds) -> _StructLike: pass -# TODO: place this somewhere else -STRUCT_FIELD = "__struct__" - - -def hasstruct(obj: Any) -> bool: - """ - Check if the given object has a structure attribute. - - :param obj: The object to check. - :return: True if the object has a structure attribute, else False. - """ - cls_dict = getattr(obj.__class__ if not isinstance(obj, type) else obj, "__dict__") - return STRUCT_FIELD in cls_dict - - -def getstruct(obj: Any, /, __default: Any = None) -> _StructLike | None: - """ - Get the structure attribute of the given object. - - :param obj: The object to get the structure attribute from. - :return: The structure attribute of the object. - """ - obj = obj.__class__ if not isinstance(obj, type) else obj - cls_dict = getattr(obj, "__dict__", None) - if cls_dict is None: - return getattr(obj, "__struct__", None) - - return cls_dict.get(STRUCT_FIELD, __default) - +class _SupportsBits(Protocol): + def __bits__(self) -> int: ... 
-def typeof(struct: Union[_StructLike, _ContainsStruct]) -> object: - if hasstruct(struct): - struct = getstruct(struct) - __type__ = getattr(struct, "__type__", None) - if not __type__: - return Any - # this function must return a type - return __type__() or Any +class _ContainsBits(Protocol): + __bits__: int diff --git a/src/caterpillar/abc.pyi b/src/caterpillar/abc.pyi new file mode 100755 index 00000000..e79c9991 --- /dev/null +++ b/src/caterpillar/abc.pyi @@ -0,0 +1,82 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from io import IOBase +from types import EllipsisType +from typing import Any, Callable, Optional, Protocol, TypeVar, Union + +_IT = TypeVar("_IT") +_IT_co = TypeVar("_IT_co") +_IT_contra = TypeVar("_IT_contra", contravariant=True) +_OT = TypeVar("_OT") +_OT_co = TypeVar("_OT_co", covariant=True) + +_StreamType = IOBase +_StreamFactory = Callable[[], _StreamType] + +_GreedyType = EllipsisType +_PrefixedType = slice # [_StructLike[int, int], NoneType, NoneType] + +_LengthT = Union[int, _PrefixedType, _GreedyType, _ContextLambda] + +class _ContextLike(Protocol): + @property + def _root(self) -> Optional[_ContextLike]: ... + def __context_getattr__(self, path: str) -> Any: ... + def __context_setattr__(self, path: str, value: Any) -> None: ... + def __getitem__(self, key, /) -> Any: ... + def __setitem__(self, key, value: Any, /) -> None: ... 
+ +_ContextLambdaReturnT_co = TypeVar( + "_ContextLambdaReturnT_co", covariant=True, default=Any +) + +class _ContextLambda(Protocol[_ContextLambdaReturnT_co]): + def __call__(self, context: _ContextLike) -> _ContextLambdaReturnT_co: ... + +class _StructLike(Protocol[_IT_contra, _OT_co]): + def __size__(self, context: _ContextLike) -> int: ... + def __unpack__(self, context: _ContextLike) -> _OT_co: ... + def __pack__(self, obj: _IT_contra, context: _ContextLike) -> None: ... + def __type__(self) -> Optional[Union[type, str]]: ... + +_StructT = Union[_ContainsStruct[_IT, _OT], _StructLike[_IT, _OT], _ContextLambda] + +class _ContainsStruct(Protocol[_IT_contra, _OT]): + __struct__: _StructLike[_IT_contra, _OT] + +class _ActionLike(Protocol): + def __action_pack__(self, context: _ContextLike) -> None: ... + def __action_unpack__(self, context: _ContextLike) -> None: ... + +class _SupportsPack(Protocol[_IT_contra]): + def __pack__(self, obj: _IT_contra, context: _ContextLike) -> None: ... + +class _SupportsSize(Protocol): + def __size__(self, context: _ContextLike) -> int: ... + +class _SupportsUnpack(Protocol[_OT_co]): + def __unpack__(self, context: _ContextLike) -> _OT_co: ... + +_SwitchLambda = Callable[[Any, _ContextLike], _StructLike[_IT, _OT]] +_Switch = Union[ + dict[Any, Union[_ContainsStruct[_IT, _OT], _StructLike[_IT, _OT]]], + _SwitchLambda[_IT, _OT], +] + +class _SupportsBits(Protocol): + def __bits__(self) -> int: ... + +class _ContainsBits(Protocol): + __bits__: int diff --git a/src/caterpillar/shared.py b/src/caterpillar/shared.py index 0dcb3b17..5fb1e98c 100644 --- a/src/caterpillar/shared.py +++ b/src/caterpillar/shared.py @@ -31,6 +31,8 @@ #: must be conforming to the _StructLike protocol. 
ATTR_STRUCT = "__struct__" +ATTR_TYPE = "__type__" + # TODO: add to reference # NEW CONCEPT: Actions # An annotation that is equipped with an action attribute indicates that @@ -147,3 +149,40 @@ def is_action(obj: Any) -> bool: return any( getattr(obj, attr, None) for attr in (ATTR_ACTION_PACK, ATTR_ACTION_UNPACK) ) + + +def hasstruct(obj) -> bool: + """ + Check if the given object has a structure attribute. + + :param obj: The object to check. + :return: True if the object has a structure attribute, else False. + """ + cls_dict = getattr(obj.__class__ if not isinstance(obj, type) else obj, "__dict__") + return ATTR_STRUCT in cls_dict + + +def getstruct(obj, /, __default=None): + """ + Get the structure attribute of the given object. + + :param obj: The object to get the structure attribute from. + :return: The structure attribute of the object. + """ + obj = obj.__class__ if not isinstance(obj, type) else obj + cls_dict = getattr(obj, "__dict__", None) + if cls_dict is None: + return getattr(obj, ATTR_STRUCT, None) + + return cls_dict.get(ATTR_STRUCT, __default) + + +def typeof(struct): + if hasstruct(struct): + struct = getstruct(struct) + + __type__ = getattr(struct, ATTR_TYPE, None) + if not __type__: + return Any + # this function must return a type + return __type__() or Any diff --git a/src/caterpillar/shared.pyi b/src/caterpillar/shared.pyi new file mode 100755 index 00000000..7fabcbfd --- /dev/null +++ b/src/caterpillar/shared.pyi @@ -0,0 +1,27 @@ +from caterpillar.abc import _ContextLambda, _StructLike, _ContainsStruct +from typing import Any, Optional, Union + +MODE_PACK: int = ... +MODE_UNPACK: int = ... +ATTR_STRUCT: str = ... +ATTR_TYPE: str = ... +ATTR_ACTION_PACK: str = ... +ATTR_ACTION_UNPACK: str = ... + +class Action: + __action_pack__: _ContextLambda + __action_unpack__: _ContextLambda + def __init__( + self, + pack: _ContextLambda | None = None, + unpack: _ContextLambda | None = None, + both: _ContextLambda | None = None, + ) -> None: ... 
+ @staticmethod + def is_action(obj: Any) -> bool: ... + +def hasstruct(obj: Any) -> bool: ... +def getstruct( + obj: Any, /, __default: Optional[_StructLike] = None +) -> Optional[_StructLike]: ... +def typeof(struct: Union[_StructLike, _ContainsStruct]) -> type: ... From 0a49a4ebf52b2a98cdd5d45c7f45a4577d318e9a Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Tue, 24 Jun 2025 11:01:26 +0200 Subject: [PATCH 02/41] Added type stubs for _common.py --- _common.py: + Removed any existing typing information + unpack_seq and pack_seq now return or use a collection instead of a list --- src/caterpillar/_common.py | 12 ++++++------ src/caterpillar/_common.pyi | 30 ++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 6 deletions(-) mode change 100644 => 100755 src/caterpillar/_common.py create mode 100755 src/caterpillar/_common.pyi diff --git a/src/caterpillar/_common.py b/src/caterpillar/_common.py old mode 100644 new mode 100755 index dafb1ae4..3ad1289b --- a/src/caterpillar/_common.py +++ b/src/caterpillar/_common.py @@ -14,9 +14,9 @@ # along with this program. If not, see . import itertools -from typing import Collection, List, Any +from typing import Collection -from caterpillar.abc import _ContextLike, _StreamType, _PrefixedType +from caterpillar.abc import _PrefixedType from caterpillar.context import ( Context, CTX_PATH, @@ -31,7 +31,7 @@ class WithoutContextVar: - def __init__(self, context: _ContextLike, name, value) -> None: + def __init__(self, context, name, value) -> None: self.context = context self.old_value = context[name] self.value = value @@ -48,7 +48,7 @@ def __exit__(self, exc_type, exc_value, traceback) -> None: self.context[CTX_FIELD] = self.field -def unpack_seq(context: _ContextLike, unpack_one) -> List[Any]: +def unpack_seq(context, unpack_one) -> Collection: """Generic function to unpack sequenced elements. 
:param stream: the input stream @@ -113,7 +113,7 @@ def unpack_seq(context: _ContextLike, unpack_one) -> List[Any]: return values -def pack_seq(seq: Collection[Any], context: _ContextLike, pack_one) -> None: +def pack_seq(seq, context, pack_one) -> None: """Generic function to pack sequenced elements. :param seq: the iterable of elements @@ -164,7 +164,7 @@ def pack_seq(seq: Collection[Any], context: _ContextLike, pack_one) -> None: raise StructException(str(exc), seq_context) from exc -def iseof(stream: _StreamType) -> bool: +def iseof(stream) -> bool: """ Check if the stream is at the end of the file. diff --git a/src/caterpillar/_common.pyi b/src/caterpillar/_common.pyi new file mode 100755 index 00000000..8de9c887 --- /dev/null +++ b/src/caterpillar/_common.pyi @@ -0,0 +1,30 @@ +import types + +from typing import Any, Callable, Collection +from caterpillar.abc import _ContextLike, _OT, _IT, _StreamType +from caterpillar.fields._base import Field + +class WithoutContextVar: + context: _ContextLike + old_value: Any + value: Any + name: str + field: Field + def __init__(self, context: _ContextLike, name: str, value: Any) -> None: ... + def __enter__(self) -> None: ... + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: types.TracebackType | None, + ) -> None: ... + +def unpack_seq( + context: _ContextLike, unpack_one: Callable[[_ContextLike], _OT] +) -> Collection[_OT]: ... +def pack_seq( + seq: Collection[_IT], + context: _ContextLike, + pack_one: Callable[[_IT, _ContextLike], None], +) -> None: ... +def iseof(stream: _StreamType) -> bool: ... 
From 6b77117e75e661e04cb4e4fbff006a6c767447db Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Tue, 24 Jun 2025 11:06:26 +0200 Subject: [PATCH 03/41] Added type stubs for byteorder.py --- byteorder.py: + moved 'BYTEORDER_FIELD' to shared.py as ATTR_BYTEORDER --- src/caterpillar/_common.pyi | 14 +++++++ src/caterpillar/byteorder.py | 9 ++--- src/caterpillar/byteorder.pyi | 76 +++++++++++++++++++++++++++++++++++ src/caterpillar/shared.py | 2 +- src/caterpillar/shared.pyi | 15 +++++++ 5 files changed, 110 insertions(+), 6 deletions(-) mode change 100644 => 100755 src/caterpillar/byteorder.py create mode 100755 src/caterpillar/byteorder.pyi diff --git a/src/caterpillar/_common.pyi b/src/caterpillar/_common.pyi index 8de9c887..2d6e5df1 100755 --- a/src/caterpillar/_common.pyi +++ b/src/caterpillar/_common.pyi @@ -1,3 +1,17 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
import types from typing import Any, Callable, Collection diff --git a/src/caterpillar/byteorder.py b/src/caterpillar/byteorder.py old mode 100644 new mode 100755 index 9c44030d..04982adf --- a/src/caterpillar/byteorder.py +++ b/src/caterpillar/byteorder.py @@ -17,8 +17,7 @@ from dataclasses import dataclass from enum import Enum -# constant to identify the byteorder of an object -BYTEORDER_FIELD = "__byteorder__" +from caterpillar.shared import ATTR_BYTEORDER @dataclass(frozen=True) @@ -56,7 +55,7 @@ def apply(self, other): :param other: The object to which the byte order information should be applied. """ - setattr(other, BYTEORDER_FIELD, self) + setattr(other, ATTR_BYTEORDER, self) def __add__(self, other): """ @@ -89,14 +88,14 @@ def __or__(self, other): ) -def byteorder(obj, default: ByteOrder = None) -> ByteOrder: +def byteorder(obj, default=None) -> ByteOrder: """ Get the byte order of an object, defaulting to SysNative if not explicitly set. :param obj: The object to retrieve the byte order from. :return: The byte order of the object. """ - return getattr(obj, BYTEORDER_FIELD, default or SysNative) + return getattr(obj, ATTR_BYTEORDER, default or SysNative) @dataclass(frozen=True) diff --git a/src/caterpillar/byteorder.pyi b/src/caterpillar/byteorder.pyi new file mode 100755 index 00000000..8851d09e --- /dev/null +++ b/src/caterpillar/byteorder.pyi @@ -0,0 +1,76 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from enum import Enum +from typing import Optional + +BYTEORDER_FIELD: str = ... + +class ByteOrder: + name: str + ch: str + + class Alignment(Enum): + NONE = 0 + NATIVE = 1 + + alignment: Alignment + + class Size(Enum): + STANDARD = 0 + NATIVE = 1 + + size: Size + + def apply(self, other: ByteOrder) -> None: ... + def __add__(self, other: ByteOrder): ... + def __or__(self, other: ByteOrder): ... + def __init__( + self, + name: str, + ch: str, + alignment: Optional[Alignment] = ..., + size: Optional[Size] = ..., + ) -> None: ... + +Native: ByteOrder = ... +BigEndian: ByteOrder = ... +LittleEndian: ByteOrder = ... +NetEndian: ByteOrder = ... +SysNative: ByteOrder = ... + +def byteorder(obj, default: Optional[ByteOrder] = None) -> ByteOrder: ... + +class Arch: + name: str + ptr_size: int + + def __init__(self, name:str, ptr_size: int) -> None: ... + +system_arch: Arch = ... +x86: Arch = ... +x86_64: Arch = ... +ARM: Arch = ... +ARM64: Arch = ... +AARCH64: Arch = ... +PowerPC: Arch = ... +PowerPC64: Arch = ... +MIPS: Arch = ... +MIPS64: Arch = ... +SPARC: Arch = ... +SPARC64: Arch = ... +RISC_V64: Arch = ... +RISC_V: Arch = ... +AMD: Arch = ... +AMD64: Arch = ... \ No newline at end of file diff --git a/src/caterpillar/shared.py b/src/caterpillar/shared.py index 5fb1e98c..32fa5ff2 100644 --- a/src/caterpillar/shared.py +++ b/src/caterpillar/shared.py @@ -30,7 +30,7 @@ #: struct, bitfield, or sequence definition. The type of the stored value #: must be conforming to the _StructLike protocol. 
ATTR_STRUCT = "__struct__" - +ATTR_BYTEORDER = "__byteorder__" ATTR_TYPE = "__type__" # TODO: add to reference diff --git a/src/caterpillar/shared.pyi b/src/caterpillar/shared.pyi index 7fabcbfd..03ffbdc2 100755 --- a/src/caterpillar/shared.pyi +++ b/src/caterpillar/shared.pyi @@ -1,3 +1,17 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . from caterpillar.abc import _ContextLambda, _StructLike, _ContainsStruct from typing import Any, Optional, Union @@ -5,6 +19,7 @@ MODE_PACK: int = ... MODE_UNPACK: int = ... ATTR_STRUCT: str = ... ATTR_TYPE: str = ... +ATTR_BYTEORDER: str = ... ATTR_ACTION_PACK: str = ... ATTR_ACTION_UNPACK: str = ... 
From 383af662b016e24217ab063763b2f14152822f06 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Tue, 24 Jun 2025 11:10:20 +0200 Subject: [PATCH 04/41] Created type stubs for context.py --- src/caterpillar/context.py | 23 +++--- src/caterpillar/context.pyi | 135 ++++++++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 12 deletions(-) mode change 100644 => 100755 src/caterpillar/context.py create mode 100755 src/caterpillar/context.pyi diff --git a/src/caterpillar/context.py b/src/caterpillar/context.py old mode 100644 new mode 100755 index 4bc08466..ba15a74b --- a/src/caterpillar/context.py +++ b/src/caterpillar/context.py @@ -18,12 +18,11 @@ import sys import warnings -from typing import Callable, Any, Union, Self +from typing import Callable, Any, Self from types import FrameType from dataclasses import dataclass -from caterpillar.abc import _ContextLambda, _ContextLike from caterpillar.exception import StructException from caterpillar.registry import to_struct @@ -92,7 +91,7 @@ def __context_setattr__(self, path: str, value: Any) -> None: setattr(obj, nodes[1], value) @property - def _root(self) -> _ContextLike: + def _root(self): current = self while CTX_PARENT in current: # dict-like access is much faster @@ -239,7 +238,7 @@ class Format: __slots__ = "func", "annotations", "namelist", "depth" - def __init__(self, condition: Union[_ContextLambda, bool], depth=2): + def __init__(self, condition, depth=2): if (sys.version_info.major, sys.version_info.minor) >= (3, 14): warnings.warn( "Python3.14 breaks support for Contitional fields. 
Conditional " @@ -316,8 +315,8 @@ class BinaryExpression(ExprMixin): """ operand: Callable[[Any, Any], Any] - left: Union[Any, _ContextLambda] - right: Union[Any, _ContextLambda] + left: Any + right: Any def __call__(self, context: Context, **kwds): lhs = self.left(context, **kwds) if callable(self.left) else self.left @@ -349,9 +348,9 @@ class UnaryExpression: name: str operand: Callable[[Any], Any] - value: Union[Any, _ContextLambda] + value: Any - def __call__(self, context: Context, **kwds): + def __call__(self, context, **kwds): value = self.value(context, **kwds) if callable(self.value) else self.value return self.operand(value) @@ -373,7 +372,7 @@ class ContextPath(ExprMixin): Represents a lambda function for retrieving a value from a Context based on a specified path. """ - def __init__(self, path: str = None) -> None: + def __init__(self, path=None) -> None: """ Initializes a ContextPath instance with an optional path. @@ -384,7 +383,7 @@ def __init__(self, path: str = None) -> None: self.call_kwargs = None self.getitem_args = None - def __call__(self, context: _ContextLike = None, **kwds): + def __call__(self, context=None, **kwds): """ Calls the lambda function to retrieve a value from a Context. @@ -405,7 +404,7 @@ def __getitem__(self, key) -> Self: return self def __type__(self) -> type: - return Any + return object def __getattribute__(self, key: str) -> ContextPath: """ @@ -461,7 +460,7 @@ class ContextLength(ExprMixin): def __init__(self, path: ContextPath) -> None: self.path = path - def __call__(self, context: Context = None, **kwds): + def __call__(self, context=None, **kwds): """ Calls the lambda function to retrieve a value from a Context. 
diff --git a/src/caterpillar/context.pyi b/src/caterpillar/context.pyi new file mode 100755 index 00000000..ec37afe7 --- /dev/null +++ b/src/caterpillar/context.pyi @@ -0,0 +1,135 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from dataclasses import dataclass +from types import FrameType +from typing import ( + Any, + Callable, + Dict, + List, + NewType, + Optional, + Self, + Tuple, + Type, + Union, + dataclass_transform, +) +from caterpillar.abc import _ContextLike, _ContextLambda + +CTX_PARENT: str = ... +CTX_OBJECT: str = ... +CTX_OFFSETS: str = ... +CTX_STREAM: str = ... +CTX_FIELD: str = ... +CTX_VALUE: str = ... +CTX_POS: str = ... +CTX_INDEX: str = ... +CTX_PATH: str = ... +CTX_SEQ: str = ... +CTX_ARCH: str = ... + +class Context(dict, _ContextLike): + def __setattr__(self, key: str, value: Any) -> None: ... + def __getattribute__(self, key: str): ... + def __context_getattr__(self, path: str): ... + def __context_setattr__(self, path: str, value: Any) -> None: ... + +class ExprMixin: + def __add__(self, other: Any) -> BinaryExpression: ... + def __sub__(self, other: Any) -> BinaryExpression: ... + def __mul__(self, other: Any) -> BinaryExpression: ... + def __floordiv__(self, other: Any) -> BinaryExpression: ... + def __truediv__(self, other: Any) -> BinaryExpression: ... + def __mod__(self, other: Any) -> BinaryExpression: ... 
+ def __pow__(self, other: Any) -> BinaryExpression: ... + def __xor__(self, other: Any) -> BinaryExpression: ... + def __and__(self, other: Any) -> BinaryExpression: ... + def __or__(self, other: Any) -> BinaryExpression: ... + def __rshift__(self, other: Any) -> BinaryExpression: ... + def __lshift__(self, other: Any) -> BinaryExpression: ... + __div__ = __truediv__ + def __radd__(self, other: Any) -> BinaryExpression: ... + def __rsub__(self, other: Any) -> BinaryExpression: ... + def __rmul__(self, other: Any) -> BinaryExpression: ... + def __rfloordiv__(self, other: Any) -> BinaryExpression: ... + def __rtruediv__(self, other: Any) -> BinaryExpression: ... + def __rmod__(self, other: Any) -> BinaryExpression: ... + def __rpow__(self, other: Any) -> BinaryExpression: ... + def __rxor__(self, other: Any) -> BinaryExpression: ... + def __rand__(self, other: Any) -> BinaryExpression: ... + def __ror__(self, other: Any) -> BinaryExpression: ... + def __rrshift__(self, other: Any) -> BinaryExpression: ... + def __rlshift__(self, other: Any) -> BinaryExpression: ... + def __neg__(self) -> UnaryExpression: ... + def __pos__(self) -> UnaryExpression: ... + def __invert__(self) -> UnaryExpression: ... + def __contains__(self, other: Any) -> BinaryExpression: ... + def __gt__(self, other: Any) -> BinaryExpression: ... + def __ge__(self, other: Any) -> BinaryExpression: ... + def __lt__(self, other: Any) -> BinaryExpression: ... + def __le__(self, other: Any) -> BinaryExpression: ... + def __eq__(self, other: Any) -> BinaryExpression: ... + def __ne__(self, other: Any) -> BinaryExpression: ... + +class ConditionContext: + func: Union[_ContextLambda[bool], bool] + annotations: dict + namelist: List[str] + depth: int + def __init__(self, condition: _ContextLambda[bool] | bool, depth: int = 2) -> None: ... + def getframe(self, num: int, msg: str | None = None) -> FrameType: ... + def __enter__(self) -> Self: ... + def __exit__(self, *_) -> None: ... 
+ + +@dataclass_transform() +class BinaryExpression(ExprMixin, _ContextLambda): + operand: Callable[[Any, Any], Any] + left: Any | _ContextLambda + right: Any | _ContextLambda + def __call__(self, context: _ContextLike) -> Any: ... + def __enter__(self) -> Self: ... + def __exit__(self, *_) -> None: ... + +@dataclass_transform() +class UnaryExpression(_ContextLambda): + name: str + operand: Callable[[Any], Any] + value: Any | _ContextLambda + def __call__(self, context: _ContextLike): ... + def __enter__(self): ... + def __exit__(self, *_) -> None: ... + +class ContextPath(ExprMixin, _ContextLambda): + path: str + call_kwargs: Dict[str, Any] + getitem_args: List[Any] + def __init__(self, path: Optional[str] = None) -> None: ... + def __call__(self, context: Optional[_ContextLike] = None, **kwds): ... + def __getitem__(self, key) -> Self: ... + def __type__(self) -> Type[Any]: ... + def __getattribute__(self, key: str) -> ContextPath: ... + @property + def parent(self) -> ContextPath: ... + +class ContextLength(ExprMixin, _ContextLambda): + path: str + def __init__(self, path: ContextPath) -> None: ... + def __call__(self, context: Optional[_ContextLike] = None, **kwds) -> Any: ... 
+ +this: ContextPath +ctx: ContextPath +parent: ContextPath \ No newline at end of file From a6b3abc71b934b32e3dcbdb675837d98482f0e23 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Tue, 24 Jun 2025 11:12:13 +0200 Subject: [PATCH 05/41] Added type stubs for exception.py and options.py --- src/caterpillar/exception.pyi | 28 +++++++++++++++++++ src/caterpillar/options.py | 23 ++++++++-------- src/caterpillar/options.pyi | 51 +++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 12 deletions(-) create mode 100755 src/caterpillar/exception.pyi mode change 100644 => 100755 src/caterpillar/options.py create mode 100755 src/caterpillar/options.pyi diff --git a/src/caterpillar/exception.pyi b/src/caterpillar/exception.pyi new file mode 100755 index 00000000..8dac6a79 --- /dev/null +++ b/src/caterpillar/exception.pyi @@ -0,0 +1,28 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from caterpillar.abc import _ContextLike + +class StructException(Exception): + context: _ContextLike + def __init__(self, message: str, context: _ContextLike | None = None) -> None: ... + +class DynamicSizeError(StructException): ... +class OptionError(StructException): ... +class ValidationError(StructException): ... +class UnsupportedOperation(StructException): ... +class InvalidValueError(StructException): ... 
+class StreamError(StructException): ... +class DelegationError(StructException): ... +class Stop(StructException): ... diff --git a/src/caterpillar/options.py b/src/caterpillar/options.py old mode 100644 new mode 100755 index fd1524e3..497c1ad4 --- a/src/caterpillar/options.py +++ b/src/caterpillar/options.py @@ -14,8 +14,7 @@ # along with this program. If not, see . from dataclasses import dataclass -from typing import Any, Optional, Set, Union - +from typing import Any @dataclass(init=False) class Flag: @@ -24,12 +23,12 @@ class Flag: name: str """The name of this flag""" - value: Optional[Any] = None + value: Any = None """ Optional configuration value. """ - def __init__(self, name: str, value: Optional[Any] = None) -> None: + def __init__(self, name: str, value=None) -> None: self.name = name self.value = value self._hash_ = hash(name) @@ -44,19 +43,19 @@ def __hash__(self) -> int: #: Defaults that will be applied to **all** structs. -GLOBAL_STRUCT_OPTIONS: Set[Flag] = set() +GLOBAL_STRUCT_OPTIONS = set() #: Defaults that will be applied on **all** unions. -GLOBAL_UNION_OPTIONS: Set[Flag] = set() +GLOBAL_UNION_OPTIONS = set() #: Default field flags that will be applied on **all** fields. -GLOBAL_FIELD_FLAGS: Set[Flag] = set() +GLOBAL_FIELD_FLAGS = set() #: Default field flags that will be applied on **all** bit-fields. -GLOBAL_BITFIELD_FLAGS: Set[Flag] = set() +GLOBAL_BITFIELD_FLAGS = set() -def configure(base: Set[Flag], *flags: Flag) -> None: +def configure(base, *flags: Flag) -> None: """ Update the base set of flags with additional flags. @@ -96,7 +95,7 @@ def set_union_flags(*flags: Flag) -> None: configure(GLOBAL_UNION_OPTIONS, *flags) -def get_flags(obj: Any, attr: Optional[str] = None) -> Optional[Set[Flag]]: +def get_flags(obj, attr=None): """ Get the flags associated with an object. 
@@ -107,7 +106,7 @@ def get_flags(obj: Any, attr: Optional[str] = None) -> Optional[Set[Flag]]: return getattr(obj, attr or "flags", None) -def has_flag(flag: Union[str, Flag], obj: Any, attr: Optional[str] = None) -> bool: +def has_flag(flag, obj, attr=None) -> bool: """ Check if an object has a specific flag. @@ -125,7 +124,7 @@ def has_flag(flag: Union[str, Flag], obj: Any, attr: Optional[str] = None) -> bo return flag in flags -def get_flag(name: str, obj: Any, attr: Optional[str] = None) -> Optional[Flag]: +def get_flag(name: str, obj, attr=None): """ Get a specific flag associated with an object. diff --git a/src/caterpillar/options.pyi b/src/caterpillar/options.pyi new file mode 100755 index 00000000..b437a0d8 --- /dev/null +++ b/src/caterpillar/options.pyi @@ -0,0 +1,51 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from types import NoneType +from typing import Any, Callable, Collection, Generic, Iterable, TypeVar + +_VT = TypeVar("_VT") + +class Flag(Generic[_VT]): + name: str + value: _VT | None = ... + _hash_: int + def __init__(self, name: str, value: _VT | None = None) -> None: ... + def __hash__(self) -> int: ... + +GLOBAL_STRUCT_OPTIONS: set[Flag] = ... +GLOBAL_UNION_OPTIONS: set[Flag] = ... +GLOBAL_FIELD_FLAGS: set[Flag] = ... +GLOBAL_BITFIELD_FLAGS: set[Flag] = ... + +def configure(base: set[Flag], *flags: Flag) -> None: ... 
+def set_struct_flags(*flags: Flag, with_union: bool = False) -> None: ... +def set_field_flags(*flags: Flag) -> None: ... +def set_union_flags(*flags: Flag) -> None: ... +def get_flags(obj: Any, attr: str | None = None) -> set[Flag] | None: ... +def has_flag(flag: str | Flag, obj: Any, attr: str | None = None) -> bool: ... +def get_flag(name: str, obj: Any, attr: str | None = None) -> Flag | None: ... + +S_DISCARD_UNNAMED: Flag[NoneType] +S_DISCARD_CONST: Flag[NoneType] +S_UNION: Flag[NoneType] +S_REPLACE_TYPES: Flag[NoneType] +S_EVAL_ANNOTATIONS: Flag[NoneType] +S_ADD_BYTES: Flag[NoneType] +S_SLOTS: Flag[NoneType] +F_KEEP_POSITION: Flag[NoneType] +F_DYNAMIC: Flag[NoneType] +F_SEQUENTIAL: Flag[NoneType] +F_OFFSET_OVERRIDE: Flag[NoneType] +O_ARRAY_FACTORY: Flag[Callable[[Iterable], Collection]] From 07a1fe9c5e1e43a81666d5ed318f11dc567572aa Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Tue, 24 Jun 2025 11:18:19 +0200 Subject: [PATCH 06/41] Updated shortcuts and registry stubs --- + shortcuts now include typeof, to_struct, hasstruct, getstruct and sizeof --- src/caterpillar/registry.pyi | 32 ++++++++++++++++++++++++ src/caterpillar/shortcuts.py | 42 +++++++++++++++++++++++--------- src/caterpillar/shortcuts.pyi | 46 +++++++++++++++++++++++++++++++++++ 3 files changed, 109 insertions(+), 11 deletions(-) create mode 100755 src/caterpillar/registry.pyi create mode 100755 src/caterpillar/shortcuts.pyi diff --git a/src/caterpillar/registry.pyi b/src/caterpillar/registry.pyi new file mode 100755 index 00000000..c4117657 --- /dev/null +++ b/src/caterpillar/registry.pyi @@ -0,0 +1,32 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from caterpillar.abc import _StructLike +from typing import Any, Callable, Self + +class TypeConverter: + target: type + delegate: Callable[[Any, dict], _StructLike] + def __init__( + self, + target: type | None = None, + delegate: Callable[[Any, dict], _StructLike] | None = None, + ) -> None: ... + def matches(self, annotation: Any) -> bool: ... + def convert(self, annotation: Any, kwargs: dict) -> _StructLike: ... + def __call__(self, delegate: Callable[[Any, dict], _StructLike]) -> Self: ... + +annotation_registry: list[TypeConverter] + +def to_struct(obj: Any, **kwargs) -> _StructLike: ... diff --git a/src/caterpillar/shortcuts.py b/src/caterpillar/shortcuts.py index b004bd46..3a2aad9b 100644 --- a/src/caterpillar/shortcuts.py +++ b/src/caterpillar/shortcuts.py @@ -12,15 +12,35 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
-from .model import pack, pack_file, pack_into -from .model import unpack, unpack_file -from .model import struct, union -from .model import bitfield, Sequence as Seq - -from .context import ContextPath, this, ctx, parent, ContextLength as lenof -from .byteorder import LittleEndian, BigEndian -from .byteorder import x86, x86_64, ARM, ARM64, AMD, AMD64, AARCH64 -from .byteorder import PowerPC, PowerPC64, RISC_V, RISC_V64 - -from .fields import Field as F +from .byteorder import ( + AARCH64, + AMD, + AMD64, + ARM, + ARM64, + BigEndian, + LittleEndian, + PowerPC, + PowerPC64, + RISC_V, + RISC_V64, + x86, + x86_64, +) +from .context import ContextPath, ctx, parent, this, ContextLength as lenof +from .model import ( + bitfield, + pack, + pack_file, + pack_into, + struct, + union, + unpack, + unpack_file, + sizeof, + Sequence as Seq, +) +from .shared import typeof, getstruct, hasstruct +from .registry import to_struct from . import options as opt +from .fields import Field as F diff --git a/src/caterpillar/shortcuts.pyi b/src/caterpillar/shortcuts.pyi new file mode 100755 index 00000000..63219055 --- /dev/null +++ b/src/caterpillar/shortcuts.pyi @@ -0,0 +1,46 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+from .byteorder import ( + AARCH64, + AMD, + AMD64, + ARM, + ARM64, + BigEndian, + LittleEndian, + PowerPC, + PowerPC64, + RISC_V, + RISC_V64, + x86, + x86_64, +) +from .context import ContextPath, ctx, parent, this, ContextLength as lenof +from .model import ( + bitfield, + pack, + pack_file, + pack_into, + struct, + union, + unpack, + unpack_file, + sizeof, + Sequence as Seq, +) +from .shared import typeof, getstruct, hasstruct +from .registry import to_struct +from . import options as opt +from .fields import Field as F \ No newline at end of file From 8e06f96b304a83bbb43529940c43d0dc2b137d48 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Tue, 24 Jun 2025 15:38:36 +0200 Subject: [PATCH 07/41] Type hints for model._base.py (Sequence) class --- + Removed unnecessary type hints in python code + Fixed an issue with unions declared using the Sequence class --- src/caterpillar/model/_base.py | 82 ++++++++++++++++----------------- src/caterpillar/model/_base.pyi | 44 ++++++++++++++++++ 2 files changed, 85 insertions(+), 41 deletions(-) mode change 100644 => 100755 src/caterpillar/model/_base.py create mode 100755 src/caterpillar/model/_base.pyi diff --git a/src/caterpillar/model/_base.py b/src/caterpillar/model/_base.py old mode 100644 new mode 100755 index 1cfad671..ff7d2a9f --- a/src/caterpillar/model/_base.py +++ b/src/caterpillar/model/_base.py @@ -14,13 +14,16 @@ # along with this program. If not, see . 
import re -from typing import Optional, Self, Tuple -from typing import List, Dict, Any -from typing import Set, Iterable, Union - - -from caterpillar.abc import _StructLike, _ContextLike, _StreamType, _Action -from caterpillar.context import Context, CTX_PATH, CTX_OBJECT, CTX_STREAM, CTX_SEQ +from typing import Optional, Self, Iterable + +from caterpillar.context import ( + CTX_FIELD, + Context, + CTX_PATH, + CTX_OBJECT, + CTX_STREAM, + CTX_SEQ, +) from caterpillar.byteorder import ( BYTEORDER_FIELD, ByteOrder, @@ -58,13 +61,13 @@ class Sequence(FieldMixin): Sequence(fields=['a']) """ - model: Any + model: type """ Specifies the target class/dictionary used as the base model. """ # second value with action in tuple is reserved - fields: List[Field | Tuple[_Action, None]] + fields: list """A list of all fields defined in this struct. This attribute stores the fields in an *ordered* collection, whereby ordered @@ -83,12 +86,12 @@ class Sequence(FieldMixin): Global architecture definition (will be inferred on all fields) """ - options: Set[Flag] + options: set """ Additional options specifying what to include in the final class. """ - field_options: Set[Flag] + field_options: set """ Global field flags that will be applied on all fields. 
""" @@ -106,7 +109,7 @@ class Sequence(FieldMixin): def __init__( self, - model: Optional[Dict[str, Field]] = None, + model: Optional[dict] = None, order: Optional[ByteOrder] = None, arch: Optional[Arch] = None, options: Iterable[Flag] | None = None, @@ -119,7 +122,7 @@ def __init__( self.field_options = set(field_options or []) # these fields will be set or used while processing the model type - self._member_map_: Dict[str, Field] = {} + self._member_map_ = {} self.fields = [] self.is_union = S_UNION in self.options # Process all fields in the model @@ -161,7 +164,7 @@ def has_option(self, option: Flag) -> bool: """ return option in self.options - def _included(self, name: str, default: Optional[Any], annotation: Any) -> bool: + def _included(self, name: str, default, annotation) -> bool: """ Check if a field with the given name should be included. @@ -179,10 +182,10 @@ def _included(self, name: str, default: Optional[Any], annotation: Any) -> bool: return True - def _set_default(self, name: str, value: Any) -> None: + def _set_default(self, name: str, value) -> None: pass - def _process_default(self, name, annotation: Any, had_default=False) -> Any: + def _process_default(self, name, annotation, had_default=False): default = getattr(self.model, name, INVALID_DEFAULT) # constant values that are not in the form of fields, structs or types should # be wrapped into constant values. 
For more information, see _process_field @@ -244,17 +247,13 @@ def _process_model(self) -> None: for name in removables: self._remove_from_model(name) - def _prepare_fields(self) -> Dict[str, Any]: + def _prepare_fields(self): return self.model - def _process_annotation( - self, annotation: Any, default: Optional[Any], order: ByteOrder, arch: Arch - ) -> Union[_StructLike, Field]: + def _process_annotation(self, annotation, default, order: ByteOrder, arch: Arch): return registry.to_struct(annotation, arch=arch, order=order) - def _process_field( - self, name: str, annotation: Any, default: Optional[Any] - ) -> Field: + def _process_field(self, name: str, annotation, default) -> Field: """ Process a field in the model. @@ -263,8 +262,8 @@ def _process_field( :param default: The default value of the field. :return: The processed field. """ - field: Field = None - struct: _StructLike = None + field = None + struct = None order = getattr(annotation, BYTEORDER_FIELD, self.order or SysNative) arch = self.arch or system_arch @@ -303,7 +302,7 @@ def add_field(self, name: str, field: Field, included: bool = False) -> None: if included: self._member_map_[name] = field - def add_action(self, action: _Action) -> None: + def add_action(self, action) -> None: self.fields.append((action, None)) def del_field(self, name: str, field: Field) -> None: @@ -316,10 +315,10 @@ def del_field(self, name: str, field: Field) -> None: self._member_map_.pop(name, None) self.fields.remove(field) - def get_members(self) -> Dict[str, Field]: + def get_members(self): return self._member_map_.copy() - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """ Get the size of the struct. 
@@ -334,15 +333,16 @@ def __size__(self, context: _ContextLike) -> int: return max(sizes) if self.is_union else sum(sizes) - def unpack_one(self, context: _ContextLike) -> Optional[Any]: + def unpack_one(self, context): # At first, we define the object context where the parsed values # will be stored - init_data: Dict[str, Any] = Context() + init_data = Context() context[CTX_OBJECT] = Context(_parent=context) base_path = context[CTX_PATH] if self.is_union: - start = context[CTX_STREAM].tell() + stream = context[CTX_STREAM] + start = stream.tell() max_size = 0 for field in self.fields: @@ -354,7 +354,7 @@ def unpack_one(self, context: _ContextLike) -> Optional[Any]: continue if self.is_union: - pos = context[CTX_STREAM].tell() + pos = stream.tell() # REVISIT: make this a real attribute name = field.__name__ @@ -368,8 +368,8 @@ def unpack_one(self, context: _ContextLike) -> Optional[Any]: if self.is_union: # This union implementation will cover the max size - max_size = max(context[CTX_STREAM], stream.tell() - pos) - context[CTX_STREAM].seek(start) + max_size = max(max_size, stream.tell() - pos) + stream.seek(start) obj = init_data if self.is_union: @@ -377,7 +377,7 @@ def unpack_one(self, context: _ContextLike) -> Optional[Any]: stream.seek(start + max_size) return obj - def __unpack__(self, context: _ContextLike) -> Optional[Any]: + def __unpack__(self, context): """ Unpack the struct from the stream. 
@@ -396,12 +396,12 @@ def __unpack__(self, context: _ContextLike) -> Optional[Any]: return unpack_seq(context, self.unpack_one) return self.unpack_one(this_context) - def get_value(self, obj: Any, name: str, field: Field) -> Optional[Any]: + def get_value(self, obj, name: str, field: Field): return obj.get(name, None) - def pack_one(self, obj: Dict[str, Any], context: _ContextLike) -> None: + def pack_one(self, obj, context) -> None: max_size = 0 - union_field: Optional[_StructLike] = None + union_field = None base_path: str = context[CTX_PATH] for field in self.fields: @@ -442,11 +442,11 @@ def pack_one(self, obj: Dict[str, Any], context: _ContextLike) -> None: value = self.get_value(obj, name, union_field) union_field.__pack__(value, context) - def __pack__(self, obj: Any, context: _ContextLike) -> None: + def __pack__(self, obj, context) -> None: # As structs can be used in field definitions a field will call this struct # and could potentially be a sequence. Therefore, we have to check whether we # should unpack multiple objects. 
- field: Optional[Field] = context.get("_field") + field: Optional[Field] = context.get(CTX_FIELD) if field and context[CTX_SEQ]: pack_seq(obj, context, self.pack_one) else: @@ -467,7 +467,7 @@ def __str__(self) -> str: # --- private sequence tyoe converter --- @registry.TypeConverter(dict) -def _type_converter(annotation: Any, kwargs: dict) -> _StructLike: +def _type_converter(annotation, kwargs: dict): arch = kwargs.pop("arch", None) order = kwargs.pop("order", None) return Sequence(model=annotation, order=order, arch=arch) diff --git a/src/caterpillar/model/_base.pyi b/src/caterpillar/model/_base.pyi new file mode 100755 index 00000000..20e8a2c9 --- /dev/null +++ b/src/caterpillar/model/_base.pyi @@ -0,0 +1,44 @@ +from typing import Any, Dict, Generic, Iterable, Self, Optional, Type, TypeVar, Union + +from caterpillar.abc import _StructLike, _ActionLike, _ContextLike, _ContainsStruct +from caterpillar.byteorder import ByteOrder, Arch +from caterpillar.fields._base import Field +from caterpillar.fields._mixin import FieldMixin +from caterpillar.options import Flag + +_SeqModelT = TypeVar( + "_SeqModelT", default=Dict[str, Union[_StructLike, _ContainsStruct, type]] +) + +class Sequence(FieldMixin, Generic[_SeqModelT], _StructLike[_SeqModelT, _SeqModelT]): + model: Any + fields: list[Field | tuple[_ActionLike, None]] + order: Optional[ByteOrder] + arch: Optional[Arch] + options: set[Flag] + field_options: set[Flag] + is_union: bool + def __init__( + self, + model: Optional[dict[str, Field]] = None, + order: Optional[ByteOrder] = None, + arch: Optional[Arch] = None, + options: Optional[Iterable[Flag]] = None, + field_options: Optional[Iterable[Flag]] = None, + ) -> None: ... + def __add__(self, sequence) -> Self: ... + def __sub__(self, sequence) -> Self: ... + __iadd__ = __add__ + __isub__ = __sub__ + def has_option(self, option: Flag) -> bool: ... + def add_field(self, name: str, field: Field, included: bool = False) -> None: ... 
+ def add_action(self, action: _ActionLike) -> None: ... + def del_field(self, name: str, field: Field) -> None: ... + def get_members(self) -> dict[str, Field]: ... + def unpack_one(self, context: _ContextLike) -> _SeqModelT: ... + def get_value(self, obj: Any, name: str, field: Field) -> Any: ... + def pack_one(self, obj: _SeqModelT, context: _ContextLike) -> None: ... + def __size__(self, context: _ContextLike) -> int: ... + def __unpack__(self, context: _ContextLike) -> _SeqModelT: ... + def __type__(self) -> Type[_SeqModelT]: ... + def __pack__(self, obj: _SeqModelT, context: _ContextLike) -> None: ... From ba38ee9543104588863085aae00062b5cafa2952 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Wed, 25 Jun 2025 08:07:38 +0200 Subject: [PATCH 08/41] Added model._struct python type hints --- _struct.py: + Removed type hints in original python file + Sizeof() now raises a TypeError when the target object does not implement the __size__ protocol --- src/caterpillar/abc.pyi | 12 +- src/caterpillar/model/_base.pyi | 14 ++ src/caterpillar/model/_struct.py | 187 ++++++++++++++++-------- src/caterpillar/model/_struct.pyi | 226 ++++++++++++++++++++++++++++++ 4 files changed, 376 insertions(+), 63 deletions(-) mode change 100644 => 100755 src/caterpillar/model/_struct.py create mode 100755 src/caterpillar/model/_struct.pyi diff --git a/src/caterpillar/abc.pyi b/src/caterpillar/abc.pyi index e79c9991..75ce32bd 100755 --- a/src/caterpillar/abc.pyi +++ b/src/caterpillar/abc.pyi @@ -14,7 +14,7 @@ # along with this program. If not, see . 
from io import IOBase from types import EllipsisType -from typing import Any, Callable, Optional, Protocol, TypeVar, Union +from typing import Any, Callable, Optional, Protocol, TypeVar, Union, runtime_checkable _IT = TypeVar("_IT") _IT_co = TypeVar("_IT_co") @@ -30,6 +30,7 @@ _PrefixedType = slice # [_StructLike[int, int], NoneType, NoneType] _LengthT = Union[int, _PrefixedType, _GreedyType, _ContextLambda] +@runtime_checkable class _ContextLike(Protocol): @property def _root(self) -> Optional[_ContextLike]: ... @@ -42,9 +43,11 @@ _ContextLambdaReturnT_co = TypeVar( "_ContextLambdaReturnT_co", covariant=True, default=Any ) +@runtime_checkable class _ContextLambda(Protocol[_ContextLambdaReturnT_co]): def __call__(self, context: _ContextLike) -> _ContextLambdaReturnT_co: ... +@runtime_checkable class _StructLike(Protocol[_IT_contra, _OT_co]): def __size__(self, context: _ContextLike) -> int: ... def __unpack__(self, context: _ContextLike) -> _OT_co: ... @@ -53,19 +56,24 @@ class _StructLike(Protocol[_IT_contra, _OT_co]): _StructT = Union[_ContainsStruct[_IT, _OT], _StructLike[_IT, _OT], _ContextLambda] +@runtime_checkable class _ContainsStruct(Protocol[_IT_contra, _OT]): __struct__: _StructLike[_IT_contra, _OT] +@runtime_checkable class _ActionLike(Protocol): def __action_pack__(self, context: _ContextLike) -> None: ... def __action_unpack__(self, context: _ContextLike) -> None: ... +@runtime_checkable class _SupportsPack(Protocol[_IT_contra]): def __pack__(self, obj: _IT_contra, context: _ContextLike) -> None: ... +@runtime_checkable class _SupportsSize(Protocol): def __size__(self, context: _ContextLike) -> int: ... +@runtime_checkable class _SupportsUnpack(Protocol[_OT_co]): def __unpack__(self, context: _ContextLike) -> _OT_co: ... @@ -75,8 +83,10 @@ _Switch = Union[ _SwitchLambda[_IT, _OT], ] +@runtime_checkable class _SupportsBits(Protocol): def __bits__(self) -> int: ... 
+@runtime_checkable class _ContainsBits(Protocol): __bits__: int diff --git a/src/caterpillar/model/_base.pyi b/src/caterpillar/model/_base.pyi index 20e8a2c9..d82fa00d 100755 --- a/src/caterpillar/model/_base.pyi +++ b/src/caterpillar/model/_base.pyi @@ -1,3 +1,17 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . from typing import Any, Dict, Generic, Iterable, Self, Optional, Type, TypeVar, Union from caterpillar.abc import _StructLike, _ActionLike, _ContextLike, _ContainsStruct diff --git a/src/caterpillar/model/_struct.py b/src/caterpillar/model/_struct.py old mode 100644 new mode 100755 index cec2a4b5..3f6b6ffa --- a/src/caterpillar/model/_struct.py +++ b/src/caterpillar/model/_struct.py @@ -17,12 +17,12 @@ from tempfile import TemporaryFile from io import BytesIO, IOBase -from typing import Optional, Type, TypeVar, Union, Callable +from typing import Optional, Union, Callable from typing import Dict, Any, Iterable from collections import OrderedDict from shutil import copyfileobj -from caterpillar.abc import getstruct, hasstruct, STRUCT_FIELD +from caterpillar.shared import getstruct, hasstruct, ATTR_STRUCT from caterpillar.abc import _StructLike, _StreamType, _SupportsUnpack, _SupportsPack from caterpillar.abc import _ContainsStruct, _ContextLike, _SupportsSize from caterpillar.context import Context, CTX_STREAM @@ -43,8 +43,6 @@ from ._base import Sequence 
-_T = TypeVar("_T") - # REVISIT: remove dataclasses dependency class Struct(Sequence): @@ -57,7 +55,7 @@ class Struct(Sequence): :param options: Additional options specifying what to include in the final class. """ - _member_map_: Dict[str, Field] + _member_map_: dict # An internal field that maps the field names of all class attributes to their # corresponding struct fields. @@ -66,12 +64,12 @@ class Struct(Sequence): def __init__( self, model: type, - order: Optional[ByteOrder] = None, - arch: Optional[Arch] = None, - options: Iterable[Flag] | None = None, - field_options: Iterable[Flag] | None = None, - kw_only: bool = False, - hook_cls: Optional[type] = None, + order=None, + arch=None, + options=None, + field_options=None, + kw_only=False, + hook_cls=None, ) -> None: self.kw_only = kw_only options = set(options or []) @@ -85,7 +83,7 @@ def __init__( options=options, field_options=field_options, ) - setattr(self.model, STRUCT_FIELD, self) + setattr(self.model, ATTR_STRUCT, self) # Add additional options based on the struct's type slots = self.has_option(S_SLOTS) self.model = dc.dataclass(self.model, kw_only=self.kw_only, slots=slots) @@ -99,10 +97,10 @@ def __init__( if self.has_option(S_ADD_BYTES): setattr(self.model, "__bytes__", _struct_bytes(self)) - def __type__(self) -> type: + def __type__(self): return self.model - def _prepare_fields(self) -> Dict[str, Any]: + def _prepare_fields(self): # We will inspect all base classes in reverse order and selectively # utilize classes that store a struct instance. 
Beginning at position # -1, concluding at 0, and using a step size of -1: @@ -117,10 +115,10 @@ def _prepare_fields(self) -> Dict[str, Any]: # The why is described in detail here: https://docs.python.org/3/howto/annotations.html return inspect.get_annotations(self.model, eval_str=eval_str) - def _set_default(self, name: str, value: Any) -> None: + def _set_default(self, name: str, value) -> None: setattr(self.model, name, value) - def _process_default(self, name, annotation: Any, had_default=False) -> Any: + def _process_default(self, name, annotation, had_default=False): default = super()._process_default(name, annotation, had_default) if default is INVALID_DEFAULT and had_default: self.kw_only = True @@ -132,10 +130,10 @@ def _replace_type(self, name: str, type_: type) -> None: def _remove_from_model(self, name: str) -> None: self.model.__annotations__.pop(name) - def unpack_one(self, context: _ContextLike) -> Optional[Any]: + def unpack_one(self, context): return self.model(**super().unpack_one(context)) - def get_value(self, obj: Any, name: str, field: Field) -> Optional[Any]: + def get_value(self, obj, name: str, field: Field): return getattr(obj, name, None) @@ -145,24 +143,24 @@ class _StructTypeConverter(registry.TypeConverter): def __init__(self) -> None: super().__init__() - def matches(self, annotation: Any) -> bool: + def matches(self, annotation) -> bool: return isinstance(annotation, type) and getstruct(annotation) is not None - def convert(self, annotation: Any, kwargs: dict) -> _StructLike: + def convert(self, annotation, kwargs: dict): return getstruct(annotation) registry.annotation_registry.append(_StructTypeConverter()) -def _struct_bytes(model: Struct) -> Callable: +def _struct_bytes(model: Struct): def to_bytes(self) -> bytes: return pack(self, model) return to_bytes -def _struct_getitem(model: Struct) -> Field: +def _struct_getitem(model: Struct): def class_getitem(*args): if len(args) == 2: _, dim = args @@ -178,12 +176,12 @@ def 
class_getitem(*args): def _make_struct( cls: type, - options: Iterable[Flag] = None, - order: Optional[ByteOrder] = None, - arch: Optional[Arch] = None, - field_options: Iterable[Flag] = None, - kw_only: bool = False, - hook_cls: Optional[type] = None, + options=None, + order=None, + arch=None, + field_options=None, + kw_only=False, + hook_cls=None, ) -> type: """ Helper function to create a Struct class. @@ -207,7 +205,16 @@ def _make_struct( return _.model -def struct(cls: Type[_T] | None = None, /, **kwds) -> Type[_T]: +def struct( + cls=None, + /, + *, + options=None, + order=None, + arch=None, + field_options=None, + kw_only=False, +): """ Decorator to create a Struct class. @@ -220,10 +227,24 @@ def struct(cls: Type[_T] | None = None, /, **kwds) -> Type[_T]: """ def wrap(cls): - return _make_struct(cls, **kwds) + return _make_struct( + cls, + order=order, + arch=arch, + options=options, + field_options=field_options, + kw_only=kw_only, + ) if cls is not None: - return _make_struct(cls, **kwds) + return _make_struct( + cls, + order=order, + arch=arch, + options=options, + field_options=field_options, + kw_only=kw_only, + ) return wrap @@ -264,17 +285,17 @@ def __exit__(self, exc_type, exc_value, traceback) -> None: # This variable MUST be reset afterward self._processing_ = False - def __model_init__(self, obj: Any, *args, **kwargs) -> None: + def __model_init__(self, obj, *args, **kwargs) -> None: # since it is possible now, to specify non-kw_only constructors, # we have to capture both, args and kwargs with self: return self._model_init_(obj, *args, **kwargs) - def __model_setattr__(self, obj: Any, key: str, new_value: Any) -> None: + def __model_setattr__(self, obj, key: str, new_value) -> None: # The target attribute will alyaws be set object.__setattr__(obj, key, new_value) - members: Dict[str, Field] = self.struct.get_members() + members = self.struct.get_members() if self._processing_ or key not in members: # Refresh can't be done if: # 1) the current 
instance is alredy being processed @@ -285,9 +306,7 @@ def __model_setattr__(self, obj: Any, key: str, new_value: Any) -> None: # delegation into method allows for customisation self.refresh(obj, key, new_value, members) - def refresh( - self, obj: Any, key: str, new_value: Any, members: Dict[str, Field] - ) -> None: + def refresh(self, obj, key: str, new_value, members) -> None: # DEFAULT: retrieve the current field and temporarily pack its data field = members[key] data = pack(new_value, field) @@ -304,7 +323,7 @@ def refresh( stream.seek(0) -def _union_init(hook: UnionHook) -> Callable: +def _union_init(hook): # wrapper function to capture the calling instance def init(self, *args, **kwargs) -> None: return hook.__model_init__(self, *args, **kwargs) @@ -312,15 +331,25 @@ def init(self, *args, **kwargs) -> None: return init -def _union_setattr(hook: UnionHook) -> Callable: +def _union_setattr(hook): # wrapper function to capture the calling instance - def setattribute(self, key: str, value: Any) -> None: + def setattribute(self, key: str, value) -> None: hook.__model_setattr__(self, key, value) return setattribute -def union(cls: type = None, /, *, options: Iterable[Flag] = None, **kwds): +def union( + cls=None, + /, + *, + options=None, + order=None, + arch=None, + field_options=None, + kw_only=False, + hook_cls=None, +): """ Decorator to create a Union class. 
@@ -334,17 +363,37 @@ def union(cls: type = None, /, *, options: Iterable[Flag] = None, **kwds): options = set(list(options or []) + [S_UNION]) def wrap(cls): - return _make_struct(cls, options=options, **kwds) + return _make_struct( + cls, + order=order, + arch=arch, + options=options, + field_options=field_options, + kw_only=kw_only, + hook_cls=hook_cls, + ) if cls is not None: - return _make_struct(cls, options=options, **kwds) + return _make_struct( + cls, + order=order, + arch=arch, + options=options, + field_options=field_options, + kw_only=kw_only, + hook_cls=hook_cls, + ) return wrap def pack( - obj: Union[Any, _ContainsStruct], - struct: Optional[_SupportsPack] = None, + obj, + struct=None, + /, + *, + use_tempfile=False, + as_field=False, **kwds, ) -> bytes: """ @@ -357,16 +406,18 @@ def pack( :return: The packed bytes. """ buffer = BytesIO() - pack_into(obj, buffer, struct, **kwds) + pack_into(obj, buffer, struct, use_tempfile=use_tempfile, as_field=as_field, **kwds) return buffer.getvalue() def pack_into( - obj: Union[Any, _ContainsStruct], - buffer: _StreamType, - struct: Optional[_StructLike] = None, - use_tempfile: bool = False, - as_field: bool = False, + obj, + buffer, + struct=None, + /, + *, + use_tempfile=False, + as_field=False, **kwds, ) -> None: """ @@ -452,10 +503,13 @@ def pack_into( def pack_file( - obj: Union[Any, _ContainsStruct], + obj, filename: str, - struct: Optional[_StructLike] = None, - use_tempfile: bool = False, + struct=None, + /, + *, + use_tempfile=False, + as_field=False, **kwds, ) -> None: """ @@ -469,15 +523,17 @@ def pack_file( :return: None """ with open(filename, "w+b") as fp: - pack_into(obj, fp, struct, use_tempfile, **kwds) + pack_into(obj, fp, struct, use_tempfile=use_tempfile, as_field=as_field, **kwds) def unpack( - struct: Union[_SupportsUnpack, _ContainsStruct], - buffer: Union[bytes, _StreamType], - as_field: bool = False, + struct, + buffer, + /, + *, + as_field=False, **kwds, -) -> Any: +): """ Unpack an 
object from a bytes buffer or stream using the specified struct. @@ -525,10 +581,13 @@ def unpack( def unpack_file( - struct: Union[_StructLike, _ContainsStruct], + struct, filename: str, + /, + *, + as_field=False, **kwds, -) -> Any: +): """ Unpack an object from a file using the specified struct. @@ -539,7 +598,7 @@ def unpack_file( :return: The unpacked object. """ with open(filename, "rb") as fp: - return unpack(struct, fp, **kwds) + return unpack(struct, fp, as_field=as_field, **kwds) def sizeof(obj: Union[_StructLike, _ContainsStruct, _SupportsSize], **kwds) -> int: @@ -547,4 +606,8 @@ def sizeof(obj: Union[_StructLike, _ContainsStruct, _SupportsSize], **kwds) -> i struct_ = obj if hasstruct(struct_): struct_ = getstruct(struct_) + + if not isinstance(struct_, _SupportsSize): + raise TypeError(f"{type(struct_).__name__} does not support size calculation!") + return struct_.__size__(context) diff --git a/src/caterpillar/model/_struct.pyi b/src/caterpillar/model/_struct.pyi new file mode 100755 index 00000000..fc772f7d --- /dev/null +++ b/src/caterpillar/model/_struct.pyi @@ -0,0 +1,226 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+import types +from typing import ( + Any, + Callable, + Iterable, + Optional, + TypeVar, + Union, + Type, + Generic, + overload, +) + +from caterpillar import registry +from caterpillar.abc import ( + _ContainsStruct, + _OT, + _IT, + _SupportsPack, + _StreamType, + _SupportsUnpack, + _ContextLike, + _StructLike, + _SupportsSize, +) +from caterpillar.byteorder import Arch, ByteOrder +from caterpillar.options import Flag +from caterpillar.fields._base import Field +from caterpillar.model._base import Sequence + +_ModelT = TypeVar("_ModelT") + +class Struct(Sequence[_ModelT]): + kw_only: bool + model: Type[_ModelT] + def __init__( + self, + model: Type[_ModelT], + options: Optional[Iterable[Flag]] = None, + order: Optional[ByteOrder] = None, + arch: Optional[Arch] = None, + field_options: Optional[Flag] = None, + kw_only: bool = False, + hook_cls: Optional[Type[_UnionHookLike[_ModelT]]] = None, + ) -> None: ... + def __type__(self) -> Type[_ModelT]: ... + def unpack_one(self, context: _ContextLike) -> _ModelT: ... + def get_value(self, obj: Any, name: str, field: Field) -> Any | None: ... + +class _StructTypeConverter(registry.TypeConverter): + def __init__(self) -> None: ... + def matches(self, annotation: Any) -> bool: ... + def convert(self, annotation: Any, kwargs: dict) -> Struct: ... + +class _UnionHookLike(Generic[_ModelT]): + def __model_init__(self, obj: _ModelT, *args, **kwargs) -> None: ... + def __model_setattr__(self, obj: _ModelT, key: str, new_value: Any) -> None: ... + +class UnionHook(Generic[_ModelT]): + struct: Struct[_ModelT] + max_size: int + def __init__(self, struct_: Struct[_ModelT]) -> None: ... + def __enter__(self) -> None: ... + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: types.TracebackType | None, + ) -> None: ... + def __model_init__(self, obj: Any, *args, **kwargs) -> None: ... + def __model_setattr__(self, obj: Any, key: str, new_value: Any) -> None: ... 
+ def refresh( + self, obj: Any, key: str, new_value: Any, members: dict[str, Field] + ) -> None: ... + +@overload +def struct( + cls: Type[_ModelT], + /, + *, + options: Optional[Iterable[Flag]] = None, + order: Optional[ByteOrder] = None, + arch: Optional[Arch] = None, + field_options: Optional[Flag] = None, + kw_only: bool = False, +) -> Type[_ModelT]: ... +@overload +def struct( + cls: None = None, + /, + *, + options: Optional[Iterable[Flag]] = None, + order: Optional[ByteOrder] = None, + arch: Optional[Arch] = None, + field_options: Optional[Flag] = None, + kw_only: bool = False, +) -> Callable[[_ModelT], _ModelT]: ... +@overload +def union( + cls: Type[_ModelT], + /, + *, + options: Optional[Iterable[Flag]] = None, + order: Optional[ByteOrder] = None, + arch: Optional[Arch] = None, + field_options: Optional[Flag] = None, + kw_only: bool = False, + hook_cls: Optional[Type[_UnionHookLike[_ModelT]]] = None, +) -> Type[_ModelT]: ... +@overload +def union( + cls: None = None, + /, + *, + options: Optional[Iterable[Flag]] = None, + order: Optional[ByteOrder] = None, + arch: Optional[Arch] = None, + field_options: Optional[Flag] = None, + kw_only: bool = False, + hook_cls: Optional[Type[_UnionHookLike[_ModelT]]] = None, +) -> Callable[[_ModelT], _ModelT]: ... +@overload +def pack( + obj: _ContainsStruct[_ModelT, _ModelT], + struct: None = None, + /, + *, + use_tempfile: bool = ..., + as_field: bool = ..., + **kwds, +) -> bytes: ... +@overload +def pack( + obj: _IT, + struct: Union[_ModelT, _ContainsStruct[_IT, _OT], _SupportsPack[_IT]] = None, + /, + *, + use_tempfile: bool = ..., + as_field: bool = ..., + **kwds, +) -> bytes: ... +@overload +def pack_into( + obj: _ContainsStruct[_ModelT, _ModelT], + buffer: _StreamType, + struct: None = None, + /, + *, + use_tempfile: bool = ..., + as_field: bool = ..., + **kwds, +) -> bytes: ... 
+@overload +def pack_into( + obj: _IT, + buffer: _StreamType, + struct: Union[_SupportsPack[_IT], _ContainsStruct[_IT, _OT]], + /, + *, + use_tempfile: bool = ..., + as_field: bool = ..., + **kwds, +) -> bytes: ... +@overload +def pack_file( + obj: _ContainsStruct[_ModelT, _ModelT], + filename: str, + struct: None = None, + /, + *, + use_tempfile: bool = ..., + as_field: bool = ..., + **kwds, +) -> bytes: ... +@overload +def pack_file( + obj: _IT, + filename: str, + struct: Union[_SupportsPack[_IT], _ContainsStruct[_IT, _OT]], + /, + *, + use_tempfile: bool = ..., + as_field: bool = ..., + **kwds, +) -> bytes: ... +@overload +def unpack( + struct: Union[_SupportsUnpack[_OT], _ContainsStruct[_IT, _OT]], + buffer: bytes | _StreamType, + /, + *, + as_field: bool = ..., + **kwds, +) -> _OT: ... +@overload +def unpack( + struct: Type[_ModelT], + buffer: bytes | _StreamType, + /, + *, + as_field: bool = ..., + **kwds, +) -> _ModelT: ... +def unpack_file( + struct: Union[_SupportsUnpack[_OT], _ContainsStruct[_IT, _OT]], + filename: str, + /, + *, + as_field: bool = ..., + **kwds, +) -> _OT: ... +def sizeof(obj: Union[_SupportsSize, _ContainsStruct, _StructLike], **kwds) -> int: ... From 928ffcd343fece8cdf67a31906e8a0685ce922d1 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Wed, 25 Jun 2025 08:32:52 +0200 Subject: [PATCH 09/41] Added type hints for model._bitfield. --- _bitfield.py: + Moved attribute constants to caterpillar.shared. + Removed getformat(). + Removed type hints from the original file. + Fixed issue where field options defined in BitField were not populated when creating fields. _base.py: + Fixed issue where field options defined in Sequence were not populated when creating fields. + Added private methods to stubs. 
--- src/caterpillar/model/_base.py | 2 +- src/caterpillar/model/_base.pyi | 13 +++++ src/caterpillar/model/_bitfield.py | 84 +++++++++++++---------------- src/caterpillar/model/_bitfield.pyi | 84 +++++++++++++++++++++++++++++ src/caterpillar/shared.py | 2 + src/caterpillar/shared.pyi | 2 + 6 files changed, 140 insertions(+), 47 deletions(-) mode change 100644 => 100755 src/caterpillar/model/_bitfield.py create mode 100755 src/caterpillar/model/_bitfield.pyi diff --git a/src/caterpillar/model/_base.py b/src/caterpillar/model/_base.py index ff7d2a9f..703fa823 100755 --- a/src/caterpillar/model/_base.py +++ b/src/caterpillar/model/_base.py @@ -286,7 +286,7 @@ def _process_field(self, name: str, annotation, default) -> Field: field.default = default field.order = self.order or field.order field.arch = self.arch or field.arch - field.flags.update(self.field_options) + field.flags.update({hash(x): x for x in self.field_options}) return field def add_field(self, name: str, field: Field, included: bool = False) -> None: diff --git a/src/caterpillar/model/_base.pyi b/src/caterpillar/model/_base.pyi index d82fa00d..a192be91 100755 --- a/src/caterpillar/model/_base.pyi +++ b/src/caterpillar/model/_base.pyi @@ -56,3 +56,16 @@ class Sequence(FieldMixin, Generic[_SeqModelT], _StructLike[_SeqModelT, _SeqMode def __unpack__(self, context: _ContextLike) -> _SeqModelT: ... def __type__(self) -> Type[_SeqModelT]: ... def __pack__(self, obj: _SeqModelT, context: _ContextLike) -> None: ... + def _set_default(self, name: str, value: Any) -> Any: ... + def _included(self, name: str, default: Any | None, annotation: Any) -> bool: ... + def _process_default( + self, name: str, annotation: Any, had_default: bool = ... + ) -> Any: ... + def _replace_type(self, name: str, type_: type) -> None: ... + def _remove_from_model(self, name: str) -> None: ... + def _process_model(self) -> None: ... + def _prepare_fields(self) -> Dict[str, Any]: ... 
+ def _process_annotation( + self, annotation: Any, default: Any, order: ByteOrder, arch: Arch + ) -> _StructLike: ... + def _process_field(self, name: str, annotation: Any, default: Any) -> Field: ... diff --git a/src/caterpillar/model/_bitfield.py b/src/caterpillar/model/_bitfield.py old mode 100644 new mode 100755 index 91c4aace..ac212e24 --- a/src/caterpillar/model/_bitfield.py +++ b/src/caterpillar/model/_bitfield.py @@ -19,7 +19,8 @@ from typing import Self, List from dataclasses import dataclass, field as dcfield -from caterpillar.abc import _StructLike, _ContextLike, _StreamType, typeof +from caterpillar.abc import _StructLike, _ContextLike, _StreamType +from caterpillar.shared import typeof, ATTR_BITS, ATTR_SIGNED from caterpillar.byteorder import ( Arch, ByteOrder, @@ -47,25 +48,16 @@ from ._struct import Struct -BitTuple = Tuple[int, int, type] +BitTuple = tuple -BITS_ATTR = "__bits__" -SIGNED_ATTR = "__signed__" - - -def getbits(obj: Any) -> int: - __bits__ = getattr(obj, BITS_ATTR) +def getbits(obj) -> int: + __bits__ = getattr(obj, ATTR_BITS) return __bits__() if callable(__bits__) else __bits__ -def issigned(obj: Any) -> bool: - return bool(getattr(obj, SIGNED_ATTR, None)) - - -def getformat(obj: Any) -> str: - attr = getattr(obj, "__fmt__") - return attr() if callable(attr) else attr +def issigned(obj) -> bool: + return bool(getattr(obj, ATTR_SIGNED, None)) @dataclass(init=False) @@ -73,9 +65,9 @@ class BitFieldGroup: size: int pos: int fmt: str - fields: Dict[BitTuple, Field] = dcfield(default_factory=dict) + fields: dict = dcfield(default_factory=dict) - def __init__(self, size: int, pos: int, fields: Dict = None) -> None: + def __init__(self, size: int, pos: int, fields=None) -> None: self.size = size self.pos = pos self.fields = fields or {} @@ -104,17 +96,17 @@ class BitField(Struct): def __init__( self, - model: type, - order: Optional[ByteOrder] = None, - arch: Optional[Arch] = None, - options: Iterable[Flag] = None, - field_options: 
Iterable[Flag] = None, + model, + order=None, + arch=None, + options=None, + field_options=None, ) -> None: - self.groups: List[BitFieldGroup] = [] + self.groups = [] # These fields remain private and will be deleted after processing - self._bit_pos: int = 0 - self._abs_bit_pos: int = 0 - self._current_group: BitFieldGroup = None + self._bit_pos = 0 + self._abs_bit_pos = 0 + self._current_group = None super().__init__( model=model, @@ -133,7 +125,7 @@ def __init__( del self._abs_bit_pos del self._current_group - def __add__(self, other: "BitField") -> Self: + def __add__(self, other): if not isinstance(other, BitField): raise ValidationError( f"Attempted to add a non-bitfield struct to a bitfield! (type={type(other)})" @@ -141,9 +133,7 @@ def __add__(self, other: "BitField") -> Self: # REVISIT: undefined bahaviour when parsing return super(Struct, self).__add__(other) - def _process_field( - self, name: str, annotation: Any, default: Optional[Any] - ) -> Field: + def _process_field(self, name: str, annotation, default): """ Process a field in the model. 
@@ -157,11 +147,11 @@ def _process_field( # name : bit_count [ - struct ] [ = default_value ] # or # name : struct [ = default_value ] - struct: _StructLike = None - field: Field = None + struct = None + field = None order = byteorder(annotation, self.order) - group: BitFieldGroup = self._current_group + group = self._current_group arch = self.arch or system_arch width = 0 @@ -232,7 +222,7 @@ def _process_field( field.order = self.order or field.order field.arch = self.arch or field.arch field.bits = field.bits or width - field.flags.update(self.field_options) + field.flags.update({hash(x): x for x in self.field_options}) # Now, we have to check whether a new byte has to be started if group.size - self._bit_pos < width: @@ -257,7 +247,7 @@ def _process_field( self._abs_bit_pos += width return field - def _included(self, name: str, default: Optional[Any], annotation: Any) -> bool: + def _included(self, name: str, default, annotation) -> bool: if not super()._included(name, default, annotation): return False @@ -267,7 +257,7 @@ def _included(self, name: str, default: Optional[Any], annotation: Any) -> bool: return True def group(self, bit_index: int) -> Optional[BitFieldGroup]: - grp: BitFieldGroup = None + grp = None for candidate in self.groups: if bit_index > candidate.pos: break @@ -313,7 +303,7 @@ def unpack_one(self, context: _ContextLike) -> Optional[Any]: return self.model(**init_data) - def pack_one(self, obj: Any, context: _ContextLike) -> None: + def pack_one(self, obj, context: _ContextLike) -> None: # REVISIT: this function is very time consuming. should be do something # about that? 
stream: _StreamType = context[CTX_STREAM] @@ -351,10 +341,12 @@ def pack_one(self, obj: Any, context: _ContextLike) -> None: def _make_bitfield( cls: type, - options: Iterable[Flag], - order: Optional[ByteOrder] = None, - arch: Optional[Arch] = None, - field_options: Iterable[Flag] = None, + /, + *, + options, + order=None, + arch=None, + field_options=None, ) -> type: _ = BitField( cls, order=order, arch=arch, options=options, field_options=field_options @@ -363,13 +355,13 @@ def _make_bitfield( def bitfield( - cls: type = None, + cls=None, /, *, - options: Iterable[Flag] = None, - order: Optional[ByteOrder] = None, - arch: Optional[Arch] = None, - field_options: Iterable[Flag] = None, + options=None, + order=None, + arch=None, + field_options=None, ): def wrap(cls): return _make_bitfield( diff --git a/src/caterpillar/model/_bitfield.pyi b/src/caterpillar/model/_bitfield.pyi new file mode 100755 index 00000000..831ad5c5 --- /dev/null +++ b/src/caterpillar/model/_bitfield.pyi @@ -0,0 +1,84 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+from typing import Any, Callable, Dict, Iterable, Optional, Self, Type, TypeVar, overload + +from caterpillar.abc import _ContextLike +from caterpillar.byteorder import Arch, ByteOrder +from caterpillar.options import Flag +from caterpillar.fields._base import Field +from caterpillar.model._struct import Struct + +_ModelT = TypeVar("_ModelT") + +BitTuple = tuple[int, int, type] +BITS_ATTR: str +SIGNED_ATTR: str + +def getbits(obj: Any) -> int: ... +def issigned(obj: Any) -> bool: ... + +class BitFieldGroup: + size: int + pos: int + fmt: str + fields: dict[BitTuple, Field] = ... + def __init__( + self, + size: int, + pos: int, + fields: Optional[Dict[BitTuple, Field]] = None, + ) -> None: ... + +class BitField(Struct[_ModelT]): + groups: list[BitFieldGroup] + __bits__: int + __fmt__: str + + def __init__( + self, + model: Type[_ModelT], + options: Optional[Iterable[Flag]] = None, + order: Optional[ByteOrder] = None, + arch: Optional[Arch] = None, + field_options: Optional[Flag] = None, + ) -> None: ... + def __add__(self, other: BitField) -> Self: ... + def __size__(self, context: _ContextLike) -> int: ... + def group(self, bit_index: int) -> BitFieldGroup | None: ... + def unpack_one(self, context: _ContextLike) -> _ModelT: ... + def pack_one(self, obj: _ModelT, context: _ContextLike) -> None: ... + +@overload +def bitfield( + cls: None = None, + /, + *, + options: Iterable[Flag] | None = None, + order: ByteOrder | None = None, + arch: Arch | None = None, + field_options: Iterable[Flag] | None = None, +) -> Callable[[Type[_ModelT]], Type[_ModelT]]: ... + +@overload +def bitfield( + cls: Type[_ModelT], + /, + *, + options: Iterable[Flag] | None = None, + order: ByteOrder | None = None, + arch: Arch | None = None, + field_options: Iterable[Flag] | None = None, +) -> Type[_ModelT]:... 
+ diff --git a/src/caterpillar/shared.py b/src/caterpillar/shared.py index 32fa5ff2..aacde0c0 100644 --- a/src/caterpillar/shared.py +++ b/src/caterpillar/shared.py @@ -32,6 +32,8 @@ ATTR_STRUCT = "__struct__" ATTR_BYTEORDER = "__byteorder__" ATTR_TYPE = "__type__" +ATTR_BITS = "__bits__" +ATTR_SIGNED = "__signed__" # TODO: add to reference # NEW CONCEPT: Actions diff --git a/src/caterpillar/shared.pyi b/src/caterpillar/shared.pyi index 03ffbdc2..74f6af2d 100755 --- a/src/caterpillar/shared.pyi +++ b/src/caterpillar/shared.pyi @@ -20,6 +20,8 @@ MODE_UNPACK: int = ... ATTR_STRUCT: str = ... ATTR_TYPE: str = ... ATTR_BYTEORDER: str = ... +ATTR_BITS: str = ... +ATTR_SIGNED: str = ... ATTR_ACTION_PACK: str = ... ATTR_ACTION_UNPACK: str = ... From af088ae9650049a8a5c4fd2b10a7f18f5c0d5b0a Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Wed, 25 Jun 2025 08:47:49 +0200 Subject: [PATCH 10/41] Added model._template type stubs --- _template.py: + Moved template attribute constant to shared.py + removed all type hints + Updated imports in _bitfield, _struct and _base --- src/caterpillar/model/_bitfield.py | 25 +++++-------- src/caterpillar/model/_struct.py | 11 ++---- src/caterpillar/model/_template.py | 56 +++++++++++------------------ src/caterpillar/model/_template.pyi | 53 +++++++++++++++++++++++++++ src/caterpillar/shared.py | 3 +- src/caterpillar/shared.pyi | 1 + 6 files changed, 88 insertions(+), 61 deletions(-) mode change 100644 => 100755 src/caterpillar/model/_template.py create mode 100755 src/caterpillar/model/_template.pyi diff --git a/src/caterpillar/model/_bitfield.py b/src/caterpillar/model/_bitfield.py index ac212e24..517f3424 100755 --- a/src/caterpillar/model/_bitfield.py +++ b/src/caterpillar/model/_bitfield.py @@ -14,22 +14,15 @@ # along with this program. If not, see . 
import struct as libstruct -from typing import Optional, Any, Dict -from typing import Iterable, Tuple -from typing import Self, List from dataclasses import dataclass, field as dcfield -from caterpillar.abc import _StructLike, _ContextLike, _StreamType +from caterpillar.abc import _StructLike from caterpillar.shared import typeof, ATTR_BITS, ATTR_SIGNED from caterpillar.byteorder import ( - Arch, - ByteOrder, byteorder, system_arch, - LittleEndian, ) from caterpillar.options import ( - Flag, GLOBAL_BITFIELD_FLAGS, GLOBAL_STRUCT_OPTIONS, GLOBAL_UNION_OPTIONS, @@ -44,7 +37,7 @@ Pass, ) from caterpillar.exception import ValidationError, DelegationError -from caterpillar.context import Context, CTX_PATH, CTX_OBJECT, CTX_STREAM +from caterpillar.context import Context, CTX_OBJECT, CTX_STREAM from ._struct import Struct @@ -83,8 +76,6 @@ def __init__(self, size: int, pos: int, fields=None) -> None: class BitField(Struct): - groups: List[BitFieldGroup] - __slots__ = ( "groups", "_bit_pos", @@ -256,7 +247,7 @@ def _included(self, name: str, default, annotation) -> bool: return False return True - def group(self, bit_index: int) -> Optional[BitFieldGroup]: + def group(self, bit_index: int): grp = None for candidate in self.groups: if bit_index > candidate.pos: @@ -264,14 +255,14 @@ def group(self, bit_index: int) -> Optional[BitFieldGroup]: grp = candidate return grp - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: # The size of a bitfield is alsways static return self.__bits__ // 8 - def unpack_one(self, context: _ContextLike) -> Optional[Any]: + def unpack_one(self, context): # At first, we define the object context where the parsed values # will be stored - init_data: Dict[str, Any] = Context() + init_data = Context() context[CTX_OBJECT] = Context(_parent=context) values = libstruct.unpack( f"{self.order.ch}{self.__fmt__}", @@ -303,10 +294,10 @@ def unpack_one(self, context: _ContextLike) -> Optional[Any]: return 
self.model(**init_data) - def pack_one(self, obj, context: _ContextLike) -> None: + def pack_one(self, obj, context) -> None: # REVISIT: this function is very time consuming. should be do something # about that? - stream: _StreamType = context[CTX_STREAM] + stream = context[CTX_STREAM] values = [] for group in self.groups: # The same applies here, but we convert all values to int instead of reading diff --git a/src/caterpillar/model/_struct.py b/src/caterpillar/model/_struct.py index 3f6b6ffa..d00c620f 100755 --- a/src/caterpillar/model/_struct.py +++ b/src/caterpillar/model/_struct.py @@ -17,23 +17,18 @@ from tempfile import TemporaryFile from io import BytesIO, IOBase -from typing import Optional, Union, Callable -from typing import Dict, Any, Iterable from collections import OrderedDict from shutil import copyfileobj from caterpillar.shared import getstruct, hasstruct, ATTR_STRUCT -from caterpillar.abc import _StructLike, _StreamType, _SupportsUnpack, _SupportsPack -from caterpillar.abc import _ContainsStruct, _ContextLike, _SupportsSize +from caterpillar.abc import _SupportsUnpack, _SupportsSize from caterpillar.context import Context, CTX_STREAM -from caterpillar.byteorder import ByteOrder, Arch from caterpillar.exception import InvalidValueError from caterpillar.options import ( S_EVAL_ANNOTATIONS, S_UNION, S_ADD_BYTES, S_SLOTS, - Flag, GLOBAL_STRUCT_OPTIONS, GLOBAL_UNION_OPTIONS, ) @@ -455,7 +450,7 @@ def pack_into( :raises TypeError: If no `struct` is specified and cannot be inferred from the object. 
""" - offsets: Dict[int, memoryview] = OrderedDict() + offsets = OrderedDict() context = Context( _parent=None, _path="", _pos=0, _offsets=offsets, mode=MODE_PACK, **kwds ) @@ -601,7 +596,7 @@ def unpack_file( return unpack(struct, fp, as_field=as_field, **kwds) -def sizeof(obj: Union[_StructLike, _ContainsStruct, _SupportsSize], **kwds) -> int: +def sizeof(obj, **kwds) -> int: context = Context(_parent=None, _path="", **kwds) struct_ = obj if hasstruct(struct_): diff --git a/src/caterpillar/model/_template.py b/src/caterpillar/model/_template.py old mode 100644 new mode 100755 index 9d818a3b..9de8c45b --- a/src/caterpillar/model/_template.py +++ b/src/caterpillar/model/_template.py @@ -13,25 +13,16 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . -from __future__ import annotations - import sys import inspect import types import dataclasses -from typing import Dict, Any -from typing import Union, Self -from typing import Optional -from typing import Callable, TypeVar - -from caterpillar.byteorder import ByteOrder, Arch -from caterpillar.abc import _GreedyType, _PrefixedType -from caterpillar.abc import _ContextLambda, _Switch -from caterpillar.abc import _StructLike +from caterpillar.abc import _GreedyType from caterpillar.fields import Field, INVALID_DEFAULT from caterpillar.model import Struct from caterpillar.options import S_UNION +from caterpillar.shared import ATTR_TEMPLATE class TemplateTypeVar: @@ -55,7 +46,7 @@ class TemplateTypeVar: name: str """The bound name of this type variable""" - field_kwds: Dict[str, Any] + field_kwds: dict """Arguments that will be passed to the created field instance.""" def __init__(self, name: str, **field_kwds) -> None: @@ -73,31 +64,29 @@ def __repr__(self) -> str: return f"~{self.name}[{count}]" # Now we have to implement all special operators defined in FieldMixin - def __getitem__( - self, amount: Union[int, _GreedyType, _PrefixedType, _ContextLambda] - ) 
-> TemplateTypeVar: + def __getitem__(self, amount): return TemplateTypeVar(self.name, amount=amount, **self.field_kwds) - def __rshift__(self, switch: Union[dict, _Switch]) -> Self: + def __rshift__(self, switch): return TemplateTypeVar(self.name, options=switch, **self.field_kwds) - def __matmul__(self, offset: Union[int, _ContextLambda]) -> Self: + def __matmul__(self, offset): return TemplateTypeVar(self.name, offset=offset, **self.field_kwds) - def __set_byteorder__(self, order: ByteOrder) -> Self: + def __set_byteorder__(self, order): return TemplateTypeVar(self.name, order=order, **self.field_kwds) - def __rsub__(self, bits: Union[int, _ContextLambda]) -> Self: + def __rsub__(self, bits): return TemplateTypeVar(self.name, bits=bits, **self.field_kwds) # @scheduled_for_removal - def __floordiv__(self, condition: Union[_ContextLambda, bool]) -> Self: + def __floordiv__(self, condition): return TemplateTypeVar(self.name, condition=condition, **self.field_kwds) def to_field( self, - struct: Union[_StructLike, _ContextLambda], - arch: Optional[Arch] = None, + struct, + arch, default=INVALID_DEFAULT, ) -> Field: # REVISIT: what about flags? 
@@ -122,13 +111,10 @@ def get_caller_module(frame: int = 1) -> str: raise ModuleNotFoundError("Could not load module from caller!") from e -TEMPLATE_ATTR = "__template__" - - @dataclasses.dataclass class TemplateInfo: - required_tys: Dict[str, _StructLike] - positional_tys: Dict[str, _StructLike] + required_tys: dict + positional_tys: dict def is_defined(self, name: str) -> bool: return name in list(self.required_tys) + list(self.positional_tys) @@ -144,12 +130,12 @@ def add_positional(self, name: str, default=None) -> None: self.positional_tys[name] = default -def istemplate(obj: Any) -> bool: +def istemplate(obj) -> bool: """Return true if the object is a template.""" - return hasattr(obj, TEMPLATE_ATTR) + return hasattr(obj, ATTR_TEMPLATE) -def template(*args: Union[str, TemplateTypeVar], **kwargs) -> Callable[[type], type]: +def template(*args, **kwargs): """ Defines required template type variables if necessary and prepares template class definition. @@ -197,13 +183,13 @@ def create_template_class(cls) -> type: for name in disposable: # Only temporary template vars will be removed delattr(module, name) - setattr(cls, TEMPLATE_ATTR, info) + setattr(cls, ATTR_TEMPLATE, info) return cls return create_template_class -def get_mangled_name(model_ty: type, annotations: Dict[str, Any]) -> str: +def get_mangled_name(model_ty: type, annotations: dict) -> str: ty_name = model_ty.__name__ parts = [] for name, value in annotations.items(): @@ -214,7 +200,7 @@ def get_mangled_name(model_ty: type, annotations: Dict[str, Any]) -> str: def derive( - template_ty: type, *tys_args, partial=False, name=None, union=False, **tys_kwargs + template_ty, *tys_args, partial=False, name=None, union=False, **tys_kwargs ) -> type: """Creates a new struct class based on the given template class. 
@@ -237,7 +223,7 @@ def derive( if not istemplate(template_ty): raise TypeError(f"{template_ty.__name__} is not a template class!") - info: TemplateInfo = getattr(template_ty, TEMPLATE_ATTR) + info: TemplateInfo = getattr(template_ty, ATTR_TEMPLATE) if len(tys_args) > len(info.required_tys): raise ValueError( f"Expected max. {len(info.required_tys)} positional arguments - got {len(tys_args)}!" @@ -321,5 +307,5 @@ def derive( new_info.required_tys[name] = replacement elif name in info.positional_tys: new_info.positional_tys[name] = replacement - setattr(new_ty, TEMPLATE_ATTR, new_info) + setattr(new_ty, ATTR_TEMPLATE, new_info) return new_ty diff --git a/src/caterpillar/model/_template.pyi b/src/caterpillar/model/_template.pyi new file mode 100755 index 00000000..e5ef5de3 --- /dev/null +++ b/src/caterpillar/model/_template.pyi @@ -0,0 +1,53 @@ +from typing import Any, Callable, Protocol, Type, TypeVar + +from caterpillar.abc import ( + _LengthT, + _StructLike, + _ContextLambda, + _Switch, +) +from caterpillar.byteorder import ByteOrder, Arch +from caterpillar.fields._base import Field + +_TemplateModelT = TypeVar("_TemplateModelT") + +class _ContainsTemplate(Protocol): + __template__: TemplateInfo + +class TemplateTypeVar: + name: str + field_kwds: dict[str, Any] + def __init__(self, name: str, **field_kwds) -> None: ... + def __getitem__(self, amount: _LengthT) -> TemplateTypeVar: ... + def __rshift__(self, switch: dict | _Switch) -> TemplateTypeVar: ... + def __matmul__(self, offset: int | _ContextLambda) -> TemplateTypeVar: ... + def __set_byteorder__(self, order: ByteOrder) -> TemplateTypeVar: ... + def __rsub__(self, bits: int | _ContextLambda) -> TemplateTypeVar: ... + def __floordiv__(self, condition: _ContextLambda | bool) -> TemplateTypeVar: ... + def to_field( + self, + struct: _StructLike | _ContextLambda, + arch: Arch | None = None, + default=..., + ) -> Field: ... 
+ +class TemplateInfo: + required_tys: dict[str, _StructLike] + positional_tys: dict[str, _StructLike] + def is_defined(self, name: str) -> bool: ... + def add_required(self, name: str) -> None: ... + def add_positional(self, name: str, default: Any = None) -> None: ... + +def istemplate(obj: Any) -> bool: ... +def template( + *args: str | TemplateTypeVar, **kwargs +) -> Callable[[Type[_TemplateModelT]], Type[_TemplateModelT]]: ... +def get_mangled_name(model_ty: type, annotations: dict[str, Any]) -> str: ... +def derive( + template_ty: Type[_ContainsTemplate], + *tys_args, + partial: bool = False, + name: str | None = None, + union: bool = False, + **tys_kwargs, +) -> type: ... diff --git a/src/caterpillar/shared.py b/src/caterpillar/shared.py index aacde0c0..4a2c78e6 100644 --- a/src/caterpillar/shared.py +++ b/src/caterpillar/shared.py @@ -12,7 +12,7 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . -from typing import Any, Callable +from typing import Any from caterpillar.abc import _ContextLambda # --- Shared Concepts --- @@ -34,6 +34,7 @@ ATTR_TYPE = "__type__" ATTR_BITS = "__bits__" ATTR_SIGNED = "__signed__" +ATTR_TEMPLATE = "__template__" # TODO: add to reference # NEW CONCEPT: Actions diff --git a/src/caterpillar/shared.pyi b/src/caterpillar/shared.pyi index 74f6af2d..c871645d 100755 --- a/src/caterpillar/shared.pyi +++ b/src/caterpillar/shared.pyi @@ -22,6 +22,7 @@ ATTR_TYPE: str = ... ATTR_BYTEORDER: str = ... ATTR_BITS: str = ... ATTR_SIGNED: str = ... +ATTR_TEMPLATE: str = ... ATTR_ACTION_PACK: str = ... ATTR_ACTION_UNPACK: str = ... 
From 5707d0aec670bffc5193d4b74730a5280511e770 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Wed, 25 Jun 2025 09:26:42 +0200 Subject: [PATCH 11/41] Added stubs file for fields._base --- _base.py: + Removed all type hints --- src/caterpillar/fields/_base.py | 237 +++++++++++++---------------- src/caterpillar/fields/_base.pyi | 101 ++++++++++++ src/caterpillar/model/__init__.py | 2 +- src/caterpillar/model/__init__.pyi | 29 ++++ 4 files changed, 241 insertions(+), 128 deletions(-) mode change 100644 => 100755 src/caterpillar/fields/_base.py create mode 100755 src/caterpillar/fields/_base.pyi create mode 100755 src/caterpillar/model/__init__.pyi diff --git a/src/caterpillar/fields/_base.py b/src/caterpillar/fields/_base.py old mode 100644 new mode 100755 index 6398516f..31ebd409 --- a/src/caterpillar/fields/_base.py +++ b/src/caterpillar/fields/_base.py @@ -12,21 +12,14 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
-from typing import Self, Union, Set, Any, Dict, Optional, List +from typing import Union, Any, List from io import BytesIO from caterpillar.abc import ( _StructLike, - _ContextLambda, - _Switch, - _StreamType, - _ContextLike, _GreedyType, _PrefixedType, - hasstruct, - getstruct, - typeof, ) -from caterpillar.byteorder import ByteOrder, SysNative, Arch, system_arch +from caterpillar.byteorder import ByteOrder, SysNative, system_arch from caterpillar.exception import ( DynamicSizeError, StructException, @@ -41,10 +34,9 @@ F_SEQUENTIAL, Flag, ) -from caterpillar.context import CTX_OFFSETS, CTX_STREAM -from caterpillar.context import CTX_FIELD -from caterpillar.context import CTX_VALUE, CTX_SEQ +from caterpillar.context import CTX_OFFSETS, CTX_STREAM, CTX_FIELD, CTX_VALUE, CTX_SEQ from caterpillar import registry +from caterpillar.shared import hasstruct, getstruct, typeof def singleton(cls): @@ -57,80 +49,79 @@ def singleton(cls): DEFAULT_OPTION = object() -# @dataclass(init=False) class Field: """Represents a field in a data structure.""" - struct: Union[_StructLike, _ContextLambda] - """ - Stores a reference to the actual parsing struct that will be used to parse or - build our data. This attribute is never null. - """ - - order: ByteOrder - """ - An automatically inferred or explicitly specified byte order. Note that this - attribute may have no impact on the underlying struct. The default byte order - is ``SysNative``. - """ - - offset: Union[_ContextLambda, int] - """ - Using the ``@`` operator an offset can be assigned to a field. If set, the - stream will be reset and set to the original position. - - The minus one indicates that no offset has been associated with this field. - """ - - flags: Dict[int, Flag] - """ - Additional options that can be enabled using the logical OR operator ``|``. 
- - Note that there are default options that will be set automatically: - - * ``keep_position``: - Persists the streams position after parsing data using the underlying - struct. In relation to ``offset``, this option will reset the stream to - its original position if deactivated. - * ``dynamic``: - Specifies that this field does not store a constant size. - * ``sequential``: - An automatic flag that indicates this field stores a sequential struct. - """ - - amount: Union[_ContextLambda, int, _GreedyType, _PrefixedType] - """ - A constant or dynamic value to represent the amount of structs. Zero indicates - there are no sequence types associated with this field. - """ - - options: Union[_Switch, Dict[Any, _StructLike], None] - """ - An extra attribute that stores additional options that can be translates as a - switch statement. - """ - - condition: Union[_ContextLambda, bool] - """ - Given optional execution this attribute should be used to return a boolean value - that decides whether the value of this field should be set. Using ``//`` the - condition can be set during class declaration. - """ - - arch: Arch - """ - The field's architecture (inferred or explicitly specified). - """ - - default: Optional[Any] - """ - The configured default value. - """ - - bits: Union[_ContextLambda, int, None] - """ - The configured bits. - """ + # struct + # """ + # Stores a reference to the actual parsing struct that will be used to parse or + # build our data. This attribute is never null. + # """ + + # order: ByteOrder + # """ + # An automatically inferred or explicitly specified byte order. Note that this + # attribute may have no impact on the underlying struct. The default byte order + # is ``SysNative``. + # """ + + # offset + # """ + # Using the ``@`` operator an offset can be assigned to a field. If set, the + # stream will be reset and set to the original position. + + # The minus one indicates that no offset has been associated with this field. 
+ # """ + + # flags: Dict[int, Flag] + # """ + # Additional options that can be enabled using the logical OR operator ``|``. + + # Note that there are default options that will be set automatically: + + # * ``keep_position``: + # Persists the streams position after parsing data using the underlying + # struct. In relation to ``offset``, this option will reset the stream to + # its original position if deactivated. + # * ``dynamic``: + # Specifies that this field does not store a constant size. + # * ``sequential``: + # An automatic flag that indicates this field stores a sequential struct. + # """ + + # amount: Union[_ContextLambda, int, _GreedyType, _PrefixedType] + # """ + # A constant or dynamic value to represent the amount of structs. Zero indicates + # there are no sequence types associated with this field. + # """ + + # options + # """ + # An extra attribute that stores additional options that can be translates as a + # switch statement. + # """ + + # condition + # """ + # Given optional execution this attribute should be used to return a boolean value + # that decides whether the value of this field should be set. Using ``//`` the + # condition can be set during class declaration. + # """ + + # arch + # """ + # The field's architecture (inferred or explicitly specified). + # """ + + # default + # """ + # The configured default value. + # """ + + # bits + # """ + # The configured bits. 
+ # """ __slots__ = ( "struct", @@ -148,18 +139,17 @@ class Field: def __init__( self, - struct: Union[_StructLike, _ContextLambda], - order: ByteOrder | None = None, - offset: Union[_ContextLambda, int] = -1, - flags: Set[Flag] = None, - amount: Union[_ContextLambda, int, _PrefixedType] = 0, - options: Union[_Switch, Dict[Any, _StructLike], None] = None, - condition: Union[_ContextLambda, bool] = True, - arch: Arch = None, - default: Optional[Any] = INVALID_DEFAULT, - bits: Union[_ContextLambda, int, None] = None, + struct, + order=None, + offset=-1, + flags=None, + amount=0, + options=None, + condition=True, + arch=None, + default=INVALID_DEFAULT, + bits=None, ) -> None: - # NOTE: we use a custom init method to automatically set flags self.struct = struct self.order = order or SysNative self.flags = {hash(x): x for x in flags or set([F_KEEP_POSITION])} @@ -178,9 +168,7 @@ def __init__( # that None is still usable as default self.default = default - def _verify_context_value( - self, value: Union[_ContextLambda, Any], expected: type - ) -> None: + def _verify_context_value(self, value, expected) -> None: # As the offset value or amount may be dynamic, we have to candidate # types. There should be an error if none applies. 
if not isinstance(value, expected) and not callable(value): @@ -188,18 +176,18 @@ def _verify_context_value( f"Expected a valid value or context lambda, got {type(value)}" ) - def __or__(self, flag: Flag) -> Self: # add flags + def __or__(self, flag: Flag): # add flags if not isinstance(flag, Flag): raise TypeError(f"Expected a flag, got {type(flag)}") self.flags[hash(flag)] = flag return self - def __xor__(self, flag: Flag) -> Self: # remove flags: + def __xor__(self, flag: Flag): # remove flags: self.flags.pop(hash(flag), None) return self - def __matmul__(self, offset: Union[_ContextLambda, int]) -> Self: + def __matmul__(self, offset): self._verify_context_value(offset, int) self.offset = offset # This operation automatically removes the "keep_position" @@ -208,7 +196,7 @@ def __matmul__(self, offset: Union[_ContextLambda, int]) -> Self: self.flags.pop(F_KEEP_POSITION._hash_, None) return self - def __getitem__(self, dim: Union[_ContextLambda, int, _GreedyType]) -> Self: + def __getitem__(self, dim): self._verify_context_value(dim, (_GreedyType, int, _PrefixedType)) self.amount = dim if self.amount != 0: @@ -216,28 +204,28 @@ def __getitem__(self, dim: Union[_ContextLambda, int, _GreedyType]) -> Self: self.flags[F_SEQUENTIAL._hash_] = F_SEQUENTIAL return self - def __rshift__(self, switch: Union[_Switch, dict]) -> Self: + def __rshift__(self, switch): if not isinstance(switch, dict) and not callable(switch): raise TypeError(f"Expected a valid switch context, got {type(switch)}") self.options = switch return self - def __floordiv__(self, condition: Union[_ContextLambda, bool]) -> Self: + def __floordiv__(self, condition): self._verify_context_value(condition, bool) self.condition = condition return self - def __rsub__(self, bits: Union[_ContextLambda, int]) -> Self: + def __rsub__(self, bits): self._verify_context_value(bits, int) self.bits = bits return self - def __set_byteorder__(self, order: ByteOrder) -> Self: + def __set_byteorder__(self, order: 
ByteOrder): self.order = order return self - def __type__(self) -> type: + def __type__(self): return self.get_type() __ixor__ = __xor__ @@ -256,7 +244,7 @@ def is_seq(self) -> bool: # pylint: disable-next=protected-access return F_SEQUENTIAL._hash_ in self.flags - def is_enabled(self, context: _ContextLike) -> bool: + def is_enabled(self, context) -> bool: """Evaluates the condition of this field. :param context: the context on which to operate @@ -281,7 +269,7 @@ def has_flag(self, flag: Flag) -> bool: # pylint: disable-next=protected-access return flag._hash_ in self.flags or flag in GLOBAL_FIELD_FLAGS - def length(self, context: _ContextLike) -> Union[int, _GreedyType, _PrefixedType]: + def length(self, context): """Calculates the sequence length of this field. :param context: the context on which to operate @@ -298,11 +286,11 @@ def length(self, context: _ContextLike) -> Union[int, _GreedyType, _PrefixedType except Exception as exc: raise DynamicSizeError("Dynamic sized field!", context) from exc - def get_struct(self, value: Any, context: _ContextLike) -> _StructLike: + def get_struct(self, value, context): """Returns the struct from stored options. 
:param value: the unpacked or packed value - :type value: Any + :type value :param context: the current context :type context: _ContextLike :return: the struct that packs or unpacks the data @@ -327,11 +315,11 @@ def get_struct(self, value: Any, context: _ContextLike) -> _StructLike: return getstruct(struct) return struct - def get_offset(self, context: _ContextLike) -> int: + def get_offset(self, context) -> int: """Returns the offset position of this field""" return self.offset(context) if callable(self.offset) else self.offset - def get_type(self) -> type: + def get_type(self): """Returns the annotation type for this field :return: the annotation type @@ -348,24 +336,21 @@ def get_type(self) -> type: types = [typeof(s) for s in self.options.values()] return Union[*types, Any] - def get_name(self) -> Optional[str]: + def get_name(self): return getattr(self, "__name__", None) # IO related stuff - def __unpack__(self, context: _ContextLike) -> Optional[Any]: + def __unpack__(self, context): """Reads packed data from the given stream. This method returns nothing if this field is disabled and applies switch if additional options are configured. - :param stream: the data stream - :type stream: _StreamType :param context: the current context :type context: _ContextLike :return: the parsed data - :rtype: Optional[Any] """ - stream: _StreamType = context[CTX_STREAM] + stream = context[CTX_STREAM] if self.condition is not True and not self.is_enabled(context): # Disabled fields or context lambdas won't pack any data return @@ -413,7 +398,7 @@ def __unpack__(self, context: _ContextLike) -> Optional[Any]: return value - def __pack__(self, obj: Any, context: _ContextLike) -> None: + def __pack__(self, obj, context) -> None: """Writes the given object to the provided stream. There are several options associated with this function. First, disabled @@ -425,16 +410,14 @@ def __pack__(self, obj: Any, context: _ContextLike) -> None: flag ``KEEP_POSITION`` is not found. 
:param obj: the value to write - :type obj: Any - :param stream: the output stream - :type stream: _StreamType + :type obj :param context: the current context with a qualified path :type context: _ContextLike :raises TypeError: if the value is not iterable but this field is marked to be sequential """ # TODO: revisit code - stream: _StreamType = context[CTX_STREAM] + stream = context[CTX_STREAM] if self.condition is not True and not self.is_enabled(context): # Disabled fields or context lambdas won't pack any data return @@ -481,7 +464,7 @@ def __pack__(self, obj: Any, context: _ContextLike) -> None: context._root[CTX_OFFSETS][offset] = stream.getbuffer() context[CTX_STREAM] = base_stream - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """Calculates the size of this field. There are several situations to bear in mind when executing this function: @@ -548,7 +531,7 @@ def __repr__(self) -> str: # --- private type converter --- @registry.TypeConverter(_StructLike) -def _type_converter(annotation: _StructLike, kwargs: dict) -> Field: +def _type_converter(annotation, kwargs): # REVISIT: more options ? 
arch = kwargs.pop("arch", None) order = kwargs.pop("order", None) @@ -559,11 +542,11 @@ def _type_converter(annotation: _StructLike, kwargs: dict) -> Field: class _CallableTypeConverter(registry.TypeConverter): - def matches(self, annotation: Any) -> bool: + def matches(self, annotation) -> bool: # must be a callable but not a type return callable(annotation) and not isinstance(annotation, type) - def convert(self, annotation: Any, kwargs: dict) -> _StructLike: + def convert(self, annotation, kwargs): arch = kwargs.pop("arch", None) order = kwargs.pop("order", None) # callables are treates as context lambdas diff --git a/src/caterpillar/fields/_base.pyi b/src/caterpillar/fields/_base.pyi new file mode 100755 index 00000000..ea78de10 --- /dev/null +++ b/src/caterpillar/fields/_base.pyi @@ -0,0 +1,101 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from caterpillar import registry +from caterpillar.abc import ( + _ContextLambda, + _ContextLike, + _GreedyType, + _PrefixedType, + _StructLike, + _Switch, + _IT, + _OT, + _LengthT, + _StructT, +) +from caterpillar.byteorder import ( + Arch, + ByteOrder, +) +from caterpillar.options import ( + Flag, +) +from typing import Any, Optional, Self, Type, TypeVar + +_T = TypeVar("_T") + +def singleton(cls: Type[_T]) -> _T: ... + +INVALID_DEFAULT: object = ... +DEFAULT_OPTION: object = ... 
+ +class Field(_StructLike[_IT, _OT]): + struct: _StructT[_IT, _OT] + order: ByteOrder + offset: _ContextLambda | int + flags: dict[int, Flag] + amount: _LengthT + options: Optional[_Switch[_IT, _OT]] + condition: _ContextLambda | bool + arch: Arch + default: _OT | None + bits: _ContextLambda | int | None + def __init__( + self, + struct: _StructT[_IT, _OT], + order: ByteOrder | None = None, + offset: _ContextLambda | int = -1, + flags: Optional[set[Flag]] = None, + amount: _ContextLambda | int | _PrefixedType = 0, + options: _Switch | dict[Any, _StructLike] | None = None, + condition: _ContextLambda | bool = True, + arch: Optional[Arch] = None, + default: _OT | None = ..., + bits: _ContextLambda | int | None = None, + ) -> None: ... + def __or__(self, flag: Flag) -> Self: ... + def __xor__(self, flag: Flag) -> Self: ... + def __matmul__(self, offset: _ContextLambda | int) -> Self: ... + def __getitem__(self, dim: _LengthT) -> Self: ... + def __rshift__(self, switch: _Switch[_IT, _OT]) -> Self: ... + def __floordiv__(self, condition: _ContextLambda | bool) -> Self: ... + def __rsub__(self, bits: _ContextLambda | int) -> Self: ... + def __set_byteorder__(self, order: ByteOrder) -> Self: ... + def __type__(self) -> type: ... + def __unpack__(self, context: _ContextLike) -> _OT: ... + def __pack__(self, obj: _IT, context: _ContextLike) -> None: ... + def __size__(self, context: _ContextLike) -> int: ... + __ixor__ = __xor__ + __ior__ = __or__ + __ifloordiv__ = __floordiv__ + __irshift__ = __rshift__ + __imatmul__ = __matmul__ + __isub__ = __rsub__ + def _verify_context_value(self, value: Any, expected: type) -> None: ... + def is_seq(self) -> bool: ... + def is_enabled(self, context: _ContextLike) -> bool: ... + def has_condition(self) -> bool: ... + def has_flag(self, flag: Flag) -> bool: ... + def length(self, context: _ContextLike) -> int | _GreedyType | _PrefixedType: ... + def get_struct( + self, value: Any, context: _ContextLike + ) -> _StructLike[_IT, _OT]: ... 
+ def get_offset(self, context: _ContextLike) -> int: ... + def get_type(self) -> type: ... + def get_name(self) -> str | None: ... + +class _CallableTypeConverter(registry.TypeConverter): + def matches(self, annotation: Any) -> bool: ... + def convert(self, annotation: Any, kwargs: dict) -> Field: ... diff --git a/src/caterpillar/model/__init__.py b/src/caterpillar/model/__init__.py index 88f58eef..890d40ed 100644 --- a/src/caterpillar/model/__init__.py +++ b/src/caterpillar/model/__init__.py @@ -25,5 +25,5 @@ pack_file, sizeof, ) -from ._bitfield import BitField, bitfield, BitFieldGroup +from ._bitfield import BitField, bitfield, BitFieldGroup, issigned, getbits from ._template import istemplate, template, TemplateTypeVar, derive diff --git a/src/caterpillar/model/__init__.pyi b/src/caterpillar/model/__init__.pyi new file mode 100755 index 00000000..890d40ed --- /dev/null +++ b/src/caterpillar/model/__init__.pyi @@ -0,0 +1,29 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+from ._base import Sequence +from ._struct import ( + Struct, + struct, + UnionHook, + union, + unpack, + unpack_file, + pack, + pack_into, + pack_file, + sizeof, +) +from ._bitfield import BitField, bitfield, BitFieldGroup, issigned, getbits +from ._template import istemplate, template, TemplateTypeVar, derive From bab081ee790bac6a972f2edf8a9ff791e908b4bc Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Wed, 25 Jun 2025 09:41:35 +0200 Subject: [PATCH 12/41] Stubs file for fields._mixin --- src/caterpillar/fields/_mixin.py | 77 ++++++++++++-------------- src/caterpillar/fields/_mixin.pyi | 92 +++++++++++++++++++++++++++++++ 2 files changed, 127 insertions(+), 42 deletions(-) mode change 100644 => 100755 src/caterpillar/fields/_mixin.py create mode 100755 src/caterpillar/fields/_mixin.pyi diff --git a/src/caterpillar/fields/_mixin.py b/src/caterpillar/fields/_mixin.py old mode 100644 new mode 100755 index 016d77dd..679e667b --- a/src/caterpillar/fields/_mixin.py +++ b/src/caterpillar/fields/_mixin.py @@ -13,21 +13,14 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
from io import BytesIO -from types import EllipsisType -from typing import Any, Collection, List, Union, Iterable, Callable +from typing import Iterable from functools import partial -from caterpillar.abc import ( - _ContextLike, - _StructLike, - _ContextLambda, - _Switch, - getstruct, -) from caterpillar.byteorder import ByteOrder, byteorder from caterpillar.options import Flag from caterpillar.context import CTX_SEQ, CTX_STREAM from caterpillar._common import unpack_seq, pack_seq, WithoutContextVar +from caterpillar.shared import getstruct from ._base import Field @@ -35,39 +28,39 @@ class FieldMixin: """A simple mixin to support operators used to create :class:`Field` instances.""" - def __or__(self, flag: Flag) -> Field: + def __or__(self, flag: Flag): """Creates a field *with* the given flag.""" return Field(self, byteorder(self)) | flag - def __xor__(self, flag: Flag) -> Field: + def __xor__(self, flag: Flag): """Creates a field *without* the given flag.""" return Field(self, byteorder(self)) ^ flag - def __matmul__(self, offset: Union[_ContextLambda, int]) -> Field: + def __matmul__(self, offset): """Creates a field that should start at the given offset.""" return Field(self, byteorder(self)) @ offset - def __getitem__(self, dim: Union[_ContextLambda, int, EllipsisType]) -> Field: + def __getitem__(self, dim): """Returns a sequenced field.""" return Field(self, byteorder(self))[dim] - def __rshift__(self, switch: Union[_Switch, dict]) -> Field: + def __rshift__(self, switch): """Inserts switch options into the new field""" return Field(self, byteorder(self)) >> switch - def __floordiv__(self, condition: Union[_ContextLambda, bool]) -> Field: + def __floordiv__(self, condition): """Returns a field with the given condition""" return Field(self, byteorder(self)) // condition - def __set_byteorder__(self, order: ByteOrder) -> Field: + def __set_byteorder__(self, order: ByteOrder): """Returns a field with the given byteorder""" return Field(self, order=order) - 
def __rsub__(self, bits: Union[_ContextLambda, int]) -> Field: + def __rsub__(self, bits): """Returns a field with the given bit count""" return Field(self, byteorder(self), bits=bits) - def __and__(self, other: _StructLike) -> "Chain": + def __and__(self, other): """Returns a chain with the next element added at the end""" if isinstance(other, Chain): return other & self @@ -93,7 +86,7 @@ class FieldStruct(FieldMixin): "__bits__": "TBD", } - def pack_single(self, obj: Any, context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: """ Abstract method to pack a single element. @@ -105,7 +98,7 @@ def pack_single(self, obj: Any, context: _ContextLike) -> None: """ raise NotImplementedError - def unpack_single(self, context: _ContextLike) -> Any: + def unpack_single(self, context): """ Abstract method to unpack a single element. @@ -116,7 +109,7 @@ def unpack_single(self, context: _ContextLike) -> Any: """ raise NotImplementedError - def pack_seq(self, seq: Collection, context: _ContextLike) -> None: + def pack_seq(self, seq, context) -> None: """ Pack a sequence of elements using the provided context. @@ -127,7 +120,7 @@ def pack_seq(self, seq: Collection, context: _ContextLike) -> None: """ pack_seq(seq, context, self.pack_single) - def unpack_seq(self, context: _ContextLike) -> List[Any]: + def unpack_seq(self, context): """ Unpack a sequence of elements using the provided context. @@ -137,7 +130,7 @@ def unpack_seq(self, context: _ContextLike) -> List[Any]: """ return unpack_seq(context, self.unpack_single) - def __pack__(self, obj: Any, context: _ContextLike) -> None: + def __pack__(self, obj, context) -> None: """ Pack data based on whether the field is sequential or not. 
@@ -148,7 +141,7 @@ def __pack__(self, obj: Any, context: _ContextLike) -> None: """ (self.pack_single if not context[CTX_SEQ] else self.pack_seq)(obj, context) - def __unpack__(self, context: _ContextLike) -> Any: + def __unpack__(self, context): """ Unpack data based on whether the field is sequential or not. @@ -186,15 +179,15 @@ class Chain(FieldStruct): __slots__ = ("_elements",) - def __init__(self, initial: _StructLike, *structs: _StructLike) -> None: + def __init__(self, initial, *structs) -> None: # start -> next -> next -> next -> done | unpack # Y # done <- previous <- previous <- start | pack self._elements = [getstruct(initial, initial)] - self._elements += list(map(lambda x: getstruct(x, x), structs)) + self._elements += [x for x in map(lambda x: getstruct(x, x), structs) if x] @property - def head(self) -> _StructLike: + def head(self): """ Get the head of the chain, i.e., the first structure. @@ -204,7 +197,7 @@ def head(self) -> _StructLike: return self._elements[0] @property - def tail(self) -> _StructLike: + def tail(self): """ Get the tail of the chain, i.e., the last structure. @@ -214,7 +207,7 @@ def tail(self) -> _StructLike: return self._elements[-1] - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """ Calculate the size of the chain in bytes. @@ -235,7 +228,7 @@ def __type__(self) -> type: return self.tail.__type__() - def __and__(self, other: _StructLike) -> "Chain": + def __and__(self, other): """ Concatenate another structure to the end of the chain. @@ -247,7 +240,7 @@ def __and__(self, other: _StructLike) -> "Chain": self._elements.append(getstruct(other, other)) return self - def __rand__(self, other: _StructLike) -> "Chain": + def __rand__(self, other): """ Concatenate another structure to the beginning of the chain. 
@@ -258,7 +251,7 @@ def __rand__(self, other: _StructLike) -> "Chain": """ return self.__and__(other) - def unpack_single(self, context: _ContextLike) -> memoryview: + def unpack_single(self, context): """ Unpack a single data instance from the chain. @@ -278,7 +271,7 @@ def unpack_single(self, context: _ContextLike) -> memoryview: return data - def pack_single(self, obj: Any, context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: """ Pack a single data instance into the chain. @@ -311,10 +304,10 @@ class Operator: .. code-block:: python - from caterpillar.fields import uint16, _infix_ + from caterpillar.fields import uint16, Operator from caterpillar.model import struct - M = _infix_(lambda a, b: a[b*2]) + M = Operator(lambda a, b: a[b*2]) @struct class Format: @@ -325,7 +318,7 @@ class Format: .. code-block:: python - @_infix_ + @Operator def M(a, b): return a[b*2] @@ -333,21 +326,21 @@ def M(a, b): :type func: Callable[[Any, Any], _StructLike] """ - def __init__(self, func: Callable[[Any, Any], _StructLike]) -> None: + def __init__(self, func) -> None: self.func = func - def __truediv__(self, arg2) -> _StructLike: + def __truediv__(self, arg2): return self.func(arg2) - def __rtruediv__(self, arg1) -> "_infix_": + def __rtruediv__(self, arg1): return Operator(partial(self.func, arg1)) - def __call__(self, arg1, arg2) -> _StructLike: + def __call__(self, arg1, arg2): return self.func(arg1, arg2) # utility methods -def get_args(args: Any, context: _ContextLike) -> List[Any]: +def get_args(args, context): """ Get arguments for an instance. @@ -365,7 +358,7 @@ def get_args(args: Any, context: _ContextLike) -> List[Any]: return args -def get_kwargs(kwargs: dict, context: _ContextLike) -> dict: +def get_kwargs(kwargs: dict, context) -> dict: """ Process a dictionary of keyword arguments, replacing callable values with their results. 
diff --git a/src/caterpillar/fields/_mixin.pyi b/src/caterpillar/fields/_mixin.pyi new file mode 100755 index 00000000..f79342b5 --- /dev/null +++ b/src/caterpillar/fields/_mixin.pyi @@ -0,0 +1,92 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from ._base import Field +from caterpillar.abc import ( + _ContextLambda, + _ContextLike, + _StructLike, + _Switch, + _LengthT, + _IT, + _OT, +) +from caterpillar.byteorder import ByteOrder +from caterpillar.options import Flag +from typing import ( + Any, + Callable, + Collection, + Dict, + Generic, + List, + Type, + TypeVar, + Union, + overload, +) + +_NextOT = TypeVar("_NextOT") + +class FieldMixin(Generic[_IT, _OT]): + def __or__(self, flag: Flag) -> Field[_IT, _OT]: ... + def __xor__(self, flag: Flag) -> Field[_IT, _OT]: ... + def __matmul__(self, offset: _ContextLambda | int) -> Field[_IT, _OT]: ... + def __getitem__(self, dim: _LengthT) -> Field[Collection[_IT], Collection[_OT]]: ... + def __rshift__(self, switch: _Switch) -> Field[_IT, _OT]: ... + def __floordiv__(self, condition: _ContextLambda | bool) -> Field[_IT, _OT]: ... + def __set_byteorder__(self, order: ByteOrder) -> Field[_IT, _OT]: ... + def __rsub__(self, bits: _ContextLambda | int) -> Field[_IT, _OT]: ... + def __and__(self, other: _StructLike) -> Chain: ... 
+ +class FieldStruct(FieldMixin[_IT, _OT], _StructLike[_IT, _OT]): + def pack_single(self, obj: _IT, context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> _OT: ... + def pack_seq(self, seq: Collection[_IT], context: _ContextLike) -> None: ... + def unpack_seq(self, context: _ContextLike) -> Collection[_OT]: ... + def __pack__(self, obj: _IT, context: _ContextLike) -> None: ... + def __unpack__(self, context: _ContextLike) -> _OT: ... + +class Chain(FieldStruct[_IT, _OT]): + @overload + def __init__( + self, initial: _StructLike[_IT, Any], *structs: _StructLike + ) -> None: ... + @overload + def __init__(self, initial: _StructLike[_IT, _OT], *structs: None) -> None: ... + @property + def head(self) -> _StructLike[_IT, Any]: ... + @property + def tail(self) -> _StructLike[Any, _OT]: ... + def __size__(self, context: _ContextLike) -> int: ... + def __type__(self) -> Type[_OT]: ... + def __and__(self, other: _StructLike[_OT, _NextOT]) -> Chain[_IT, _NextOT]: ... + def __rand__(self, other: _StructLike[_OT, _NextOT]) -> Chain[_IT, _NextOT]: ... + def unpack_single(self, context: _ContextLike) -> _OT: ... + def pack_single(self, obj: _IT, context: _ContextLike) -> None: ... + +class Operator: + func: Callable[[Any, Any], _StructLike] + def __init__(self, func: Callable[[Any, Any], _StructLike]) -> None: ... + def __truediv__(self, arg2) -> _StructLike: ... + def __rtruediv__(self, arg1) -> Operator: ... + def __call__(self, arg1, arg2) -> _StructLike: ... + +def get_args( + args: Union[Any, _ContextLambda, List[Union[Any, _ContextLambda]]], + context: _ContextLike, +) -> list[Any]: ... +def get_kwargs( + kwargs: Dict[Any, Union[Any, _ContextLambda]], context: _ContextLike +) -> dict: ... 
From d89a1e0ef16baf4b9461f1ac7aebb4abf7b88231 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Wed, 25 Jun 2025 10:11:52 +0200 Subject: [PATCH 13/41] Added stubs for field.common --- fields.common.py: + Transformer: removed __fmt__ method fields.compression.py: + Updated public compression methods to use lazy imports --- src/caterpillar/fields/common.py | 181 +++++++++-------- src/caterpillar/fields/common.pyi | 257 +++++++++++++++++++++++++ src/caterpillar/fields/compression.py | 144 +++++++------- src/caterpillar/fields/compression.pyi | 71 +++++++ src/caterpillar/fields/conditional.py | 19 +- src/caterpillar/fields/conditional.pyi | 36 ++++ 6 files changed, 527 insertions(+), 181 deletions(-) mode change 100644 => 100755 src/caterpillar/fields/common.py create mode 100755 src/caterpillar/fields/common.pyi mode change 100644 => 100755 src/caterpillar/fields/compression.py create mode 100755 src/caterpillar/fields/compression.pyi mode change 100644 => 100755 src/caterpillar/fields/conditional.py create mode 100755 src/caterpillar/fields/conditional.pyi diff --git a/src/caterpillar/fields/common.py b/src/caterpillar/fields/common.py old mode 100644 new mode 100755 index ee70a13d..a405844a --- a/src/caterpillar/fields/common.py +++ b/src/caterpillar/fields/common.py @@ -25,8 +25,6 @@ from caterpillar.abc import ( _StructLike, _ContextLambda, - getstruct, - _EnumLike, _StreamType, _ContextLike, ) @@ -40,6 +38,8 @@ from caterpillar.byteorder import LittleEndian from caterpillar import registry from caterpillar._common import WithoutContextVar +from caterpillar.shared import getstruct +from caterpillar.model import getbits from ._base import Field, INVALID_DEFAULT, singleton from ._mixin import FieldStruct @@ -107,7 +107,7 @@ def __type__(self) -> type: """ return self.ty - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """ Calculate the size of the field in bytes. 
@@ -119,7 +119,7 @@ def __size__(self, context: _ContextLike) -> int: """ return self.__bits__ // 8 - def pack_single(self, obj: Any, context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: """ Pack a single value into the stream using the defined format character. @@ -140,7 +140,7 @@ def pack_single(self, obj: Any, context: _ContextLike) -> None: data = PyStruct.pack(fmt, obj) context[CTX_STREAM].write(data) - def pack_seq(self, seq: Sequence, context: _ContextLike) -> None: + def pack_seq(self, seq, context) -> None: """ Pack a sequence of values into the stream. @@ -154,7 +154,7 @@ def pack_seq(self, seq: Sequence, context: _ContextLike) -> None: else: super().pack_seq(seq, context) - def unpack_single(self, context: _ContextLike) -> Any: + def unpack_single(self, context): """ Unpack a single value from the stream. @@ -169,7 +169,7 @@ def unpack_single(self, context: _ContextLike) -> Any: ) return value[0] if value else None - def unpack_seq(self, context: _ContextLike) -> List[Any]: + def unpack_seq(self, context): """ Unpack a sequence of values from the stream. @@ -191,7 +191,7 @@ def unpack_seq(self, context: _ContextLike) -> List[Any]: size = (self.__bits__ // 8) * length return list(PyStruct.unpack(fmt, context[CTX_STREAM].read(size))) - def get_length(self, context: _ContextLike) -> int: + def get_length(self, context) -> int: """ Get the length of the field, which may be dynamically determined based on the context. 
@@ -239,8 +239,6 @@ def is_padding(self) -> bool: void_ptr = PyStructFormattedField("P", int) -_ConstType = Union[str, bytes, Any] - class Transformer(FieldStruct): """ @@ -249,15 +247,11 @@ class Transformer(FieldStruct): __slots__ = ("struct",) - def __init__(self, struct: _StructLike) -> None: + def __init__(self, struct) -> None: self.struct = struct self.__bits__ = getattr(self.struct, "__bits__", None) - # TODO: document this - def __fmt__(self) -> str: - return self.struct.__fmt__() - - def __type__(self) -> type: + def __type__(self): """ Get the type of the data encoded/decoded by the transformer. @@ -265,7 +259,7 @@ def __type__(self) -> type: """ return self.struct.__type__() - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """ Get the size of the data encoded/decoded by the transformer. @@ -274,7 +268,7 @@ def __size__(self, context: _ContextLike) -> int: """ return self.struct.__size__(context) - def encode(self, obj: Any, context: _ContextLike) -> Any: + def encode(self, obj, context): """ Encode data using the wrapped _StructLike object. @@ -284,7 +278,7 @@ def encode(self, obj: Any, context: _ContextLike) -> Any: """ return obj - def decode(self, parsed: Any, context: _ContextLike) -> Any: + def decode(self, parsed, context): """ Decode data using the wrapped _StructLike object. @@ -294,7 +288,7 @@ def decode(self, parsed: Any, context: _ContextLike) -> Any: """ return parsed - def pack_single(self, obj: Any, context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: """ Pack a single value into the stream using encoding. @@ -304,7 +298,7 @@ def pack_single(self, obj: Any, context: _ContextLike) -> None: value = self.encode(obj, context) self.struct.__pack__(value, context) - def unpack_single(self, context: _ContextLike) -> Any: + def unpack_single(self, context): """ Unpack a single value from the stream and decode it. 
@@ -342,11 +336,11 @@ class Const(Transformer): __slots__ = ("value",) - def __init__(self, value: _ConstType, struct: _StructLike) -> None: + def __init__(self, value, struct) -> None: super().__init__(struct) self.value = value - def encode(self, obj: Any, context: _ContextLike) -> Any: + def encode(self, obj, context): """ Encode data using the constant value. This method will always return the constant value, regardless of the input. Therefore, :code:`None` @@ -364,7 +358,7 @@ def encode(self, obj: Any, context: _ContextLike) -> Any: """ return self.value - def decode(self, parsed: Any, context: _ContextLike) -> Any: + def decode(self, parsed, context): """ Decode data and ensure it matches the constant value. If the parsed value doesn't match, a `ValidationError` is raised. @@ -422,15 +416,15 @@ class Enum(Transformer): def __init__( self, - model: type, - struct: _StructLike, - default: Optional[_EnumLike | Any] = INVALID_DEFAULT, + model, + struct, + default=INVALID_DEFAULT, ) -> None: super().__init__(struct) self.model = model self.default = default - def __type__(self) -> type: + def __type__(self): """ Determine the type for this transformation, which is either the enum type or a union of the enum and struct types, depending on the global field flags. @@ -443,7 +437,7 @@ def __type__(self) -> type: return Union[self.model, self.struct.__type__()] - def encode(self, obj: Any, context: _ContextLike) -> Any: + def encode(self, obj, context): """ Encode an enumeration value into its corresponding encoded representation. @@ -464,7 +458,7 @@ def encode(self, obj: Any, context: _ContextLike) -> Any: return obj.value - def decode(self, parsed: Any, context: _ContextLike) -> Any: + def decode(self, parsed, context): """ Decode an encoded value (typically an integer) back to its corresponding enumeration value. 
@@ -505,10 +499,10 @@ def decode(self, parsed: Any, context: _ContextLike) -> Any: class _EnumTypeConverter(registry.TypeConverter): - def matches(self, annotation: Any) -> bool: + def matches(self, annotation) -> bool: return isinstance(annotation, type) and issubclass(annotation, _EnumType) - def convert(self, annotation: Any, kwargs: dict) -> _StructLike: + def convert(self, annotation, kwargs: dict): struct_obj = getstruct(annotation) if not struct_obj: raise ValidationError( @@ -560,7 +554,7 @@ class Memory(FieldStruct): def __init__( self, - length: Union[int, _ContextLambda, EllipsisType], + length, ) -> None: self.length = length @@ -572,7 +566,7 @@ def __type__(self) -> type: """ return memoryview - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """ Calculate the size of the memory field based on the `length` parameter. @@ -584,7 +578,7 @@ def __size__(self, context: _ContextLike) -> int: """ return self.length(context) if callable(self.length) else self.length - def pack_single(self, obj: Union[memoryview, bytes], context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: """ Pack a single byte object (memoryview or bytes) into the stream. @@ -619,7 +613,7 @@ def pack_single(self, obj: Union[memoryview, bytes], context: _ContextLike) -> N ) context[CTX_STREAM].write(obj) - def unpack_single(self, context: _ContextLike) -> Any: + def unpack_single(self, context): """ Unpack a single byte object (memoryview) from the stream. @@ -654,7 +648,7 @@ def __type__(self) -> type: """ return bytes - def unpack_single(self, context: _ContextLike) -> Any: + def unpack_single(self, context): """ Unpack a single byte sequence (bytes) from the stream. 
@@ -684,8 +678,8 @@ class String(Memory): def __init__( self, - length: Union[int, _ContextLambda, EllipsisType], - encoding: Optional[str] = None, + length, + encoding=None, ) -> None: super().__init__(length) self.encoding = encoding or "utf-8" @@ -698,11 +692,11 @@ def __type__(self) -> type: """ return str - def pack_single(self, obj: str, context: _ContextLike) -> None: + def pack_single(self, obj: str, context) -> None: """Packs a single string into the stream.""" return super().pack_single(obj.encode(self.encoding), context) - def unpack_single(self, context: _ContextLike) -> Any: + def unpack_single(self, context): """ Unpack a single string from the stream. @@ -749,9 +743,9 @@ class CString(FieldStruct): def __init__( self, - length: Union[int, _ContextLambda, None, EllipsisType] = None, - encoding: Optional[str] = None, - pad: Union[str, int, None] = None, + length=None, + encoding=None, + pad=None, ) -> None: """ Initialize the String field with a fixed length or a length determined by a context lambda. @@ -784,7 +778,7 @@ def __class_getitem__(cls, dim) -> Field: """ return CString(...)[dim] - def __size__(self, context: _ContextLike) -> Any: + def __size__(self, context): """ Returns the size of the `CString` field. @@ -801,7 +795,7 @@ def __type__(self) -> type: """ return str - def pack_single(self, obj: str, context: _ContextLike) -> None: + def pack_single(self, obj: str, context) -> None: """ Pack a single string into the stream with padding. @@ -828,7 +822,7 @@ def pack_single(self, obj: str, context: _ContextLike) -> None: stream.write(encoded) stream.write(self._raw_pad) - def unpack_single(self, context: _ContextLike) -> Any: + def unpack_single(self, context): """ Unpack a single C-style string from the stream. 
@@ -894,7 +888,7 @@ class ConstString(Const): __slots__ = () - def __init__(self, value: str, encoding: Optional[str] = None) -> None: + def __init__(self, value: str, encoding=None) -> None: if not isinstance(value, str): raise TypeError("value must be a string") @@ -972,11 +966,11 @@ class Computed(FieldStruct): __slots__ = ("value",) - def __init__(self, value: Union[_ConstType, _ContextLambda]) -> None: + def __init__(self, value) -> None: self.value = value self.__bits__ = 0 - def __type__(self) -> type: + def __type__(self): """ Return the type of the computed field. @@ -984,7 +978,7 @@ def __type__(self) -> type: """ return Any if callable(self.value) else type(self.value) - def __pack__(self, obj: Any, context: _ContextLike) -> None: + def __pack__(self, obj, context) -> None: """ No packing is needed for computed fields, as the value is computed dynamically. @@ -996,7 +990,7 @@ def __pack__(self, obj: Any, context: _ContextLike) -> None: """ pass - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """ Return the size of the computed field. @@ -1007,7 +1001,7 @@ def __size__(self, context: _ContextLike) -> int: """ return 0 - def __unpack__(self, context: _ContextLike) -> Any: + def __unpack__(self, context): """ Unpack the computed value based on the context. @@ -1019,7 +1013,7 @@ def __unpack__(self, context: _ContextLike) -> Any: """ return self.value(context) if callable(self.value) else self.value - def pack_single(self, obj: Any, context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: """ No packing is needed for computed fields. @@ -1029,7 +1023,7 @@ def pack_single(self, obj: Any, context: _ContextLike) -> None: # No need for an implementation pass - def unpack_single(self, context: _ContextLike) -> None: + def unpack_single(self, context) -> None: """ No unpacking is needed for computed fields. 
@@ -1089,20 +1083,20 @@ def __type__(self) -> type: """ return None.__class__ - def __pack__(self, obj: Any, context: _ContextLike) -> None: + def __pack__(self, obj, context) -> None: pass - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: return 0 - def __unpack__(self, context: _ContextLike) -> Any: + def __unpack__(self, context): return None - def pack_single(self, obj: Any, context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: # No need for an implementation pass - def unpack_single(self, context: _ContextLike) -> None: + def unpack_single(self, context) -> None: # No need for an implementation pass @@ -1143,9 +1137,9 @@ class Prefixed(FieldStruct): def __init__( self, - prefix: _StructLike, - struct: Optional[_StructLike] = None, - encoding: Optional[str] = None, + prefix, + struct=None, + encoding=None, ): self.prefix = prefix self.struct = struct @@ -1158,7 +1152,7 @@ def __init__( ) self.encoding, self.struct = struct, None - def __type__(self) -> Optional[Union[type, str]]: + def __type__(self): """ Return the type associated with this Prefixed field. @@ -1169,7 +1163,7 @@ def __type__(self) -> Optional[Union[type, str]]: """ return bytes if self.struct is None else self.struct.__type__() - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """ Prefixed fields do not have a fixed size. @@ -1180,7 +1174,7 @@ def __size__(self, context: _ContextLike) -> int: """ raise DynamicSizeError("Prefixed does not store a size", context) - def pack_single(self, obj: Any, context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: """ Pack a single object into the stream, with the prefix indicating the size. @@ -1189,7 +1183,6 @@ def pack_single(self, obj: Any, context: _ContextLike) -> None: :param obj: The object to pack (should be a byte sequence). :param context: The current context. 
""" - # REVISIT: We can only provide a value with __len__ here if self.struct is not None: data = BytesIO() with WithoutContextVar(context, CTX_STREAM, data): @@ -1203,7 +1196,7 @@ def pack_single(self, obj: Any, context: _ContextLike) -> None: self.prefix.__pack__(len(obj), context) context[CTX_STREAM].write(obj) - def unpack_single(self, context: _ContextLike) -> Any: + def unpack_single(self, context): """ Unpack a single object from the stream, using the prefix to determine the size. @@ -1244,7 +1237,7 @@ class Int(FieldStruct): __slots__ = ("signed", "size") - def __init__(self, bits: int, signed: bool = True) -> None: + def __init__(self, bits: int, signed=True) -> None: self.signed = signed self.__bits__ = bits if not isinstance(bits, int): @@ -1259,7 +1252,7 @@ def __type__(self) -> type: """ return int - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """ Return the size of the integer in bytes. @@ -1268,7 +1261,7 @@ def __size__(self, context: _ContextLike) -> int: """ return self.size - def pack_single(self, obj: int, context: _ContextLike) -> None: + def pack_single(self, obj: int, context) -> None: """ Pack a single integer value into the stream. @@ -1286,7 +1279,7 @@ def pack_single(self, obj: int, context: _ContextLike) -> None: obj.to_bytes(self.size, byteorder, signed=self.signed) ) - def unpack_single(self, context: _ContextLike) -> int: + def unpack_single(self, context) -> int: """ Unpack a single integer value from the stream. 
@@ -1360,11 +1353,11 @@ class Aligned(FieldStruct): def __init__( self, - struct: _StructLike, - alignment: Union[int, _ContextLambda], - after: bool = False, - before: bool = False, - filler: Union[int, str, None] = None, + struct, + alignment, + after=False, + before=False, + filler=None, ) -> None: if not before and not after: raise ValueError("Must specify either before or after") @@ -1384,7 +1377,7 @@ def __init__( if not isinstance(self._filler, int): raise ValueError(f"Filler must be a single byte - got {filler!r}") - def __type__(self) -> Optional[Union[type, str]]: + def __type__(self): """ Return the type associated with this aligned field. @@ -1392,7 +1385,7 @@ def __type__(self) -> Optional[Union[type, str]]: """ return self.struct.__type__() - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """ Calculate the size of the aligned field, accounting for padding based on the alignment. @@ -1408,7 +1401,7 @@ def __size__(self, context: _ContextLike) -> int: struct_size = self.struct.__size__(context) return struct_size + (self.alignment - (struct_size % self.alignment)) - def unpack_alignment(self, context: _ContextLike): + def unpack_alignment(self, context): """ Unpack padding for the alignment, verifying that the correct amount of padding is present. @@ -1431,7 +1424,7 @@ def unpack_alignment(self, context: _ContextLike): f"Expected {size} bytes of padding (value={self._filler!r}), got {data.count(self._filler)}" ) - def unpack_single(self, context: _ContextLike) -> Any: + def unpack_single(self, context): """ Unpack a single aligned field from the stream. @@ -1448,7 +1441,7 @@ def unpack_single(self, context: _ContextLike) -> Any: self.unpack_alignment(context) return obj - def pack_alignment(self, context: _ContextLike): + def pack_alignment(self, context): """ Apply padding for the alignment before or after the structure, depending on the `before` and `after` settings. 
@@ -1460,7 +1453,7 @@ def pack_alignment(self, context: _ContextLike): size = value - (stream.tell() % value) stream.write(bytes([self._filler] * size)) - def pack_single(self, obj: Any, context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: """ Pack a single aligned field into the stream, applying padding if necessary. @@ -1474,7 +1467,7 @@ def pack_single(self, obj: Any, context: _ContextLike) -> None: self.pack_alignment(context) -def align(alignment: Union[int, _ContextLambda]) -> _ContextLambda: +def align(alignment): """ Create a context lambda to calculate the alignment padding required at the current stream position. @@ -1499,7 +1492,7 @@ def align(alignment: Union[int, _ContextLambda]) -> _ContextLambda: :return: A context lambda function that returns the number of bytes to align the next structure. """ - def _get_aligned_size(context: _ContextLike) -> Any: + def _get_aligned_size(context: _ContextLike): pos = context[CTX_STREAM].tell() value = alignment(context) if callable(alignment) else alignment return value - (pos % value) @@ -1536,7 +1529,7 @@ class Lazy(FieldStruct): when the field is accessed. """ - def __init__(self, struct: Callable[[], _StructLike]) -> None: + def __init__(self, struct) -> None: if not callable(struct): raise TypeError(f"struct must be a callable - got {struct!r}") @@ -1554,7 +1547,7 @@ def struct(self) -> _StructLike: """ return self.struct_fn() - def __bits__(self) -> str: + def __bits__(self): """ Get the bit representation of the Lazy struct by delegating to the underlying struct. @@ -1563,7 +1556,7 @@ def __bits__(self) -> str: """ return self.struct.__bits__() - def __type__(self) -> type: + def __type__(self): """ Get the type associated with the Lazy struct by delegating to the underlying struct. 
@@ -1572,7 +1565,7 @@ def __type__(self) -> type: """ return self.struct.__type__() - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """ Get the size of the Lazy struct by delegating to the underlying struct. @@ -1582,7 +1575,7 @@ def __size__(self, context: _ContextLike) -> int: """ return self.struct.__size__(context) - def pack_single(self, obj: Any, context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: """ Pack a single value using the Lazy struct by delegating to the underlying struct. @@ -1591,7 +1584,7 @@ def pack_single(self, obj: Any, context: _ContextLike) -> None: """ self.struct.__pack__(obj, context) - def unpack_single(self, context: _ContextLike) -> Any: + def unpack_single(self, context): """ Unpack a single value using the Lazy struct by delegating to the underlying struct. @@ -1633,7 +1626,7 @@ def __type__(self) -> type: """ return UUID - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """ Get the size of the UUID field. @@ -1652,7 +1645,7 @@ def __bits__(self) -> int: """ return 128 - def __pack__(self, obj: UUID, context: _ContextLike) -> None: + def __pack__(self, obj: UUID, context) -> None: """ Pack a UUID object into the stream. @@ -1665,7 +1658,7 @@ def __pack__(self, obj: UUID, context: _ContextLike) -> None: is_le = context[CTX_FIELD].order is LittleEndian super().__pack__(obj.bytes_le if is_le else obj.bytes, context) - def __unpack__(self, context: _ContextLike) -> UUID: + def __unpack__(self, context) -> UUID: """ Unpack a UUID from the stream. 
diff --git a/src/caterpillar/fields/common.pyi b/src/caterpillar/fields/common.pyi new file mode 100755 index 00000000..bc15a075 --- /dev/null +++ b/src/caterpillar/fields/common.pyi @@ -0,0 +1,257 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from ._base import Field +from ._mixin import FieldStruct +from _typeshed import SupportsLenAndGetItem + +from caterpillar import registry +from caterpillar.abc import ( + _ContextLambda, + _ContextLike, + _LengthT, + _StructLike, + _IT, + _OT, +) +from caterpillar.options import Flag +from functools import cached_property +from types import EllipsisType, NoneType +from typing import ( + Any, + Callable, + Collection, + Generic, + List, + Optional, + Type, + TypeVar, + Union, +) +from uuid import UUID + +ENUM_STRICT: Flag[NoneType] = ... + +class PyStructFormattedField(FieldStruct[_IT, _IT]): + text: str + ty: Type[_IT] + __bits__: int + def __init__(self, ch: str, type_: Type[_IT]) -> None: ... + def __type__(self) -> type: ... + def __size__(self, context: _ContextLike) -> int: ... + def pack_single(self, obj: _IT, context: _ContextLike) -> None: ... + def pack_seq(self, seq: Collection[_IT], context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> _IT: ... + def unpack_seq(self, context: _ContextLike) -> Collection[_IT]: ... + def get_length(self, context: _ContextLike) -> int: ... 
+ def is_padding(self) -> bool: ... + +padding: PyStructFormattedField[NoneType] = ... +char: PyStructFormattedField[str] = ... +boolean: PyStructFormattedField[bool] = ... +int8: PyStructFormattedField[int] = ... +uint8: PyStructFormattedField[int] = ... +int16: PyStructFormattedField[int] = ... +uint16: PyStructFormattedField[int] = ... +int32: PyStructFormattedField[int] = ... +uint32: PyStructFormattedField[int] = ... +int64: PyStructFormattedField[int] = ... +uint64: PyStructFormattedField[int] = ... +ssize_t: PyStructFormattedField[int] = ... +size_t: PyStructFormattedField[int] = ... +float16: PyStructFormattedField[float] = ... +float32: PyStructFormattedField[float] = ... +float64: PyStructFormattedField[float] = ... +double: PyStructFormattedField[float] = ... +void_ptr: PyStructFormattedField[int] = ... + +_IT_transformed = TypeVar("_IT_transformed") +_OT_transformed = TypeVar("_OT_transformed") + +class Transformer( + Generic[_IT, _IT_transformed, _OT, _OT_transformed], + FieldStruct[_IT, _OT], +): + struct: _StructLike[_IT_transformed, _OT_transformed] + __bits__: Union[Callable[[], int], int] + def __init__( + self, struct: _StructLike[_IT_transformed, _OT_transformed] + ) -> None: ... + def __fmt__(self) -> str: ... + def __type__(self) -> Type[_OT]: ... + def __size__(self, context: _ContextLike) -> int: ... + def encode(self, obj: _IT, context: _ContextLike) -> _IT_transformed: ... + def decode(self, parsed: _OT_transformed, context: _ContextLike) -> _OT: ... + def pack_single(self, obj: _IT, context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> _OT: ... + +class Const(Transformer[_IT, _IT, _IT, _IT]): + value: _IT + def __init__(self, value: _IT, struct: _StructLike[_IT, _IT]) -> None: ... + def encode(self, obj: _IT, context: _ContextLike) -> _IT: ... + def decode(self, parsed: _IT, context: _ContextLike) -> _IT: ... 
+ +_EnumT = TypeVar("_EnumT") + +class Enum(Generic[_EnumT, _IT], Transformer[_EnumT, _IT, _EnumT, _IT]): + model: Type[_EnumT] + default: _EnumT + def __init__( + self, + model: Type[_EnumT], + struct: _StructLike[_IT, _IT], + default: _EnumT | _IT | None = ..., + ) -> None: ... + def __type__(self) -> Union[Type[_EnumT], Type[_IT]]: ... + def encode(self, obj: _EnumT, context: _ContextLike) -> _IT: ... + def decode(self, parsed: _IT, context: _ContextLike) -> _EnumT: ... + +class _EnumTypeConverter(registry.TypeConverter): + def matches(self, annotation: Any) -> bool: ... + def convert(self, annotation: Any, kwargs: dict) -> _StructLike: ... + +_MemoryIT = TypeVar("_MemoryIT", default=Union[memoryview, bytes]) +_MemoryOT = TypeVar("_MemoryOT", default=memoryview) + +class Memory(Generic[_MemoryIT, _MemoryOT], FieldStruct[_MemoryIT, _MemoryOT]): + length: _LengthT + def __init__(self, length: _LengthT) -> None: ... + def __type__(self) -> Type[_MemoryOT]: ... + def __size__(self, context: _ContextLike) -> int: ... + def pack_single(self, obj: _MemoryIT, context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> _MemoryOT: ... + +class Bytes(Memory[bytes, bytes]): + def __type__(self) -> Type[bytes]: ... + def unpack_single(self, context: _ContextLike) -> bytes: ... + +class String(Memory[str, str]): + encoding: str + def __init__(self, length: _LengthT, encoding: str | None = None) -> None: ... + def __type__(self) -> Type[str]: ... + def pack_single(self, obj: str, context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> str: ... + +class CString(FieldStruct[str, str]): + length: _LengthT + encoding: str + pad: int + def __init__( + self, + length: int | _ContextLambda | None | EllipsisType = ..., + encoding: str | None = ..., + pad: str | int | None = ..., + ) -> None: ... + def __class_getitem__(cls, dim: _LengthT) -> Field[List[str], List[str]]: ... 
+ def __size__(self, context: _ContextLike) -> Any: ... + def __type__(self) -> type: ... + def pack_single(self, obj: str, context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> Any: ... + +class ConstString(Const[str]): + def __init__(self, value: str, encoding: str | None = None) -> None: ... + +class ConstBytes(Const[bytes]): + def __init__(self, value: bytes) -> None: ... + +class Computed(Generic[_IT], FieldStruct[NoneType, _IT]): + value: _IT + __bits__: int + def __init__(self, value: _IT | _ContextLambda[_IT]) -> None: ... + def __type__(self) -> Type[_IT]: ... + def __pack__(self, obj: NoneType, context: _ContextLike) -> None: ... + def __size__(self, context: _ContextLike) -> int: ... + def __unpack__(self, context: _ContextLike) -> _IT: ... + def pack_single(self, obj: NoneType, context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> _IT: ... + +class Pass(FieldStruct[None, None]): + def __bits__(self) -> int: ... + def __type__(self) -> Type[None]: ... + def __pack__(self, obj: None, context: _ContextLike) -> None: ... + def __size__(self, context: _ContextLike) -> int: ... + def __unpack__(self, context: _ContextLike) -> Any: ... + def pack_single(self, obj: None, context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> None: ... + +_PrefixIOT = TypeVar("_PrefixIOT", bound=SupportsLenAndGetItem, default=bytes) + +class Prefixed(Generic[_PrefixIOT], FieldStruct[_PrefixIOT, _PrefixIOT]): + prefix: _StructLike[int, int] + struct: _StructLike[_PrefixIOT, _PrefixIOT] + encoding: Optional[str] + def __init__( + self, + prefix: _StructLike[int, int], + struct: _StructLike[_PrefixIOT, _PrefixIOT] | None = None, + encoding: str | None = None, + ) -> None: ... + def __type__(self) -> Type[_PrefixIOT]: ... + def __size__(self, context: _ContextLike) -> int: ... + def pack_single(self, obj: _PrefixIOT, context: _ContextLike) -> None: ... 
+ def unpack_single(self, context: _ContextLike) -> _PrefixIOT: ... + +class Int(FieldStruct[int, int]): + signed: bool + __bits__: int + size: int + def __init__(self, bits: int, signed: bool = True) -> None: ... + def __type__(self) -> Type[int]: ... + def __size__(self, context: _ContextLike) -> int: ... + def pack_single(self, obj: int, context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> int: ... + +class UInt(Int): + def __init__(self, bits: int) -> None: ... + +int24: Int +uint24: UInt + +class Aligned(FieldStruct[_IT, _OT]): + struct: _StructLike[_IT, _OT] + alignment: int | _ContextLambda[int] + def __init__( + self, + struct: _StructLike[_IT, _OT], + alignment: int | _ContextLambda[int], + after: bool = False, + before: bool = False, + filler: int | str | None = None, + ) -> None: ... + def __type__(self) -> Type[_OT]: ... + def __size__(self, context: _ContextLike) -> int: ... + def unpack_alignment(self, context: _ContextLike): ... + def unpack_single(self, context: _ContextLike) -> _OT: ... + def pack_alignment(self, context: _ContextLike): ... + def pack_single(self, obj: _IT, context: _ContextLike) -> None: ... + +def align(alignment: int | _ContextLambda[int]) -> _ContextLambda[int]: ... + +class Lazy(FieldStruct[_IT, _OT]): + struct_fn: Callable[[], _StructLike[_IT, _OT]] + def __init__(self, struct: Callable[[], _StructLike[_IT, _OT]]) -> None: ... + @cached_property + def struct(self) -> _StructLike[_IT, _OT]: ... + def __bits__(self) -> int: ... + def __type__(self) -> Type[_OT]: ... + def __size__(self, context: _ContextLike) -> int: ... + def pack_single(self, obj: _IT, context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> _OT: ... + +class Uuid(FieldStruct[UUID, UUID]): + def __type__(self) -> Type[UUID]: ... + def __size__(self, context: _ContextLike) -> int: ... + def __bits__(self) -> int: ... + def __pack__(self, obj: UUID, context: _ContextLike) -> None: ... 
+ def __unpack__(self, context: _ContextLike) -> UUID: ... diff --git a/src/caterpillar/fields/compression.py b/src/caterpillar/fields/compression.py old mode 100644 new mode 100755 index a465f115..41bdbbdb --- a/src/caterpillar/fields/compression.py +++ b/src/caterpillar/fields/compression.py @@ -12,18 +12,12 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . -from abc import abstractmethod -from typing import Protocol, Union, Callable, Optional, runtime_checkable +from typing import Protocol, runtime_checkable -from caterpillar.abc import ( - _ContextLike, - _StructLike, - _ContainsStruct, - getstruct, - hasstruct, -) -from ._mixin import get_kwargs -from .common import Transformer, Bytes +from caterpillar.abc import _StructLike +from caterpillar.shared import getstruct, hasstruct +from caterpillar.fields._mixin import get_kwargs +from caterpillar.fields.common import Transformer, Bytes @runtime_checkable @@ -36,7 +30,6 @@ class _Compressor(Protocol): Any class implementing this protocol must define these methods. """ - @abstractmethod def compress(self, data: bytes, **kwds) -> bytes: """ Compress the provided data. @@ -47,9 +40,8 @@ def compress(self, data: bytes, **kwds) -> bytes: :return: The compressed data. :rtype: bytes """ - pass + return b"" - @abstractmethod def decompress(self, data: bytes, **kwds) -> bytes: """ Decompress the provided data. @@ -60,7 +52,7 @@ def decompress(self, data: bytes, **kwds) -> bytes: :return: The decompressed data. 
:rtype: bytes """ - pass + return b"" class Compressed(Transformer): @@ -83,10 +75,10 @@ class Compressed(Transformer): def __init__( self, - compressor: _Compressor, - struct: Union[_ContainsStruct, _StructLike], - comp_kwargs: Optional[dict] = None, - decomp_kwargs: Optional[dict] = None, + compressor, + struct, + comp_kwargs=None, + decomp_kwargs=None, ) -> None: if hasstruct(struct): struct = getstruct(struct) @@ -95,7 +87,7 @@ def __init__( self.comp_args = comp_kwargs or {} self.decomp_args = decomp_kwargs or {} - def encode(self, obj: bytes, context: _ContextLike) -> bytes: + def encode(self, obj: bytes, context) -> bytes: """ Compress the input data using the provided compressor. @@ -108,7 +100,7 @@ def encode(self, obj: bytes, context: _ContextLike) -> bytes: """ return self.compressor.compress(obj, **get_kwargs(self.comp_args, context)) - def decode(self, parsed: bytes, context: _ContextLike) -> bytes: + def decode(self, parsed: bytes, context) -> bytes: """ Decompress the input data using the provided compressor. @@ -124,81 +116,77 @@ def decode(self, parsed: bytes, context: _ContextLike) -> bytes: ) -_LengthOrStruct = Union[_ContainsStruct, _StructLike, Callable, int] - - def compressed( - lib: _Compressor, - obj: _LengthOrStruct, - comp_kwargs: Optional[dict] = None, - decomp_kwargs: Optional[dict] = None, + lib, + obj, + comp_kwargs=None, + decomp_kwargs=None, ) -> _StructLike: if callable(obj) or isinstance(obj, int) or obj is ...: obj = Bytes(obj) return Compressed(lib, obj, comp_kwargs, decomp_kwargs) -try: - import zlib +def ZLibCompressed( + obj, + comp_kwargs=None, + decomp_kwargs=None, +): + """ + Create a struct representing zlib compression. + """ + try: + import zlib - def ZLibCompressed( - obj: _LengthOrStruct, - comp_kwargs: Optional[dict] = None, - decomp_kwargs: Optional[dict] = None, - ): - """ - Create a struct representing zlib compression. 
- """ return compressed(zlib, obj, comp_kwargs, decomp_kwargs) + except ImportError: + raise NotImplementedError("Could not import zlib!") -except ImportError: - ZLibCompressed = None -try: - import bz2 +def Bz2Compressed( + obj, + comp_kwargs=None, + decomp_kwargs=None, +): + """ + Create a struct representing bz2 compression. + """ + try: + import bz2 - def Bz2Compressed( - obj: _LengthOrStruct, - comp_kwargs: Optional[dict] = None, - decomp_kwargs: Optional[dict] = None, - ): - """ - Create a struct representing bz2 compression. - """ return compressed(bz2, obj, comp_kwargs, decomp_kwargs) + except ImportError: + raise NotImplementedError("Could not import bz2!") -except ImportError: - Bz2Compressed = None -try: - import lzma +def LZMACompressed( + obj, + comp_kwargs=None, + decomp_kwargs=None, +): + """ + Create a struct representing lzma compression. + """ + try: + import lzma - def LZMACompressed( - obj: _LengthOrStruct, - comp_kwargs: Optional[dict] = None, - decomp_kwargs: Optional[dict] = None, - ): - """ - Create a struct representing lzma compression. - """ return compressed(lzma, obj, comp_kwargs, decomp_kwargs) + except ImportError: + raise NotImplementedError("Could not import lzma!") -except ImportError: - LZMACompressed = None -try: - # install package manuall with pip install lzallright - import lzallright +def LZOCompressed( + obj, + comp_kwargs=None, + decomp_kwargs=None, +): + """ + Create a struct representing LZO compression. + """ + try: + # install package manuall with pip install lzallright + import lzallright - def LZOCompressed( - obj: _LengthOrStruct, - comp_kwargs: Optional[dict] = None, - decomp_kwargs: Optional[dict] = None, - ): - """ - Create a struct representing LZO compression. 
- """ return compressed(lzallright.LZOCompressor(), obj, comp_kwargs, decomp_kwargs) - -except ImportError: - LZOCompressed = None + except ImportError: + raise NotImplementedError("Could not import lzallright!") diff --git a/src/caterpillar/fields/compression.pyi b/src/caterpillar/fields/compression.pyi new file mode 100755 index 00000000..e310c7f8 --- /dev/null +++ b/src/caterpillar/fields/compression.pyi @@ -0,0 +1,71 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from .common import Transformer +from caterpillar.abc import ( + _ContainsStruct, + _ContextLike, + _StructLike, + _LengthT, +) +from typing import Protocol, runtime_checkable + +@runtime_checkable +class _Compressor(Protocol): + def compress(self, data: bytes, **kwds) -> bytes: ... + def decompress(self, data: bytes, **kwds) -> bytes: ... + +class Compressed(Transformer[bytes, bytes, bytes, bytes]): + compressor: _Compressor + comp_args: dict + decomp_args: dict + def __init__( + self, + compressor: _Compressor, + struct: _ContainsStruct[bytes, bytes] | _StructLike[bytes, bytes], + comp_kwargs: dict | None = None, + decomp_kwargs: dict | None = None, + ) -> None: ... + def encode(self, obj: bytes, context: _ContextLike) -> bytes: ... + def decode(self, parsed: bytes, context: _ContextLike) -> bytes: ... 
+ +_LengthTorStructT = _LengthT | _ContainsStruct[bytes, bytes] | _StructLike[bytes, bytes] + +def compressed( + lib: _Compressor, + obj: _LengthTorStructT, + comp_kwargs: dict | None = None, + decomp_kwargs: dict | None = None, +) -> _StructLike[bytes, bytes]: ... + +def ZLibCompressed( + obj: _LengthTorStructT, + comp_kwargs: dict | None = None, + decomp_kwargs: dict | None = None, +) -> _StructLike[bytes, bytes]: ... +def Bz2Compressed( + obj: _LengthTorStructT, + comp_kwargs: dict | None = None, + decomp_kwargs: dict | None = None, +) -> _StructLike[bytes, bytes]: ... +def LZMACompressed( + obj: _LengthTorStructT, + comp_kwargs: dict | None = None, + decomp_kwargs: dict | None = None, +) -> _StructLike[bytes, bytes]: ... +def LZOCompressed( + obj: _LengthTorStructT, + comp_kwargs: dict | None = None, + decomp_kwargs: dict | None = None, +) -> _StructLike[bytes, bytes]: ... diff --git a/src/caterpillar/fields/conditional.py b/src/caterpillar/fields/conditional.py old mode 100644 new mode 100755 index 2fc28210..e9bb5a4c --- a/src/caterpillar/fields/conditional.py +++ b/src/caterpillar/fields/conditional.py @@ -18,9 +18,10 @@ from typing import Union, Any from typing import Optional from caterpillar.abc import _ContextLambda, _StructLike -from caterpillar.abc import _ContextLike, typeof +from caterpillar.abc import _ContextLike from caterpillar.context import ConditionContext from caterpillar.exception import ValidationError +from caterpillar.shared import typeof from ._base import Field @@ -40,7 +41,7 @@ class definition. While this class can't be used in class definitions, __slots__ = "chain", "conditions" - def __init__(self, struct: _StructLike, condition: _ContextLambda) -> None: + def __init__(self, struct, condition) -> None: if (sys.version_info.major, sys.version_info.minor) >= (3, 14): warnings.warn( "Python3.14 breaks support for Contitional fields. 
Conditional " @@ -50,7 +51,7 @@ def __init__(self, struct: _StructLike, condition: _ContextLambda) -> None: self.conditions = [] self.add(struct, condition) - def __type__(self) -> str: + def __type__(self): return Optional[Union[*map(typeof, self.chain.values())]] def __repr__(self) -> str: @@ -66,12 +67,12 @@ def __repr__(self) -> str: return f"" - def add(self, struct: _StructLike, func: _ContextLambda) -> None: + def add(self, struct, func) -> None: idx = len(self.chain) self.chain[idx] = struct self.conditions.append(func) - def get_struct(self, context: _ContextLike) -> Optional[_StructLike]: + def get_struct(self, context): index = 0 while index < len(self.chain): func = self.conditions[index] @@ -79,16 +80,16 @@ def get_struct(self, context: _ContextLike) -> Optional[_StructLike]: return self.chain[index] index += 1 - def __unpack__(self, context: _ContextLike) -> Any: + def __unpack__(self, context) -> Any: struct = self.get_struct(context) return struct.__unpack__(context) if struct else None - def __pack__(self, obj: Any, context: _ContextLike) -> None: + def __pack__(self, obj: Any, context) -> None: struct = self.get_struct(context) if struct: struct.__pack__(obj, context) - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: struct = self.get_struct(context) return struct.__size__(context) if struct else 0 @@ -196,4 +197,4 @@ def __exit__(self, *_): # REVISIT: There is one case where 'ELSE' is not applicable and will cause # a field to be present at all times. This problem exists if we add fields # into an else-branch without a previously defined field. 
-Else = ElseIf(lambda _: True) +Else = ElseIf(lambda context: True) diff --git a/src/caterpillar/fields/conditional.pyi b/src/caterpillar/fields/conditional.pyi new file mode 100755 index 00000000..b7c66379 --- /dev/null +++ b/src/caterpillar/fields/conditional.pyi @@ -0,0 +1,36 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from caterpillar.abc import _ContextLambda, _ContextLike, _StructLike +from caterpillar.context import ConditionContext +from typing import Any, Dict, List + +class ConditionalChain: + chain: Dict[int, _StructLike] + conditions: List[_ContextLambda[bool]] + def __init__(self, struct: _StructLike, condition: _ContextLambda[int]) -> None: ... + def __type__(self) -> type: ... + def add(self, struct: _StructLike, func: _ContextLambda[bool]) -> None: ... + def get_struct(self, context: _ContextLike) -> _StructLike | None: ... + def __unpack__(self, context: _ContextLike) -> Any: ... + def __pack__(self, obj: Any, context: _ContextLike) -> None: ... + def __size__(self, context: _ContextLike) -> int: ... + +class If(ConditionContext): ... + +class ElseIf(ConditionContext): + def __enter__(self): ... + def __exit__(self, *_) -> None: ... 
+ +Else: ElseIf From 21820a62a7765794a749c98ce66a630ae648da97 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Wed, 25 Jun 2025 11:07:32 +0200 Subject: [PATCH 14/41] Stub files for fields.{crypto,digest,hook} --- src/caterpillar/fields/crypto.py | 94 ++++++--------- src/caterpillar/fields/crypto.pyi | 75 ++++++++++++ src/caterpillar/fields/digest.py | 194 ++++++++++++++---------------- src/caterpillar/fields/digest.pyi | 167 +++++++++++++++++++++++++ src/caterpillar/fields/hook.py | 24 +--- src/caterpillar/fields/hook.pyi | 46 +++++++ 6 files changed, 421 insertions(+), 179 deletions(-) mode change 100644 => 100755 src/caterpillar/fields/crypto.py create mode 100755 src/caterpillar/fields/crypto.pyi mode change 100644 => 100755 src/caterpillar/fields/digest.py create mode 100755 src/caterpillar/fields/digest.pyi mode change 100644 => 100755 src/caterpillar/fields/hook.py create mode 100755 src/caterpillar/fields/hook.pyi diff --git a/src/caterpillar/fields/crypto.py b/src/caterpillar/fields/crypto.py old mode 100644 new mode 100755 index 9acb115e..968302d5 --- a/src/caterpillar/fields/crypto.py +++ b/src/caterpillar/fields/crypto.py @@ -12,38 +12,24 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
-from typing import Union, Any, Type, Optional, Protocol, Iterable, runtime_checkable +from typing import Protocol, runtime_checkable -try: - from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes - from cryptography.hazmat.primitives.padding import PaddingContext -except ImportError: - Cipher = algorithms = modes = PaddingContext = Any - - -from caterpillar.abc import _StructLike, _ContextLike -from caterpillar.abc import _GreedyType, _ContextLambda from caterpillar.exception import UnsupportedOperation from caterpillar.exception import InvalidValueError -from caterpillar.context import CTX_STREAM, Context +from caterpillar.context import CTX_STREAM from .common import Memory, Bytes from ._mixin import get_args, get_kwargs @runtime_checkable class Padding(Protocol): # pylint: disable=missing-class-docstring - def unpadder(self) -> PaddingContext: + def unpadder(self): """Abstract method to get an unpadder for padding.""" - def padder(self) -> PaddingContext: + def padder(self): """Abstract method to get a padder for padding.""" -_ArgType = Union[_ContextLambda, Any] - -KwArgs = Context - - class Encrypted(Memory): """Struct that is able to encrypt/decrypt blocks of memory. 
@@ -67,16 +53,18 @@ class Encrypted(Memory): # REVISIT: this constructor looks ugly def __init__( self, - length: Union[int, _GreedyType, _ContextLambda], - algorithm: Type["algorithms.CipherAlgorithm"], - mode: Union[Type["modes.Mode"], "modes.Mode"], - padding: Union[Padding, Type[Padding]] = None, - algo_args: Optional[Iterable[_ArgType]] = None, - mode_args: Optional[Iterable[_ArgType]] = None, - padding_args: Optional[Iterable[_ArgType]] = None, - post: Optional[_StructLike] = None, + length, + algorithm, + mode, + padding=None, + algo_args=None, + mode_args=None, + padding_args=None, + post=None, ) -> None: - if Cipher is None: + try: + from cryptography.hazmat.primitives.ciphers import Cipher + except ImportError: raise UnsupportedOperation( ( "To use encryption with this framework, the module 'cryptography' " @@ -94,7 +82,7 @@ def __init__( self._padding_args = padding_args self.post = post - def algorithm(self, context: _ContextLike) -> "algorithms.CipherAlgorithm": + def algorithm(self, context): """ Get the encryption algorithm instance. @@ -103,11 +91,11 @@ def algorithm(self, context: _ContextLike) -> "algorithms.CipherAlgorithm": :return: An instance of the encryption algorithm. :rtype: algorithms.CipherAlgorithm """ - return self.get_instance( - algorithms.CipherAlgorithm, self._algo, self._algo_args, context - ) + from cryptography.hazmat.primitives.ciphers import CipherAlgorithm + + return self.get_instance(CipherAlgorithm, self._algo, self._algo_args, context) - def mode(self, context: _ContextLike) -> "modes.Mode": + def mode(self, context): """ Get the encryption mode instance. @@ -116,9 +104,11 @@ def mode(self, context: _ContextLike) -> "modes.Mode": :return: An instance of the encryption mode. 
:rtype: modes.Mode """ + from cryptography.hazmat.primitives.ciphers import modes + return self.get_instance(modes.Mode, self._mode, self._mode_args, context) - def padding(self, context: _ContextLike) -> Padding: + def padding(self, context) -> Padding: """ Get the padding scheme instance. @@ -129,9 +119,7 @@ def padding(self, context: _ContextLike) -> Padding: """ return self.get_instance(Padding, self._padding, self._padding_args, context) - def get_instance( - self, type_: type, field: Any, args: Any, context: _ContextLambda - ) -> Any: + def get_instance(self, type_, field, args, context): """ Get an instance of a specified type. @@ -155,7 +143,7 @@ def get_instance( args, kwargs = get_args(args, context), {} return field(*args, **kwargs) - def pack_single(self, obj: Any, context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: """ Pack a single element. @@ -164,6 +152,8 @@ def pack_single(self, obj: Any, context: _ContextLike) -> None: :param context: The current operation context. :type context: _ContextLike """ + from cryptography.hazmat.primitives.ciphers import Cipher + cipher = Cipher(self.algorithm(context), self.mode(context)) padding = self.padding(context) @@ -175,7 +165,7 @@ def pack_single(self, obj: Any, context: _ContextLike) -> None: encryptor = cipher.encryptor() super().pack_single(encryptor.update(data) + encryptor.finalize(), context) - def unpack_single(self, context: _ContextLike) -> memoryview: + def unpack_single(self, context): """ Unpack a single element. @@ -184,6 +174,7 @@ def unpack_single(self, context: _ContextLike) -> memoryview: :return: The unpacked element as a memoryview. 
:rtype: memoryview """ + from cryptography.hazmat.primitives.ciphers import Cipher value = super().unpack_single(context) cipher = Cipher(self.algorithm(context), self.mode(context)) @@ -197,29 +188,24 @@ def unpack_single(self, context: _ContextLike) -> memoryview: return memoryview(data) -_KeyType = Union[str, bytes, int, _ContextLambda] - - class KeyCipher(Bytes): - key: Union[str, bytes, int] - """The key that should be applied. + # key: bytes + # """The key that should be applied. - It will be converted automatically to bytes if not given. - """ + # It will be converted automatically to bytes if not given. + # """ - key_length: int - """Internal attribute to keep track of the key's length""" + # key_length: int + # """Internal attribute to keep track of the key's length""" __slots__ = "key", "key_length", "is_lazy" - def __init__( - self, key: _KeyType, length: Union[_ContextLambda, int, None] = None - ) -> None: + def __init__(self, key, length=None) -> None: super().__init__(length or ...) 
self.key = self.is_lazy = self.key_length = None self.set_key(key) - def set_key(self, key: _KeyType, context: _ContextLike = None) -> None: + def set_key(self, key, context=None) -> None: if callable(key) and context is None: # context lambda indicates the key will be computed at runtime self.key = key @@ -242,7 +228,7 @@ def set_key(self, key: _KeyType, context: _ContextLike = None) -> None: self.key_length = len(self.key) self.is_lazy = False - def process(self, obj: bytes, context: _ContextLike) -> bytes: + def process(self, obj: bytes, context) -> bytes: length = len(obj) data = bytearray(length) key = self.key @@ -255,10 +241,10 @@ def process(self, obj: bytes, context: _ContextLike) -> bytes: def _do_process(self, src: bytes, dest: bytearray): raise NotImplementedError - def pack_single(self, obj: bytes, context: _ContextLike) -> None: + def pack_single(self, obj: bytes, context) -> None: context[CTX_STREAM].write(self.process(obj, context)) - def unpack_single(self, context: _ContextLike) -> bytes: + def unpack_single(self, context) -> bytes: obj: bytes = super().unpack_single(context) return self.process(obj, context) diff --git a/src/caterpillar/fields/crypto.pyi b/src/caterpillar/fields/crypto.pyi new file mode 100755 index 00000000..57f43624 --- /dev/null +++ b/src/caterpillar/fields/crypto.pyi @@ -0,0 +1,75 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. 
If not, see . +from .common import Bytes, Memory +from caterpillar.abc import ( + _ContextLambda, + _ContextLike, + _GreedyType, + _StructLike, + _LengthT, +) +from caterpillar.context import Context +from cryptography.hazmat.primitives.ciphers import modes, CipherAlgorithm +from cryptography.hazmat.primitives.padding import PaddingContext +from typing import Any, Iterable, Protocol, Type, Union, _VT + +class Padding(Protocol): + def unpadder(self) -> PaddingContext: ... + def padder(self) -> PaddingContext: ... + +KwArgs = Context +_ArgType = Union[_ContextLambda, Any] + +class Encrypted(Memory): + post: _StructLike | None + def __init__( + self, + length: int | _GreedyType | _ContextLambda, + algorithm: type[CipherAlgorithm], + mode: Type[modes.Mode] | modes.Mode, + padding: Padding | type[Padding] | None = None, + algo_args: Iterable[_ArgType] | None = None, + mode_args: Iterable[_ArgType] | None = None, + padding_args: Iterable[_ArgType] | None = None, + post: _StructLike | None = None, + ) -> None: ... + def algorithm(self, context: _ContextLike) -> CipherAlgorithm: ... + def mode(self, context: _ContextLike) -> modes.Mode: ... + def padding(self, context: _ContextLike) -> Padding: ... + def get_instance( + self, + type_: Type[_VT], + field: Any | _VT | None, + args: Any, + context: _ContextLambda, + ) -> _VT: ... + def pack_single(self, obj: Any, context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> memoryview: ... + +_KeyType = Union[int, str, bytes, _ContextLambda[Union[int, str, bytes]]] + +class KeyCipher(Bytes): + key: _KeyType + key_length: int + is_lazy: bool + def __init__(self, key: _KeyType, length: _LengthT | None = None) -> None: ... + def set_key(self, key: _KeyType, context: _ContextLike | None = None) -> None: ... + def process(self, obj: bytes, context: _ContextLike) -> bytes: ... + def pack_single(self, obj: bytes, context: _ContextLike) -> None: ... 
+ def unpack_single(self, context: _ContextLike) -> bytes: ... + +class Xor(KeyCipher): ... +class Or(KeyCipher): ... +class And(KeyCipher): ... diff --git a/src/caterpillar/fields/digest.py b/src/caterpillar/fields/digest.py old mode 100644 new mode 100755 index 94f68b55..01226c0b --- a/src/caterpillar/fields/digest.py +++ b/src/caterpillar/fields/digest.py @@ -17,15 +17,10 @@ import warnings import zlib -from typing import Any, Callable, Optional, Self, Type - -from caterpillar.abc import _ContextLike, _StructLike, _ContextLambda, _Action from caterpillar.context import CTX_OBJECT, CTX_STREAM from caterpillar.exception import StructException, ValidationError from caterpillar.shared import Action -from caterpillar.fields.hook import ( - IOHook, -) +from caterpillar.fields.hook import IOHook from ._base import Field from .common import Bytes, uint32 @@ -84,13 +79,7 @@ class Algorithm: __slots__ = ("_create", "_update", "_digest", "name") - def __init__( - self, - create: Callable[[_ContextLike], Any] | None = None, - update: Callable[[Any, bytes, _ContextLike], Any] | None = None, - digest: Callable[[Any, _ContextLike], bytes] | None = None, - name: Optional[str] = None, - ) -> None: + def __init__(self, create=None, update=None, digest=None, name=None) -> None: self._create = create self._update = update self._digest = digest @@ -107,7 +96,7 @@ def __repr__(self) -> str: """ return f"<{self.__class__.__name__} name={self.name or ''!r}>" - def create(self, context: _ContextLike) -> Any: + def create(self, context): """ Create an instance of the algorithm or checksum using the provided context. @@ -124,7 +113,7 @@ def create(self, context: _ContextLike) -> Any: raise NotImplementedError("create() is not implemented for this algorithm") - def update(self, algo_obj: Any, data: bytes, context: _ContextLike) -> Any: + def update(self, algo_obj, data: bytes, context): """ Update the algorithm or checksum with the given data. 
@@ -145,7 +134,7 @@ def update(self, algo_obj: Any, data: bytes, context: _ContextLike) -> Any: raise NotImplementedError("update() is not implemented for this algorithm") - def digest(self, algo_obj: Any, context: _ContextLike) -> bytes: + def digest(self, algo_obj, context) -> bytes: """ Compute the digest or checksum value from the algorithm instance. @@ -236,14 +225,7 @@ class Digest: :type path: Optional[str] """ - def __init__( - self, - algorithm: Algorithm, - struct: _StructLike, - name: Optional[str] = None, - verify: bool = False, - path: Optional[str] = None, - ) -> None: + def __init__(self, algorithm, struct, name=None, verify=False, path=None) -> None: if (sys.version_info.major, sys.version_info.minor) >= (3, 14): warnings.warn( "Python3.14 breaks support for Digest fields. The hash must be calculated " @@ -265,7 +247,7 @@ def __init__( self._verify = verify self.path = path or f"{CTX_OBJECT}.{self.name}" - def _get_annotations(self, frame: Any) -> dict[str, Any]: + def _get_annotations(self, frame): """ Retrieve the annotations (i.e., field definitions) from the current frame's local variables. @@ -290,7 +272,7 @@ def _get_annotations(self, frame: Any) -> dict[str, Any]: def __repr__(self) -> str: return f"Digest(algo={self.algo!r}, verify={self._verify!r})" - def __enter__(self) -> Self: + def __enter__(self): """ Install the start action for the digest field during struct definition. @@ -349,7 +331,7 @@ def __exit__(self, *_) -> None: if self._verify: annotations[f"{self.name}_verify"] = Action(unpack=self.verfiy) - def begin(self, context: _ContextLike) -> None: + def begin(self, context) -> None: """ Initialize the digest calculation at the beginning of packing/unpacking. 
@@ -362,7 +344,7 @@ def begin(self, context: _ContextLike) -> None: self._hook.init(context) self._obj = self.algo.create(context) - def end_pack(self, context: _ContextLike) -> None: + def end_pack(self, context) -> None: """ Finalize the digest calculation at the end of packing/unpacking. @@ -375,7 +357,7 @@ def end_pack(self, context: _ContextLike) -> None: context.__context_setattr__(self.path or self.name, self._digest) self._hook.finish(context) - def end_unpack(self, context: _ContextLike) -> None: + def end_unpack(self, context) -> None: """ Finalize the digest calculation at the end of unpacking. @@ -387,7 +369,7 @@ def end_unpack(self, context: _ContextLike) -> None: self._digest = self.algo.digest(self._obj, context) self._hook.finish(context) - def update(self, data: bytes, context: _ContextLike) -> None: + def update(self, data: bytes, context) -> None: """ Update the checksum with new data during packing/unpacking. @@ -400,7 +382,7 @@ def update(self, data: bytes, context: _ContextLike) -> None: """ self._obj = self.algo.update(self._obj, data, context) or self._obj - def verfiy(self, context: _ContextLike) -> None: + def verfiy(self, context) -> None: """ Verify the checksum upon unpacking. @@ -463,7 +445,7 @@ def __init__(self, target: str, algorithm: Algorithm) -> None: self._ctx_hook = f"{CTX_DIGEST_HOOK}__{target}" self._ctx_algo = f"{CTX_DIGEST_ALGO}__{target}" - def update(self, data: bytes, context: _ContextLike) -> None: + def update(self, data: bytes, context) -> None: """ Updates the digest object with new data. @@ -474,7 +456,7 @@ def update(self, data: bytes, context: _ContextLike) -> None: new_obj = self.algo.update(obj, data, context) context[self._ctx_obj] = new_obj or obj - def begin(self, context: _ContextLike) -> None: + def begin(self, context) -> None: """ Initializes the digest algorithm and attaches an IO hook to track data. 
""" @@ -518,7 +500,7 @@ class Format: :type verify: bool """ - def __init__(self, target: str, struct: _StructLike, verify: bool = False) -> None: + def __init__(self, target: str, struct, verify=False) -> None: self.name = target self.struct = struct self.verify = verify @@ -531,11 +513,11 @@ def __type__(self) -> type: """Defines the Python type returned after unpacking (always bytes).""" return bytes - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """Returns the size in bytes of the digest field.""" return self.struct.__size__(context) - def __pack__(self, obj: None, context: _ContextLike) -> None: + def __pack__(self, obj: None, context) -> None: """ Called during packing. Computes the digest over all previously packed data, stores it in the context, finalizes the IO hook, and packs the digest itself. @@ -545,7 +527,7 @@ def __pack__(self, obj: None, context: _ContextLike) -> None: context[self._ctx_hook].finish(context) self.struct.__pack__(digest, context) - def __unpack__(self, context: _ContextLike): + def __unpack__(self, context): """ Called during unpacking. Computes the digest over all preceding data, reads the stored digest, optionally verifies it, and returns the unpacked value. @@ -573,7 +555,7 @@ def __unpack__(self, context: _ContextLike): return digest @staticmethod - def begin(target: str, algo: Algorithm) -> _Action: + def begin(target: str, algo): """Factory method to create a DigestFieldAction used at the start of a struct to set up hashing for the named digest field. @@ -588,7 +570,7 @@ def begin(target: str, algo: Algorithm) -> _Action: # --- public algorithms --- -def _hash_digest(algo: Algorithm, struct: _StructLike): +def _hash_digest(algo, struct): """ A utility function to create a `Digest` wrapper for a specific hash algorithm and struct. The wrapper initializes the digest calculation based on the algorithm provided. 
@@ -602,26 +584,26 @@ def _hash_digest(algo: Algorithm, struct: _StructLike): """ def _wrapper( - name: Optional[str] = None, - verify: bool = False, - path: Optional[str] = None, + name=None, + verify=False, + path=None, ) -> Digest: return Digest(algo, struct, name, verify, path) return _wrapper -def _hash_digest_field(struct: _StructLike): +def _hash_digest_field(struct): def _wrapper( name: str, - verify: bool = False, + verify=False, ) -> DigestField: return DigestField(target=name, struct=struct, verify=verify) return _wrapper -def _hashlib_algo(func) -> Algorithm: +def _hashlib_algo(func): """ Creates an `Algorithm` object from a hash function (e.g., hashlib.sha256). @@ -658,9 +640,8 @@ def _hashlib_algo(func) -> Algorithm: try: from cryptography.hazmat.primitives import hashes - from cryptography.hazmat.primitives import hmac - def _cryptography_hash_algo(cls: Type[hashes.HashAlgorithm]): + def _cryptography_hash_algo(cls): """ Creates an `Algorithm` from a cryptography `hashes.HashAlgorithm` class. @@ -698,62 +679,6 @@ def _cryptography_hash_algo(cls: Type[hashes.HashAlgorithm]): Sha3_512 = _hash_digest(Sha3_512_Algo, Bytes(64)) Md5 = _hash_digest(Md5_Algo, Bytes(16)) - class HMACAlgorithm(Algorithm): - """ - HMAC (Hash-based Message Authentication Code) algorithm implementation. - - This class wraps an HMAC algorithm using a specified hash function and key. - """ - - def __init__( - self, - key: bytes | _ContextLambda, - algorithm: hashes.HashAlgorithm, - ) -> None: - super().__init__(name=f"hmac_{algorithm.name}") - self._key = key - self._algorithm = algorithm - - def create(self, context: _ContextLike) -> Any: - """ - Creates an HMAC object with the provided key and algorithm. - """ - key = self._key(context) if callable(self._key) else self._key - return hmac.HMAC(key, self._algorithm) - - def update(self, algo_obj: Any, data: bytes, context: _ContextLike) -> Any: - """ - Updates the HMAC object with new data. 
- """ - return algo_obj.update(data) - - def digest(self, algo_obj: Any, context: _ContextLike) -> Any: - """ - Finalizes the HMAC object and returns the computed digest. - """ - return algo_obj.finalize() - - class HMAC(Digest): - """ - HMAC Digest handler, used to create and verify HMACs based on a provided key and algorithm. - """ - - def __init__( - self, - key: bytes | _ContextLambda, - algorithm: hashes.HashAlgorithm, - name: Optional[str] = None, - verify: bool = False, - path: Optional[str] = None, - ) -> None: - super().__init__( - HMACAlgorithm(key, algorithm), - Bytes(algorithm.digest_size), - name, - verify, - path, - ) - except ImportError: Sha1_Algo = _hashlib_algo(hashlib.sha1) Sha2_224_Algo = _hashlib_algo(hashlib.sha224) @@ -777,9 +702,6 @@ def __init__( Sha3_512 = _hash_digest(Sha3_512_Algo, Bytes(64)) Md5 = _hash_digest(Md5_Algo, Bytes(16)) - HMACAlgorithm = None - HMAC = None - Sha1_Field = _hash_digest_field(Bytes(32)) Sha2_224_Field = _hash_digest_field(Bytes(28)) @@ -791,3 +713,63 @@ def __init__( Sha3_384_Field = _hash_digest_field(Bytes(48)) Sha3_512_Field = _hash_digest_field(Bytes(64)) Md5_Field = _hash_digest_field(Bytes(16)) + + +class HMACAlgorithm(Algorithm): + """ + HMAC (Hash-based Message Authentication Code) algorithm implementation. + + This class wraps an HMAC algorithm using a specified hash function and key. + """ + + def __init__( + self, + key, + algorithm, + ) -> None: + super().__init__(name=f"hmac_{algorithm.name}") + self._key = key + self._algorithm = algorithm + + def create(self, context): + """ + Creates an HMAC object with the provided key and algorithm. + """ + from cryptography.hazmat.primitives import hmac + + key = self._key(context) if callable(self._key) else self._key + return hmac.HMAC(key, self._algorithm) + + def update(self, algo_obj, data: bytes, context): + """ + Updates the HMAC object with new data. 
+ """ + return algo_obj.update(data) + + def digest(self, algo_obj, context): + """ + Finalizes the HMAC object and returns the computed digest. + """ + return algo_obj.finalize() + + +class HMAC(Digest): + """ + HMAC Digest handler, used to create and verify HMACs based on a provided key and algorithm. + """ + + def __init__( + self, + key, + algorithm, + name=None, + verify=False, + path=None, + ) -> None: + super().__init__( + HMACAlgorithm(key, algorithm), + Bytes(algorithm.digest_size), + name, + verify, + path, + ) diff --git a/src/caterpillar/fields/digest.pyi b/src/caterpillar/fields/digest.pyi new file mode 100755 index 00000000..0d198880 --- /dev/null +++ b/src/caterpillar/fields/digest.pyi @@ -0,0 +1,167 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+from ._base import Field as Field +from .common import Bytes as Bytes, uint32 as uint32 +from caterpillar.abc import _ContextLambda, _ContextLike, _StructLike, _ActionLike +from caterpillar.context import CTX_OBJECT as CTX_OBJECT, CTX_STREAM as CTX_STREAM +from caterpillar.exception import ( + StructException as StructException, + ValidationError as ValidationError, +) +from caterpillar.fields.hook import IOHook as IOHook +from caterpillar.shared import Action as Action +from cryptography.hazmat.primitives import hashes +from typing import Callable, Generic, Optional, Protocol, Self, Type, TypeVar + +DEFAULT_DIGEST_PATH: str + +class _DigestValue: ... + +_AlgoObjT = TypeVar("_AlgoObjT") +_AlgoReturnT = TypeVar("_AlgoReturnT", default=bytes) + +class Algorithm(Generic[_AlgoObjT, _AlgoReturnT]): + name: str + def __init__( + self, + create: _ContextLambda[_AlgoObjT] | None = None, + update: Callable[[_AlgoObjT, bytes, _ContextLike], _AlgoObjT] | None = None, + digest: Callable[[_AlgoObjT, _ContextLike], _AlgoReturnT] | None = None, + name: str | None = None, + ) -> None: ... + def create(self, context: _ContextLike) -> _AlgoObjT: ... + def update( + self, algo_obj: _AlgoObjT, data: bytes, context: _ContextLike + ) -> _AlgoObjT: ... + def digest(self, algo_obj: _AlgoObjT, context: _ContextLike) -> _AlgoReturnT: ... + +class Digest(Generic[_AlgoObjT, _AlgoReturnT]): + algo: Algorithm[_AlgoObjT, _AlgoReturnT] + name: str + struct: _StructLike[_AlgoReturnT, _AlgoReturnT] + path: str + def __init__( + self, + algorithm: Algorithm[_AlgoObjT, _AlgoReturnT], + struct: _StructLike[_AlgoReturnT, _AlgoReturnT], + name: str | None = None, + verify: bool = False, + path: str | None = None, + ) -> None: ... + def __enter__(self) -> Self: ... + def __exit__(self, *_) -> None: ... + def begin(self, context: _ContextLike) -> None: ... + def end_pack(self, context: _ContextLike) -> None: ... + def end_unpack(self, context: _ContextLike) -> None: ... 
+ def update(self, data: bytes, context: _ContextLike) -> None: ... + def verfiy(self, context: _ContextLike) -> None: ... + +CTX_DIGEST_OBJ: str +CTX_DIGEST_HOOK: str +CTX_DIGEST_ALGO: str +CTX_DIGEST: str + +class DigestFieldAction(Generic[_AlgoObjT, _AlgoReturnT]): + name: str + algo: Algorithm[_AlgoObjT, _AlgoReturnT] + def __init__( + self, target: str, algorithm: Algorithm[_AlgoObjT, _AlgoReturnT] + ) -> None: ... + def update(self, data: bytes, context: _ContextLike) -> None: ... + def begin(self, context: _ContextLike) -> None: ... + __action_pack__ = begin + __action_unpack__ = begin + +class DigestField(Generic[_AlgoReturnT], _StructLike[None, _AlgoReturnT]): + name: str + struct: _StructLike[_AlgoReturnT, _AlgoReturnT] + verify: bool + def __init__( + self, + target: str, + struct: _StructLike[_AlgoReturnT, _AlgoReturnT], + verify: bool = False, + ) -> None: ... + def __type__(self) -> Type[bytes]: ... + def __size__(self, context: _ContextLike) -> int: ... + def __pack__(self, obj: None, context: _ContextLike) -> None: ... + def __unpack__(self, context: _ContextLike) -> _AlgoReturnT: ... + @staticmethod + def begin( + target: str, algo: Algorithm[_AlgoObjT, _AlgoReturnT] + ) -> DigestFieldAction[_AlgoObjT, _AlgoReturnT]: ... + +class _DigestFactory(Protocol[_AlgoReturnT]): + def __call__( + self, name: Optional[str] = ..., verify: bool = ..., path: Optional[str] = ... + ) -> Digest[_AlgoReturnT]: ... 
+ +Crc32_Algo: Algorithm[int, int] +Crc32: _DigestFactory[int] +Crc32_Field: DigestField[int] +Adler_Algo: Algorithm[int] +Adler: _DigestFactory[int] +Adler_Field: DigestField[int] +Sha1_Algo: Algorithm[hashes.Hash] +Sha2_224_Algo: Algorithm[hashes.Hash] +Sha2_256_Algo: Algorithm[hashes.Hash] +Sha2_384_Algo: Algorithm[hashes.Hash] +Sha2_512_Algo: Algorithm[hashes.Hash] +Sha3_224_Algo: Algorithm[hashes.Hash] +Sha3_256_Algo: Algorithm[hashes.Hash] +Sha3_384_Algo: Algorithm[hashes.Hash] +Sha3_512_Algo: Algorithm[hashes.Hash] +Md5_Algo: Algorithm[hashes.Hash] +Sha1: _DigestFactory[bytes] +Sha2_224: _DigestFactory[bytes] +Sha2_256: _DigestFactory[bytes] +Sha2_384: _DigestFactory[bytes] +Sha2_512: _DigestFactory[bytes] +Sha3_224: _DigestFactory[bytes] +Sha3_256: _DigestFactory[bytes] +Sha3_384: _DigestFactory[bytes] +Sha3_512: _DigestFactory[bytes] +Md5: _DigestFactory[bytes] + +class HMACAlgorithm(Algorithm[hashes.Hash]): + def __init__( + self, key: bytes | _ContextLambda[bytes], algorithm: hashes.HashAlgorithm + ) -> None: ... + def create(self, context: _ContextLike) -> hashes.Hash: ... + def update( + self, algo_obj: hashes.Hash, data: bytes, context: _ContextLike + ) -> hashes.Hash: ... + def digest(self, algo_obj: hashes.Hash, context: _ContextLike) -> bytes: ... + +class HMAC(Digest[bytes]): + def __init__( + self, + key: bytes | _ContextLambda[bytes], + algorithm: hashes.HashAlgorithm, + name: str | None = None, + verify: bool = False, + path: str | None = None, + ) -> None: ... 
+ +Sha1_Field: DigestField[bytes] +Sha2_224_Field: DigestField[bytes] +Sha2_256_Field: DigestField[bytes] +Sha2_384_Field: DigestField[bytes] +Sha2_512_Field: DigestField[bytes] +Sha3_224_Field: DigestField[bytes] +Sha3_256_Field: DigestField[bytes] +Sha3_384_Field: DigestField[bytes] +Sha3_512_Field: DigestField[bytes] +Md5_Field: DigestField[bytes] diff --git a/src/caterpillar/fields/hook.py b/src/caterpillar/fields/hook.py old mode 100644 new mode 100755 index 858504c7..2934a19b --- a/src/caterpillar/fields/hook.py +++ b/src/caterpillar/fields/hook.py @@ -13,17 +13,9 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . from io import RawIOBase -from typing import Callable, Optional -from caterpillar.abc import _ContextLike from caterpillar.context import CTX_STREAM -HookInit = Callable[[_ContextLike], None] -HookUpdate = Callable[[bytes, _ContextLike], Optional[bytes]] -HookRead = Callable[[bytes, _ContextLike], Optional[bytes]] -HookWrite = Callable[[bytes, _ContextLike], Optional[bytes]] -HookFinish = Callable[[_ContextLike], None] - class IOHook(RawIOBase): """ @@ -49,13 +41,7 @@ class IOHook(RawIOBase): """ def __init__( - self, - io: RawIOBase, - init: Optional[HookInit] = None, - update: Optional[HookUpdate] = None, - read: Optional[HookRead] = None, - write: Optional[HookWrite] = None, - finish: Optional[HookFinish] = None, + self, io, init=None, update=None, read=None, write=None, finish=None ) -> None: # NOTE: no validation here if _io is valid, because # self.init will set it @@ -76,7 +62,7 @@ def assert_context_set(self) -> None: if self._context is None: raise ValueError("Context is not set") - def init(self, context: _ContextLike) -> None: + def init(self, context) -> None: """ Initialize the I/O hook with the provided context. This triggers the `init` hook, if available, and sets up the context for subsequent operations. 
@@ -90,7 +76,7 @@ def init(self, context: _ContextLike) -> None: self._io = context[CTX_STREAM] self._context[CTX_STREAM] = self - def finish(self, context: _ContextLike) -> None: + def finish(self, context) -> None: """ Finalize the I/O hook by calling the `finish` hook (if provided) and restoring the original I/O stream in the context. @@ -122,7 +108,7 @@ def readable(self) -> bool: """ return super().readable() - def read(self, size: int = -1) -> bytes | None: + def read(self, size: int = -1): """ Read data from the stream, applying the optional hooks (if any). @@ -147,7 +133,7 @@ def read(self, size: int = -1) -> bytes | None: return data - def write(self, b: bytes, /) -> int | None: + def write(self, b, /): """ Write data to the stream, applying the optional hooks (if any). diff --git a/src/caterpillar/fields/hook.pyi b/src/caterpillar/fields/hook.pyi new file mode 100755 index 00000000..8e713ecd --- /dev/null +++ b/src/caterpillar/fields/hook.pyi @@ -0,0 +1,46 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+from typing import Callable, Optional +from typing_extensions import Buffer +from caterpillar.abc import _ContextLike, _ContextLambda +from caterpillar.context import CTX_STREAM as CTX_STREAM +from io import RawIOBase + +HookInit = _ContextLambda[None] +HookUpdate = Callable[[bytes, _ContextLike], Optional[bytes]] +HookRead = Callable[[bytes, _ContextLike], Optional[bytes]] +HookWrite = Callable[[bytes, _ContextLike], Optional[bytes]] +HookFinish = _ContextLambda[None] + +class IOHook(RawIOBase): + def __init__( + self, + io: RawIOBase | None, + init: HookInit | None = None, + update: HookUpdate | None = None, + read: HookRead | None = None, + write: HookWrite | None = None, + finish: HookFinish | None = None, + ) -> None: ... + def assert_context_set(self) -> None: ... + def init(self, context: _ContextLike) -> None: ... + def finish(self, context: _ContextLike) -> None: ... + def seekable(self) -> bool: ... + def readable(self) -> bool: ... + def read(self, size: int = -1) -> bytes | None: ... + def write(self, b: Buffer, /) -> int | None: ... + def writable(self) -> bool: ... + def tell(self) -> int: ... + def seek(self, offset: int, whence: int = 0) -> int: ... 
From fc6f65d21300c7772eb2e7593543105c88b30648 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Wed, 25 Jun 2025 11:18:25 +0200 Subject: [PATCH 15/41] Stub files for fields.{net,pointer,varint} --- src/caterpillar/fields/net.py | 15 ++++---- src/caterpillar/fields/net.pyi | 37 ++++++++++++++++++ src/caterpillar/fields/pointer.py | 40 +++++++++---------- src/caterpillar/fields/pointer.pyi | 62 ++++++++++++++++++++++++++++++ src/caterpillar/fields/varint.py | 15 +++----- src/caterpillar/fields/varint.pyi | 15 ++++++++ 6 files changed, 145 insertions(+), 39 deletions(-) mode change 100644 => 100755 src/caterpillar/fields/net.py create mode 100755 src/caterpillar/fields/net.pyi mode change 100644 => 100755 src/caterpillar/fields/pointer.py create mode 100755 src/caterpillar/fields/pointer.pyi mode change 100644 => 100755 src/caterpillar/fields/varint.py create mode 100755 src/caterpillar/fields/varint.pyi diff --git a/src/caterpillar/fields/net.py b/src/caterpillar/fields/net.py old mode 100644 new mode 100755 index 93e9cac8..45042d56 --- a/src/caterpillar/fields/net.py +++ b/src/caterpillar/fields/net.py @@ -19,7 +19,6 @@ from typing import Union, Any, Optional -from caterpillar.abc import _ContextLike from .common import Transformer, uint32, UInt, Bytes from ._base import singleton @@ -47,7 +46,7 @@ def __type__(self) -> type: """ return ipaddress.IPv4Address - def encode(self, obj: ipaddress.IPv4Address, context: _ContextLike) -> Any: + def encode(self, obj: ipaddress.IPv4Address, context): """ Encode an IPv4Address object. @@ -58,7 +57,7 @@ def encode(self, obj: ipaddress.IPv4Address, context: _ContextLike) -> Any: # pylint: disable-next=protected-access return obj._ip - def decode(self, parsed: int, context: _ContextLike) -> ipaddress.IPv4Address: + def decode(self, parsed: int, context) -> ipaddress.IPv4Address: """ Decode an encoded IPv4 address. 
@@ -92,7 +91,7 @@ def __type__(self) -> type: """ return ipaddress.IPv6Address - def encode(self, obj: ipaddress.IPv6Address, context: _ContextLike) -> Any: + def encode(self, obj: ipaddress.IPv6Address, context): """ Encode an IPv6Address object. @@ -103,7 +102,7 @@ def encode(self, obj: ipaddress.IPv6Address, context: _ContextLike) -> Any: # pylint: disable-next=protected-access return obj._ip - def decode(self, parsed: int, context: _ContextLike) -> ipaddress.IPv6Address: + def decode(self, parsed: int, context) -> ipaddress.IPv6Address: """ Decode an encoded IPv6 address. @@ -123,7 +122,7 @@ class MACAddress(Transformer): DELIMITERS = re.compile(rb"[:-]") - def __init__(self, sep: Optional[str] = None) -> None: + def __init__(self, sep=None) -> None: """ Initialize the MACAddress transformer. @@ -132,7 +131,7 @@ def __init__(self, sep: Optional[str] = None) -> None: super().__init__(Bytes(6)) self.sep = sep or ":" - def encode(self, obj: Union[str, bytes], context: _ContextLike) -> Any: + def encode(self, obj, context): """ Encode a MAC address. @@ -147,7 +146,7 @@ def encode(self, obj: Union[str, bytes], context: _ContextLike) -> Any: mac = re.sub(MACAddress.DELIMITERS, b"", obj) return binascii.unhexlify(mac) - def decode(self, parsed: bytes, context: _ContextLike) -> bytes: + def decode(self, parsed: bytes, context) -> bytes: """ Decode an encoded MAC address. diff --git a/src/caterpillar/fields/net.pyi b/src/caterpillar/fields/net.pyi new file mode 100755 index 00000000..13494122 --- /dev/null +++ b/src/caterpillar/fields/net.pyi @@ -0,0 +1,37 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from re import Pattern +from ipaddress import IPv4Address, IPv6Address + +from ._base import singleton as singleton +from .common import ( + Bytes as Bytes, + Transformer as Transformer, + UInt as UInt, + uint32 as uint32, +) +from caterpillar.abc import _ContextLike + +IPv4Address = Transformer[IPv4Address, int, IPv4Address, int] +IPv6Address = Transformer[IPv6Address, int, IPv6Address, int] + +class MACAddress(Transformer[str | bytes, bytes, bytes, bytes]): + DELIMITERS: Pattern + sep: str + def __init__(self, sep: str | None = None) -> None: ... + def encode(self, obj: str | bytes, context: _ContextLike) -> bytes: ... + def decode(self, parsed: bytes, context: _ContextLike) -> bytes: ... + +MAC: MACAddress diff --git a/src/caterpillar/fields/pointer.py b/src/caterpillar/fields/pointer.py old mode 100644 new mode 100755 index cbc14f6e..7cbde26a --- a/src/caterpillar/fields/pointer.py +++ b/src/caterpillar/fields/pointer.py @@ -15,12 +15,13 @@ from typing import Any, Union, Optional -from caterpillar.abc import _ContextLike, _StructLike, _ContextLambda, getstruct +from caterpillar.abc import _ContextLike, _StructLike, _ContextLambda from caterpillar.byteorder import Arch from caterpillar.exception import DelegationError, StructException from caterpillar.context import CTX_STREAM, CTX_FIELD, CTX_ARCH, CTX_SEQ from caterpillar.options import Flag from caterpillar._common import WithoutContextVar +from caterpillar.shared import getstruct from ._mixin import FieldStruct from .common import uint16, uint24, uint32, uint64, uint8 @@ -38,15 +39,13 @@ class pointer(int): :ivar Any obj: The associated object, if any. 
""" - obj: Optional[Any] - def __repr__(self) -> str: result = super().__repr__() if self.obj is not None: result = f"<{type(self.obj).__name__}* {hex(self)}>" return result - def get(self) -> Optional[Any]: + def get(self): return self.obj @@ -58,20 +57,17 @@ class Pointer(FieldStruct): :ivar struct: The configured struct to use. """ - model: Optional[_StructLike] - struct: Union[_StructLike, _ContextLambda] - __slots__ = ("model", "struct") def __init__( self, - struct: Union[_StructLike, _ContextLambda], - model: Optional[_StructLike] = None, + struct, + model=None, ) -> None: self.struct = struct self.model = getstruct(model, model) if model is not None else None - def __mul__(self, model: _StructLike) -> "Pointer": + def __mul__(self, model): """ Create a new Pointer with a specified model. @@ -82,7 +78,7 @@ def __mul__(self, model: _StructLike) -> "Pointer": return type(self)(self.struct, model) - def __type__(self) -> type: + def __type__(self): """ Get the type associated with the Pointer. @@ -91,7 +87,7 @@ def __type__(self) -> type: """ return pointer - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context): """ Get the size of the Pointer struct. @@ -104,7 +100,7 @@ def __size__(self, context: _ContextLike) -> int: struct = self.struct(context) return struct.__size__(context) - def unpack_single(self, context: _ContextLike) -> Union[int, pointer]: + def unpack_single(self, context): """ Unpack a single value using the Pointer struct. @@ -142,7 +138,7 @@ def unpack_single(self, context: _ContextLike) -> Union[int, pointer]: stream.seek(fallback) return self._create(value, start, model_obj, context) - def pack_single(self, obj: Any, context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: """ Pack a single value using the Pointer struct. 
@@ -156,7 +152,7 @@ def pack_single(self, obj: Any, context: _ContextLike) -> None: with WithoutContextVar(context, CTX_SEQ, False): struct.__pack__(int(obj), context) - def _to_offset(self, value: Any, start: int, context: _ContextLike) -> int: + def _to_offset(self, value, start: int, context) -> int: """ Convert the pointer value to an offset. @@ -168,7 +164,7 @@ def _to_offset(self, value: Any, start: int, context: _ContextLike) -> int: """ return value - def _clean(self, value: int, context: _ContextLike) -> Any: + def _clean(self, value: int, context) -> Any: """ Clean the pointer value. @@ -178,7 +174,7 @@ def _clean(self, value: int, context: _ContextLike) -> Any: """ return value - def _create(self, value: Any, start: int, model_obj: Any, context: _ContextLike): + def _create(self, value, start: int, model_obj, context): """ Create a new pointer object. @@ -197,7 +193,7 @@ def _create(self, value: Any, start: int, model_obj: Any, context: _ContextLike) SIGNED_POINTER_TYS = {x.__bits__: x for x in [int8, int16, int24, int32, int64]} -def uintptr_fn(context: _ContextLike) -> _StructLike: +def uintptr_fn(context): """ Generator function to decide which struct to use as the pointer type based on the current architecture. @@ -210,7 +206,7 @@ def uintptr_fn(context: _ContextLike) -> _StructLike: return UNSIGNED_POINTER_TYS.get(arch.ptr_size, UInt(arch.ptr_size)) -def intptr_fn(context: _ContextLike) -> _StructLike: +def intptr_fn(context): """ Generator function to decide which struct to use as the pointer type based on the current architecture. @@ -254,7 +250,7 @@ class RelativePointer(Pointer): A struct that represents a relative pointer to another struct within the stream. """ - def __type__(self) -> type: + def __type__(self): """ Get the type associated with the RelativePointer. 
@@ -266,7 +262,7 @@ def __type__(self) -> type: return relative_pointer - def _to_offset(self, value: Any, start: int, context: _ContextLike) -> int: + def _to_offset(self, value, start: int, context) -> int: """ Convert the relative pointer value to an offset. @@ -278,7 +274,7 @@ def _to_offset(self, value: Any, start: int, context: _ContextLike) -> int: """ return start + value - def _create(self, value: Any, start: int, model_obj: Any, context: _ContextLike): + def _create(self, value, start: int, model_obj, context): """ Create a new relative pointer object. diff --git a/src/caterpillar/fields/pointer.pyi b/src/caterpillar/fields/pointer.pyi new file mode 100755 index 00000000..a55ab16b --- /dev/null +++ b/src/caterpillar/fields/pointer.pyi @@ -0,0 +1,62 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from ._mixin import FieldStruct +from caterpillar.abc import _ContextLambda, _ContextLike, _StructLike +from caterpillar.options import Flag +from typing import Dict, Generic, Optional, Type, TypeVar + +PTR_STRICT: Flag[None] + +_PtrValueT = TypeVar("_PtrValueT") + +class pointer(Generic[_PtrValueT], int): + obj: Optional[_PtrValueT] + def get(self) -> _PtrValueT | None: ... 
+ +class Pointer(FieldStruct[int, pointer[_PtrValueT]]): + model: _StructLike[_PtrValueT, _PtrValueT] | None + struct: _StructLike[int, int] | _ContextLambda[_StructLike[int, int]] + def __init__( + self, + struct: _StructLike[int, int] | _ContextLambda[_StructLike[int, int]], + model: _StructLike[_PtrValueT, _PtrValueT] | None = None, + ) -> None: ... + def __mul__( + self, model: _StructLike[_PtrValueT, _PtrValueT] + ) -> Pointer[_PtrValueT]: ... + def __type__(self) -> Type[pointer[_PtrValueT]]: ... + def __size__(self, context: _ContextLike) -> int: ... + def unpack_single(self, context: _ContextLike) -> pointer[_PtrValueT]: ... + def pack_single(self, obj: int, context: _ContextLike) -> None: ... + +UNSIGNED_POINTER_TYS: Dict +SIGNED_POINTER_TYS: Dict + +def uintptr_fn(context: _ContextLike) -> _StructLike[int, int]: ... +def intptr_fn(context: _ContextLike) -> _StructLike[int, int]: ... + +uintptr: Pointer[None] +intptr: Pointer[None] + +class relative_pointer(pointer[_PtrValueT]): + base: int + @property + def absolute(self) -> int: ... + +class RelativePointer(Pointer[_PtrValueT]): + def __type__(self) -> relative_pointer[_PtrValueT]: ... + +offintptr: RelativePointer[None] +offuintptr: RelativePointer[None] diff --git a/src/caterpillar/fields/varint.py b/src/caterpillar/fields/varint.py old mode 100644 new mode 100755 index 38d36d01..46bc8c8b --- a/src/caterpillar/fields/varint.py +++ b/src/caterpillar/fields/varint.py @@ -14,9 +14,6 @@ # along with this program. If not, see . 
from __future__ import annotations -from typing import Any - -from caterpillar.abc import _StreamType, _ContextLike from caterpillar.exception import InvalidValueError, DynamicSizeError, StreamError from caterpillar.byteorder import LittleEndian from caterpillar.context import CTX_FIELD, CTX_STREAM @@ -54,10 +51,10 @@ class VarInt(FieldStruct): def __type__(self) -> type: return int - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: raise DynamicSizeError("VarInt has dynamic size!") - def bit_config(self, context: _ContextLike) -> tuple: + def bit_config(self, context) -> tuple: high_bit = 1 << 7 low_bit = 0 if context[CTX_FIELD].has_flag(VARINT_LSB): @@ -65,7 +62,7 @@ def bit_config(self, context: _ContextLike) -> tuple: low_bit = 1 << 7 return high_bit, low_bit - def pack_single(self, obj: int, context: _ContextLike) -> None: + def pack_single(self, obj: int, context) -> None: """ Pack a single value into the stream. @@ -78,7 +75,7 @@ def pack_single(self, obj: int, context: _ContextLike) -> None: if obj < 0: raise InvalidValueError("Invalid negative value for VarInt encoding!") - stream: _StreamType = context[CTX_STREAM] + stream = context[CTX_STREAM] order = context[CTX_FIELD].order is_little = order == LittleEndian @@ -101,7 +98,7 @@ def pack_single(self, obj: int, context: _ContextLike) -> None: # Just write all bytes to the stream stream.write(bytes(data)) - def unpack_single(self, context: _ContextLike) -> Any: + def unpack_single(self, context): """ Unpack a single value from the stream. @@ -109,7 +106,7 @@ def unpack_single(self, context: _ContextLike) -> Any: :param context: The current context. :return: The unpacked value. 
""" - stream: _StreamType = context[CTX_STREAM] + stream = context[CTX_STREAM] data = [] _, lb = self.bit_config(context) shift = 0 diff --git a/src/caterpillar/fields/varint.pyi b/src/caterpillar/fields/varint.pyi new file mode 100755 index 00000000..b3386145 --- /dev/null +++ b/src/caterpillar/fields/varint.pyi @@ -0,0 +1,15 @@ +from ._mixin import FieldStruct +from caterpillar.abc import _ContextLike +from caterpillar.options import Flag +from typing import Tuple, Type + +VARINT_LSB: Flag[None] + +class VarInt(FieldStruct[int, int]): + def __type__(self) -> Type[int]: ... + def __size__(self, context: _ContextLike) -> int: ... + def bit_config(self, context: _ContextLike) -> Tuple[int, int]: ... + def pack_single(self, obj: int, context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> int: ... + +vint: VarInt From 66848a4044f0b8cf6c0a90198e76e55bf837dfda Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Wed, 25 Jun 2025 11:45:22 +0200 Subject: [PATCH 16/41] Updated imports and removed unnecessary imports --- src/caterpillar/__init__.py | 6 +- src/caterpillar/byteorder.pyi | 2 - src/caterpillar/fields/__init__.py | 158 +++++++++++++++++++++++++++- src/caterpillar/fields/common.py | 6 +- src/caterpillar/fields/common.pyi | 2 +- src/caterpillar/fields/net.py | 3 - src/caterpillar/fields/net.pyi | 7 +- src/caterpillar/model/__init__.py | 23 +++++ src/caterpillar/model/__init__.pyi | 29 ------ src/caterpillar/model/_base.py | 5 +- src/caterpillar/py.py | 159 ++++++++++++++++++++++++----- test/_Py/fields/test_py_digest.py | 1 - test/_Py/fields/test_py_memory.py | 2 +- 13 files changed, 327 insertions(+), 76 deletions(-) delete mode 100755 src/caterpillar/model/__init__.pyi diff --git a/src/caterpillar/__init__.py b/src/caterpillar/__init__.py index 546e6d8b..7247e54e 100644 --- a/src/caterpillar/__init__.py +++ b/src/caterpillar/__init__.py @@ -19,7 +19,7 @@ __author__ = "MatrixEditor" 
-def native_support(): +def native_support() -> bool: """Return True if native support is available.""" try: # pylint: disable-next=import-outside-toplevel @@ -31,4 +31,6 @@ def native_support(): # Explicitly report deprecation warnings -warnings.filterwarnings("default", module="caterpillar") \ No newline at end of file +warnings.filterwarnings("default", module="caterpillar") + +__all__ = ["__version__", "__author__", "native_support"] diff --git a/src/caterpillar/byteorder.pyi b/src/caterpillar/byteorder.pyi index 8851d09e..e4babc47 100755 --- a/src/caterpillar/byteorder.pyi +++ b/src/caterpillar/byteorder.pyi @@ -15,8 +15,6 @@ from enum import Enum from typing import Optional -BYTEORDER_FIELD: str = ... - class ByteOrder: name: str ch: str diff --git a/src/caterpillar/fields/__init__.py b/src/caterpillar/fields/__init__.py index 887b5204..87236985 100644 --- a/src/caterpillar/fields/__init__.py +++ b/src/caterpillar/fields/__init__.py @@ -13,7 +13,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
from ._base import Field, INVALID_DEFAULT, DEFAULT_OPTION, singleton -from ._mixin import FieldMixin, FieldStruct, Chain, Operator +from ._mixin import FieldMixin, FieldStruct, Chain, Operator, get_args, get_kwargs from .common import ( PyStructFormattedField, Transformer, @@ -54,6 +54,7 @@ Aligned, align, Lazy, + ENUM_STRICT, ) from .varint import VarInt, VARINT_LSB, vint from .compression import ( @@ -63,9 +64,21 @@ LZMACompressed, LZOCompressed, ) -from .crypto import Encrypted, Xor, Or, And -from .net import IPv4Address, IPv6Address, MAC, MACAddress -from .pointer import uintptr, intptr, offintptr, offuintptr, Pointer +from .crypto import Encrypted, Xor, Or, And, KeyCipher +from .net import MAC, MACAddress, IPv4Address, IPv6Address +from .pointer import ( + uintptr, + intptr, + offintptr, + offuintptr, + Pointer, + pointer, + intptr_fn, + PTR_STRICT, + relative_pointer, + RelativePointer, + uintptr_fn, +) from .conditional import ConditionalChain, If, Else, ElseIf from .hook import IOHook from .digest import ( @@ -110,4 +123,141 @@ Crc32_Field, Adler_Algo, Adler_Field, + CTX_DIGEST, + CTX_DIGEST_ALGO, + CTX_DIGEST_HOOK, + CTX_DIGEST_OBJ, + HMACAlgorithm, ) + +__all__ = [ + "Digest", + "Algorithm", + "Md5", + "Sha1", + "Sha2_256", + "Sha2_224", + "Sha2_384", + "Sha2_512", + "Sha3_224", + "Sha3_256", + "Sha3_384", + "Sha3_512", + "Crc32", + "Adler", + "HMAC", + "DigestField", + "DigestFieldAction", + "Md5_Algo", + "Md5_Field", + "Sha1_Algo", + "Sha1_Field", + "Sha2_256_Algo", + "Sha2_256_Field", + "Sha2_224_Algo", + "Sha2_224_Field", + "Sha2_384_Algo", + "Sha2_384_Field", + "Sha2_512_Algo", + "Sha2_512_Field", + "Sha3_224_Algo", + "Sha3_224_Field", + "Sha3_256_Algo", + "Sha3_256_Field", + "Sha3_384_Algo", + "Sha3_384_Field", + "Sha3_512_Algo", + "Sha3_512_Field", + "Crc32_Algo", + "Crc32_Field", + "Adler_Algo", + "Adler_Field", + "CTX_DIGEST", + "CTX_DIGEST_ALGO", + "CTX_DIGEST_HOOK", + "CTX_DIGEST_OBJ", + "HMACAlgorithm", + "uintptr", + "intptr", + "offintptr", 
+ "offuintptr", + "Pointer", + "pointer", + "intptr_fn", + "PTR_STRICT", + "relative_pointer", + "RelativePointer", + "uintptr_fn", + "Compressed", + "ZLibCompressed", + "Bz2Compressed", + "LZMACompressed", + "LZOCompressed", + "PyStructFormattedField", + "Transformer", + "Const", + "ConstBytes", + "ConstString", + "Enum", + "String", + "Bytes", + "Memory", + "Computed", + "Pass", + "CString", + "Prefixed", + "Int", + "UInt", + "padding", + "char", + "boolean", + "int8", + "uint8", + "int16", + "uint16", + "int24", + "uint24", + "int32", + "uint32", + "int64", + "uint64", + "ssize_t", + "size_t", + "float16", + "float32", + "float64", + "double", + "void_ptr", + "Uuid", + "Aligned", + "align", + "Lazy", + "ENUM_STRICT", + "Field", + "INVALID_DEFAULT", + "DEFAULT_OPTION", + "singleton", + "FieldMixin", + "FieldStruct", + "Chain", + "Operator", + "get_args", + "get_kwargs", + "VarInt", + "VARINT_LSB", + "vint", + "Encrypted", + "Xor", + "Or", + "And", + "KeyCipher", + "MAC", + "MACAddress", + "IPv4Address", + "IPv6Address", + "ConditionalChain", + "If", + "Else", + "ElseIf", + "IOHook", +] diff --git a/src/caterpillar/fields/common.py b/src/caterpillar/fields/common.py index a405844a..44a60470 100755 --- a/src/caterpillar/fields/common.py +++ b/src/caterpillar/fields/common.py @@ -16,15 +16,14 @@ import warnings from io import BytesIO -from typing import Sequence, Any, Optional, Union, List, Callable -from types import EllipsisType, NoneType +from typing import Any, Union +from types import NoneType from functools import cached_property from enum import Enum as _EnumType from uuid import UUID from caterpillar.abc import ( _StructLike, - _ContextLambda, _StreamType, _ContextLike, ) @@ -39,7 +38,6 @@ from caterpillar import registry from caterpillar._common import WithoutContextVar from caterpillar.shared import getstruct -from caterpillar.model import getbits from ._base import Field, INVALID_DEFAULT, singleton from ._mixin import FieldStruct diff --git 
a/src/caterpillar/fields/common.pyi b/src/caterpillar/fields/common.pyi index bc15a075..0f044eab 100755 --- a/src/caterpillar/fields/common.pyi +++ b/src/caterpillar/fields/common.pyi @@ -104,7 +104,7 @@ class Const(Transformer[_IT, _IT, _IT, _IT]): _EnumT = TypeVar("_EnumT") -class Enum(Generic[_EnumT, _IT], Transformer[_EnumT, _IT, _EnumT, _IT]): +class Enum(Generic[_EnumT, _IT], Transformer[_EnumT, _IT, Union[_EnumT, _IT], _IT]): model: Type[_EnumT] default: _EnumT def __init__( diff --git a/src/caterpillar/fields/net.py b/src/caterpillar/fields/net.py index 45042d56..0d61d9d0 100755 --- a/src/caterpillar/fields/net.py +++ b/src/caterpillar/fields/net.py @@ -16,9 +16,6 @@ import binascii import re -from typing import Union, Any, Optional - - from .common import Transformer, uint32, UInt, Bytes from ._base import singleton diff --git a/src/caterpillar/fields/net.pyi b/src/caterpillar/fields/net.pyi index 13494122..666ac7a4 100755 --- a/src/caterpillar/fields/net.pyi +++ b/src/caterpillar/fields/net.pyi @@ -12,8 +12,9 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
+import ipaddress + from re import Pattern -from ipaddress import IPv4Address, IPv6Address from ._base import singleton as singleton from .common import ( @@ -24,8 +25,8 @@ from .common import ( ) from caterpillar.abc import _ContextLike -IPv4Address = Transformer[IPv4Address, int, IPv4Address, int] -IPv6Address = Transformer[IPv6Address, int, IPv6Address, int] +IPv4Address: Transformer[ipaddress.IPv4Address, int, ipaddress.IPv4Address, int] +IPv6Address: Transformer[ipaddress.IPv6Address, int, ipaddress.IPv6Address, int] class MACAddress(Transformer[str | bytes, bytes, bytes, bytes]): DELIMITERS: Pattern diff --git a/src/caterpillar/model/__init__.py b/src/caterpillar/model/__init__.py index 890d40ed..4f71136c 100644 --- a/src/caterpillar/model/__init__.py +++ b/src/caterpillar/model/__init__.py @@ -27,3 +27,26 @@ ) from ._bitfield import BitField, bitfield, BitFieldGroup, issigned, getbits from ._template import istemplate, template, TemplateTypeVar, derive + +__all__ = [ + "Sequence", + "Struct", + "struct", + "UnionHook", + "union", + "unpack", + "unpack_file", + "pack", + "pack_into", + "pack_file", + "sizeof", + "BitField", + "bitfield", + "BitFieldGroup", + "issigned", + "getbits", + "istemplate", + "template", + "TemplateTypeVar", + "derive", +] diff --git a/src/caterpillar/model/__init__.pyi b/src/caterpillar/model/__init__.pyi deleted file mode 100755 index 890d40ed..00000000 --- a/src/caterpillar/model/__init__.pyi +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (C) MatrixEditor 2023-2025 -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . -from ._base import Sequence -from ._struct import ( - Struct, - struct, - UnionHook, - union, - unpack, - unpack_file, - pack, - pack_into, - pack_file, - sizeof, -) -from ._bitfield import BitField, bitfield, BitFieldGroup, issigned, getbits -from ._template import istemplate, template, TemplateTypeVar, derive diff --git a/src/caterpillar/model/_base.py b/src/caterpillar/model/_base.py index 703fa823..c88d1c75 100755 --- a/src/caterpillar/model/_base.py +++ b/src/caterpillar/model/_base.py @@ -25,7 +25,6 @@ CTX_SEQ, ) from caterpillar.byteorder import ( - BYTEORDER_FIELD, ByteOrder, SysNative, Arch, @@ -46,7 +45,7 @@ Const, ) from caterpillar._common import unpack_seq, pack_seq -from caterpillar.shared import ATTR_ACTION_PACK, ATTR_ACTION_UNPACK, Action +from caterpillar.shared import ATTR_ACTION_PACK, ATTR_ACTION_UNPACK, Action, ATTR_BYTEORDER from caterpillar import registry @@ -265,7 +264,7 @@ def _process_field(self, name: str, annotation, default) -> Field: field = None struct = None - order = getattr(annotation, BYTEORDER_FIELD, self.order or SysNative) + order = getattr(annotation, ATTR_BYTEORDER, self.order or SysNative) arch = self.arch or system_arch result = self._process_annotation(annotation, default, order, arch) if isinstance(result, Field): diff --git a/src/caterpillar/py.py b/src/caterpillar/py.py index b7426578..b0168cbd 100644 --- a/src/caterpillar/py.py +++ b/src/caterpillar/py.py @@ -1,24 +1,19 @@ -# use this module to import everything Python related +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . -from .abc import ( - _ContextLike, - _StructLike, - _ContextLambda, - _Switch, - _SupportsUnpack, - _SupportsSize, - _SupportsPack, - _ContainsStruct, - getstruct, - hasstruct, - typeof, - _EnumLike, - _GreedyType, - _PrefixedType, - _StreamFactory, - _StreamType, - STRUCT_FIELD, -) +# use this module to import everything Python related from .registry import TypeConverter, annotation_registry, to_struct from .byteorder import ( ByteOrder, @@ -44,7 +39,6 @@ x86_64, AMD, AMD64, - BYTEORDER_FIELD, ) from .context import ( Context, @@ -67,6 +61,7 @@ CTX_PATH, CTX_SEQ, CTX_ARCH, + ExprMixin, ) from .exception import ( StructException, @@ -105,5 +100,123 @@ has_flag, O_ARRAY_FACTORY, ) -from ._common import WithoutContextVar -from .shared import ATTR_ACTION_PACK, ATTR_STRUCT, Action \ No newline at end of file +from ._common import WithoutContextVar, iseof, pack_seq, unpack_seq +from .shared import ( + ATTR_ACTION_PACK, + ATTR_STRUCT, + Action, + ATTR_ACTION_UNPACK, + ATTR_BITS, + ATTR_BYTEORDER, + ATTR_SIGNED, + ATTR_TEMPLATE, + ATTR_TYPE, + getstruct, + hasstruct, + MODE_PACK, + MODE_UNPACK, + typeof, +) + +from .model import __all__ as model_all +from .fields import __all__ as fields_all + +# pyright: reportUnsupportedDunderAll=false +__all__ = model_all + fields_all + [ + "ExprMixin", + "WithoutContextVar", + "AARCH64", + "AMD", + "AMD64", + "ARM", + "ARM64", + "Arch", + "BigEndian", + "ByteOrder", + "LittleEndian", + "MIPS", + "MIPS64", + "Native", + "NetEndian", + "PowerPC", + "PowerPC64", + "RISC_V", + "RISC_V64", + "SPARC", + "SPARC64", + "SysNative", + "system_arch", + "x86", + "x86_64", + 
"BinaryExpression", + "CTX_ARCH", + "CTX_FIELD", + "CTX_INDEX", + "CTX_OBJECT", + "CTX_OFFSETS", + "CTX_PARENT", + "CTX_PATH", + "CTX_POS", + "CTX_SEQ", + "CTX_STREAM", + "CTX_VALUE", + "ConditionContext", + "Context", + "ContextLength", + "ContextPath", + "UnaryExpression", + "ctx", + "parent", + "this", + "DelegationError", + "DynamicSizeError", + "InvalidValueError", + "OptionError", + "Stop", + "StreamError", + "StructException", + "ValidationError", + "F_DYNAMIC", + "F_KEEP_POSITION", + "F_OFFSET_OVERRIDE", + "F_SEQUENTIAL", + "Flag", + "GLOBAL_BITFIELD_FLAGS", + "GLOBAL_FIELD_FLAGS", + "GLOBAL_STRUCT_OPTIONS", + "GLOBAL_UNION_OPTIONS", + "O_ARRAY_FACTORY", + "S_ADD_BYTES", + "S_DISCARD_CONST", + "S_DISCARD_UNNAMED", + "S_EVAL_ANNOTATIONS", + "S_REPLACE_TYPES", + "S_SLOTS", + "S_UNION", + "get_flag", + "get_flags", + "has_flag", + "set_field_flags", + "set_struct_flags", + "set_union_flags", + "TypeConverter", + "annotation_registry", + "to_struct", + "ATTR_ACTION_PACK", + "ATTR_STRUCT", + "Action", + "iseof", + "pack_seq", + "unpack_seq", + "ATTR_ACTION_UNPACK", + "ATTR_BITS", + "ATTR_BYTEORDER", + "ATTR_SIGNED", + "ATTR_TEMPLATE", + "ATTR_TYPE", + "getstruct", + "hasstruct", + "MODE_PACK", + "MODE_UNPACK", + "typeof", +] diff --git a/test/_Py/fields/test_py_digest.py b/test/_Py/fields/test_py_digest.py index a679b91d..49cf91c7 100644 --- a/test/_Py/fields/test_py_digest.py +++ b/test/_Py/fields/test_py_digest.py @@ -9,7 +9,6 @@ Md5, Md5_Field, Sha2_256, - _DigestValue, Md5_Algo, Sha2_256_Algo, Sha2_256_Field, diff --git a/test/_Py/fields/test_py_memory.py b/test/_Py/fields/test_py_memory.py index c8a8913a..f6dcb523 100644 --- a/test/_Py/fields/test_py_memory.py +++ b/test/_Py/fields/test_py_memory.py @@ -18,7 +18,7 @@ def test_memory_length(): with pytest.raises(ValidationError): pack(b"12345678901", memory, as_field=True) - dyn_memory = Memory(lambda ctx: ctx._root.length) + dyn_memory = Memory(lambda context: context._root.length) assert 
len(unpack(dyn_memory, b"123456789011", as_field=True, length=10)) == 10 greedy_memory = Memory(...) From 8e2510ca142a3bd05b6da3d28b20901f41801d2d Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Wed, 25 Jun 2025 17:21:49 +0200 Subject: [PATCH 17/41] Added stubs for _C module and c.py --- abc.py: + Removed __type__() requirement from '_StructLIke' as described in the docs + Added new protocol: _SupportsType --- docs/sphinx/source/library/abc.rst | 16 +- docs/sphinx/source/library/byteorder.rst | 2 - src/caterpillar/_C.pyi | 615 +++++++++++++++-------- src/caterpillar/abc.py | 3 + src/caterpillar/c.pyi | 28 ++ src/caterpillar/context.pyi | 3 - src/caterpillar/shortcuts.py | 37 ++ src/caterpillar/shortcuts.pyi | 39 +- test/_C/test_context.py | 2 +- test/_C/test_parsing.py | 2 +- 10 files changed, 508 insertions(+), 239 deletions(-) create mode 100755 src/caterpillar/c.pyi diff --git a/docs/sphinx/source/library/abc.rst b/docs/sphinx/source/library/abc.rst index 96a08f13..f80ac8d8 100644 --- a/docs/sphinx/source/library/abc.rst +++ b/docs/sphinx/source/library/abc.rst @@ -35,21 +35,7 @@ Context Other base classes ------------------ -.. autoclass:: caterpillar.abc._EnumLike +.. autoclass:: caterpillar.abc._SwitchLike :members: -.. autoclass:: caterpillar.abc._Switch - :members: - - -Standard interface ------------------- - -.. autoattribute:: caterpillar.abc.STRUCT_FIELD - -.. autofunction:: caterpillar.abc.hasstruct - -.. autofunction:: caterpillar.abc.getstruct - -.. autofunction:: caterpillar.abc.typeof diff --git a/docs/sphinx/source/library/byteorder.rst b/docs/sphinx/source/library/byteorder.rst index 6f9aaee1..e2da36ab 100644 --- a/docs/sphinx/source/library/byteorder.rst +++ b/docs/sphinx/source/library/byteorder.rst @@ -10,8 +10,6 @@ Byteorder .. autoclass:: caterpillar.byteorder.ByteOrder :members: -.. autoattribute:: caterpillar.byteorder.BYTEORDER_FIELD - .. autoattribute:: caterpillar.byteorder.Native .. 
autoattribute:: caterpillar.byteorder.BigEndian diff --git a/src/caterpillar/_C.pyi b/src/caterpillar/_C.pyi index 813ee8c8..4edd2803 100644 --- a/src/caterpillar/_C.pyi +++ b/src/caterpillar/_C.pyi @@ -1,11 +1,32 @@ -from __future__ import annotations - -from typing import Any, Optional, Collection, Union, Callable, IO, TypeVar -from enum import EnumType - -_Length = Union[int, ContextLambda, slice, Ellipsis] -ContextLambda = Callable[[Context], Any] -_ConstType = Union[Any, ContextLambda] +from _typeshed import Incomplete +from types import NotImplementedType +from typing import ( + IO, + Any, + Callable, + Collection, + Dict, + Generic, + Optional, + Protocol, + Type, + TypeVar, + Union, +) + +from caterpillar.abc import ( + _IT, + _OT, + _ContextLike, + _LengthT, + _ContextLambdaReturnT_co, + _ContextLambda, +) + +class _CContextLambda(Protocol[_ContextLambdaReturnT_co]): + def __call__(self, layer: layer) -> _ContextLambdaReturnT_co: ... + +_CTypeDesc = Union[str, Type[_OT], NotImplementedType] BIG_ENDIAN: Endian DefaultOption: DefaultOptionType @@ -23,6 +44,7 @@ S_EVAL_ANNOTATIONS: Option S_REPLACE_TYPES: Option S_SLOTS: Option S_UNION: Option +TYPE_MAP: dict boolean: Bool char: Char f16: Float @@ -44,34 +66,35 @@ u64: Int u8: Int varint: VarInt -class Context(dict): - def __init__(self, /, **kwargs) -> None: ... - def __context_getattr__(self, *args, **kwargs): ... - class Arch: name: str ptr_size: int def __init__(self, name: str, ptr_size: int) -> None: ... + def __eq__(self, other: object) -> bool: ... + def __ge__(self, other: object) -> bool: ... + def __gt__(self, other: object) -> bool: ... def __hash__(self) -> int: ... + def __le__(self, other: object) -> bool: ... + def __lt__(self, other: object) -> bool: ... + def __ne__(self, other: object) -> bool: ... 
-class binaryexpr: - expr: int - lhs: Union[ContextLambda, Any] - rhs: Union[ContextLambda, Any] - def __init__( - self, - expr: int, - left: Union[ContextLambda, Any], - right: Union[ContextLambda, Any], - ) -> None: ... - def __call__(self, *args, **kwargs) -> Any: ... +class Bool(builtinatom[bool, bool]): + def __init__(self) -> None: ... + +class Char(builtinatom[str, str]): + def __init__(self) -> None: ... -class ContextPath: +class Context(dict, _ContextLike): + def __init__(self, *args, **kwargs) -> None: ... + def __context_getattr__(self, *args, **kwargs): ... + def __getattribute__(self, name: str, /) -> Any: ... + +class ContextPath(_ContextLambda): path: str def __init__(self, path: str) -> None: ... def __add__(self, other) -> binaryexpr: ... def __and__(self, other) -> binaryexpr: ... - def __call__(self, context: Context) -> Any: ... + def __call__(self, context: _ContextLike): ... def __eq__(self, other: object) -> binaryexpr: ... def __floordiv__(self, other) -> binaryexpr: ... def __ge__(self, other: object) -> binaryexpr: ... @@ -81,23 +104,43 @@ class ContextPath: def __le__(self, other: object) -> binaryexpr: ... def __lshift__(self, other) -> binaryexpr: ... def __lt__(self, other: object) -> binaryexpr: ... + def __matmul__(self, *args, **kwargs) -> binaryexpr: ... def __mod__(self, other) -> binaryexpr: ... def __mul__(self, other) -> binaryexpr: ... def __ne__(self, other: object) -> binaryexpr: ... def __neg__(self) -> unaryexpr: ... def __or__(self, other) -> binaryexpr: ... - def __sub__(self, other) -> unaryexpr: ... + def __pos__(self) -> unaryexpr: ... + def __pow__(self, other) -> binaryexpr: ... + def __radd__(self, other) -> binaryexpr: ... + def __rand__(self, other) -> binaryexpr: ... + def __rfloordiv__(self, other) -> binaryexpr: ... + def __rlshift__(self, other) -> binaryexpr: ... + def __rmatmul__(self, *args, **kwargs) -> binaryexpr: ... + def __rmod__(self, other) -> binaryexpr: ... 
+ def __rmul__(self, other) -> binaryexpr: ... + def __ror__(self, other) -> binaryexpr: ... + def __rpow__(self, other) -> binaryexpr: ... + def __rrshift__(self, other) -> binaryexpr: ... + def __rshift__(self, other) -> binaryexpr: ... + def __rsub__(self, other) -> binaryexpr: ... + def __rtruediv__(self, other) -> binaryexpr: ... + def __rxor__(self, other) -> binaryexpr: ... + def __size__(self, layer: layer) -> int: ... + def __sub__(self, other) -> binaryexpr: ... def __truediv__(self, other) -> binaryexpr: ... - def __type__(self) -> type: ... - def __xor__(self, other) -> binaryexpr: ... + def __type__(self, layer: layer) -> _CTypeDesc: ... + def __xor__(self, other): ... class DefaultOptionType: - def __init__(self) -> None: ... + @classmethod + def __init__(cls, *args, **kwargs) -> None: ... class Endian: ch: str name: str def __init__(self, ch: str, name: str) -> None: ... + def __add__(self, other): ... def __eq__(self, other: object) -> bool: ... def __ge__(self, other: object) -> bool: ... def __gt__(self, other: object) -> bool: ... @@ -105,69 +148,35 @@ class Endian: def __le__(self, other: object) -> bool: ... def __lt__(self, other: object) -> bool: ... def __ne__(self, other: object) -> bool: ... + def __radd__(self, other): ... -class atom: - def __init__(self) -> None: ... - def __pack__(self, obj: Any, context: layer) -> None: ... - def __size__(self, context: layer) -> Any: ... - def __type__(self) -> Any: ... - def __unpack__(self, context: layer) -> Any: ... - -class catom(atom): - def __init__(self) -> None: ... - def __pack__(self, obj: Any, context: layer) -> None: ... - def __pack_many__(self, obj: Collection[Any], layer: layer) -> None: ... - def __size__(self, context: layer) -> Any: ... - def __type__(self) -> Any: ... - def __unpack__(self, context: layer) -> Any: ... - def __unpack_many__(self, context: layer, lengthinfo) -> Collection[Any]: ... - -class builtinatom(catom): - def __init__(self, *args, **kwargs) -> None: ... 
- def __floordiv__(self, other) -> condition: ... - def __getitem__(self, index) -> repeated: ... - def __matmul__(self, *args, **kwargs) -> atoffset: ... - def __rfloordiv__(self, other) -> condition: ... - def __rmatmul__(self, *args, **kwargs) -> atoffset: ... - def __rrshift__(self, other) -> switch: ... - def __rshift__(self, other) -> switch: ... - -class Char(builtinatom): - def __init__(self, *args, **kwargs) -> None: ... - -class condition(builtinatom): - atom: Any - condition: Union[bool, ContextLambda] - def __init__(self, condition: Union[bool, ContextLambda], atom: Any) -> None: ... - def is_enabled(self, context: layer) -> bool: ... - def __set_byteorder__(self, byteorder: Endian) -> condition: ... - -class const(builtinatom): - def __init__(self, *args, **kwargs) -> None: ... +class Float(builtinatom[float, float]): + little_endian: bool + nbits: int + nbytes: int + def __init__(self, nbits: int, little_endian: bool = ...) -> None: ... + def __set_byteorder__(self, endian: Endian) -> Float: ... -class builtinatom(catom): - def __init__(self) -> None: ... - def __add__(self, endian: Endian) -> Field: ... - def __floordiv__(self, condition: Union[bool, ContextLambda]) -> Field: ... - def __getitem__(self, length: _Length) -> Field: ... - def __matmul__(self, offset: Union[ContextLambda, int]) -> Field: ... - def __or__(self, option: Option) -> Field: ... - def __radd__(self, endian: Endian) -> Field: ... - def __rfloordiv__(self, condition: Union[bool, ContextLambda]) -> Field: ... - def __rmatmul__(self, offset: Union[ContextLambda, int]) -> Field: ... - def __ror__(self, option: Option) -> Field: ... - def __rrshift__(self, switch: Union[dict, ContextLambda]) -> Field: ... - def __rshift__(self, switch: Union[dict, ContextLambda]) -> Field: ... - def __rxor__(self, option: Option) -> Field: ... - def __xor__(self, option: Option) -> Field: ... 
+class Int(builtinatom[int, int]): + little_endian: bool + nbits: int + nbytes: int + signed: bool + def __init__( + self, nbits: int, signed: bool = ..., little_endian: bool = ... + ) -> None: ... + def __set_byteorder__(self, endian: Endian) -> Int: ... class InvalidDefaultType: - def __init__(self) -> None: ... + @classmethod + def __init__(cls, *args, **kwargs) -> None: ... + +_OptionValueT = TypeVar("_OptionValueT", default=Any) -class Option: +class Option(Generic[_OptionValueT]): name: str - value: Optional[Any] - def __init__(self, name: str, value: Optional[Any] = ...) -> None: ... + value: Optional[_OptionValueT] + def __init__(self, name: str, value: Optional[_OptionValueT] = ...) -> None: ... def __eq__(self, other: object) -> bool: ... def __ge__(self, other: object) -> bool: ... def __gt__(self, other: object) -> bool: ... @@ -176,182 +185,199 @@ class Option: def __lt__(self, other: object) -> bool: ... def __ne__(self, other: object) -> bool: ... +class Padding(builtinatom[None, None]): + def __init__(self, pad: str) -> None: ... + class State: globals: Context io: IO offset_table: dict - def __init__(self, io: IO, **globals) -> None: ... + def __init__(self, io: Optional[IO] = ..., **globals) -> None: ... def read(self, __n: int) -> bytes: ... def seek(self, __offset: int, __whence: int = ...) -> None: ... def tell(self) -> int: ... def write(self, __data: bytes) -> None: ... -class fieldinfo: - excluded: bool - default: Any - field: atom - name: str - def __init__(self, field: atom, excluded: bool = ...) -> None: ... - -class lengthinfo: - length: int - greedy: bool - def __init__(self, length: int = ..., greedy: bool = ...) -> None: ... 
+_ModelT = TypeVar("_ModelT") -class Struct(builtinatom): +class Struct(Generic[_ModelT], builtinatom[_ModelT, _ModelT]): members: dict[str, fieldinfo] - model: type + model: Type[_ModelT] options: set[Option] def __init__( self, - model: type, + model: Type[_ModelT], options: set[Option] = ..., endian: Endian = ..., field_options: set[Option] = ..., alter_model: bool = ..., ) -> None: ... -class unaryexpr: - expr: int - value: Union[ContextLambda, Any] - def __init__(self, expr: int, value: Union[ContextLambda, Any]) -> Any: ... - def __call__(self, *args, **kwargs): ... - def __hash__(self) -> int: ... - -class layer: - field: Field - greedy: bool - index: int - length: int - obj: Context - parent: layer - path: str - sequence: Collection - sequential: bool - state: State - value: Any - def __init__( - self, - state: State, - field: Field = ..., - obj: Context = ..., - value: Any = ..., - path: str = ..., - sequence: Collection = ..., - parent: layer = ..., - ) -> None: ... - def __context_getattr__(self, path: str) -> Any: ... - -def typeof(obj: atom): ... -def pack_into(__obj: Any, __struct: atom, __io: IO, **globals) -> None: ... -def pack(__obj: Any, __struct: atom, **globals) -> bytes: ... -def sizeof(obj: atom, globals: Optional[dict | Context] = ...): ... -def unpack(__io: Any, __struct: atom, **globals) -> Any: ... - -class Int(builtinatom): - little_endian: bool - nbits: int - nbytes: int - signed: bool - def __init__( - self, nbits: int, signed: bool = ..., little_endian: bool = ... - ) -> None: ... - -class Float(builtinatom): - little_endian: bool - nbits: int - nbytes: int - def __init__(self, nbits: int, little_endian: bool = ...) -> None: ... - -class Padding(builtinatom): - def __init__(self, pad: int) -> None: ... - -class string(builtinatom): - encoding: str - errors: str - length: _Length - def __init__(self, length: _Length, encoding: str, errors: str = ...) -> None: ... 
+class VarInt(builtinatom[int, int]): + little_endian: Incomplete + lsb: Incomplete + def __init__(self, little_endian: bool = ..., lsb: bool = ...) -> None: ... + def __set_byteorder__(self, endian: Endian) -> VarInt: ... -class atoffset(builtinatom): - offset: Union[int, ContextLambda] +class atoffset(builtinatom[_IT, _OT]): + offset: Union[int, _CContextLambda[int]] whence: int def __init__( - self, offset: Union[int, ContextLambda], atom: atom, whence: int = ... + self, + offset: Union[int, _CContextLambda[int]], + atom: atom[_IT, _OT], + whence: int = ..., ) -> None: ... def get_offset(self, layer: layer) -> int: ... def __set_byteorder__(self, byteorder: Endian) -> atoffset: ... +class atom(Generic[_IT, _OT]): + def __init__(self) -> None: ... + def __pack__(self, obj: _IT, context: layer) -> None: ... + def __size__(self, context: layer) -> int: ... + def __type__(self) -> _CTypeDesc[_OT]: ... + def __unpack__(self, context: layer) -> _OT: ... -class repeated(builtinatom): - atom: Any - length: _Length - def __init__(self, atom: atom, length: _Length) -> None: ... - def __set_byteorder__(self, byteorder: Endian) -> repeated: ... +class binaryexpr(_ContextLambda): + expr: int + lhs: Union[_ContextLambda, Any] + rhs: Union[_ContextLambda, Any] + def __init__(self, expr: int, lhs, rhs) -> None: ... + def __add__(self, other): ... + def __and__(self, other): ... + def __call__(self, *args, **kwargs): ... + def __floordiv__(self, other): ... + def __invert__(self): ... + def __lshift__(self, other): ... + def __matmul__(self, *args, **kwargs): ... + def __mod__(self, other): ... + def __mul__(self, other): ... + def __neg__(self): ... + def __or__(self, other): ... + def __pos__(self): ... + def __pow__(self, other): ... + def __radd__(self, other): ... + def __rand__(self, other): ... + def __rfloordiv__(self, other): ... + def __rlshift__(self, other): ... + def __rmatmul__(self, *args, **kwargs): ... + def __rmod__(self, other): ... 
+ def __rmul__(self, other): ... + def __ror__(self, other): ... + def __rpow__(self, other): ... + def __rrshift__(self, other): ... + def __rshift__(self, other): ... + def __rsub__(self, other): ... + def __rtruediv__(self, other): ... + def __rxor__(self, other): ... + def __sub__(self, other): ... + def __truediv__(self, other): ... + def __xor__(self, other): ... + +class builtinatom(catom[_IT, _OT]): + def __init__(self) -> None: ... + def __floordiv__( + self, condition: Union[bool, _ContextLambda[bool]] + ) -> condition[_IT, _OT]: ... + def __getitem__(self, index: _LengthT) -> repeated[_IT, _OT]: ... + def __matmul__( + self, offset: Union[int, _CContextLambda[int]] + ) -> atoffset[_IT, _OT]: ... + def __rfloordiv__( + self, other: Union[bool, _CContextLambda[bool]] + ) -> condition[_IT, _OT]: ... + def __rmatmul__( + self, offset: Union[int, _CContextLambda[int]] + ) -> atoffset[_IT, _OT]: ... + def __rrshift__(self, other) -> switch: ... + def __rshift__(self, other) -> switch: ... -class seqlayer(layer): - index: int - length: int - sequence: Collection +class catom(atom[_IT, _OT]): def __init__(self, *args, **kwargs) -> None: ... - -class switch(builtinatom): - atom: Any - cases: Union[dict[Any, Any], ContextLambda] + def __pack__(self, obj: _IT, context: layer) -> None: ... + def __pack_many__(self, obj: Collection[_IT], layer: layer) -> None: ... + def __size__(self, context: layer) -> int: ... + def __type__(self) -> Union[Type[_OT], str, NotImplementedType]: ... + def __unpack__(self, context: layer) -> _OT: ... + def __unpack_many__( + self, context: layer, lengthinfo: lengthinfo + ) -> Collection[_OT]: ... + +class computed(builtinatom[_IT, _IT]): + value: _IT + def __init__(self, value: _IT) -> None: ... 
+ +class condition(builtinatom[Optional[_IT], Optional[_OT]]): + atom: atom[_IT, _OT] + condition: Union[bool, _ContextLambda[bool]] def __init__( - self, atom: atom, cases: Union[dict[Any, Any], ContextLambda] + self, condition: Union[bool, _ContextLambda[bool]], atom: atom[_IT, _OT] ) -> None: ... - def get_next(self, obj: Any, context: layer) -> Any: ... - def __set_byteorder__(self, byteorder: Endian) -> switch: ... + def is_enabled(self, context: layer) -> bool: ... + def __set_byteorder__(self, byteorder: Endian) -> condition[_IT, _OT]: ... + +class const(Generic[_IT], builtinatom[_IT, _IT]): + atom: atom[_IT, _IT] + value: _IT + def __init__(self, atom: atom[_IT, _IT], value: _IT) -> None: ... + def __set_byteorder__(self, endian: Endian) -> const[_IT, _IT]: ... -class cstring(builtinatom): - length: Union[_Length, atom] +class cstring(builtinatom[str, str]): + length: Union[_LengthT, atom[int, int]] encoding: str errors: str terminator: str keep_terminator: bool - def __init__( self, - length: Union[_Length, atom], + length: Union[_LengthT, atom], encoding: str = ..., errors: str = ..., sep: str = ..., keep_terminator: bool = ..., ) -> None: ... -class octetstring(builtinatom): - length: _Length - def __init__(self, length: _Length) -> None: ... +class enumeration(builtinatom[_IT, _IT]): + atom: atom[_IT, _IT] + default: Optional[_IT] + enum_type: Type[_IT] + members: Dict[str, _IT] + def __init__( + self, + atom: atom[_IT, _IT], + enum_type: Type[_IT], + default: Optional[_IT] = ..., + ) -> None: ... + def __set_byteorder__(self, endian: Endian) -> enumeration[_IT]: ... -class enumeration(builtinatom): - atom: Any - enum_type: EnumType - members: dict[Any, Any] +class fieldinfo: + excluded: bool default: Any + field: atom + name: str + def __init__(self, field: atom, excluded: bool = ...) -> None: ... 
+ +class layer(_ContextLike): + parent: Optional[layer] + path: str + state: State def __init__( self, - atom: atom, - enum_type: EnumType, - default: Any = ..., + state: State, + path: str = ..., + parent: layer = ..., ) -> None: ... - def __set_byteorder__(self, byteorder: Endian) -> enumeration: ... - -class pstring(builtinatom): - atom: Any - encoding: str - errors: str - def __init__(self, atom: atom, encoding: str = ..., errors: str = ...) -> None: ... - def __set_byteorder__(self, byteorder: Endian) -> pstring: ... + def __context_getattr__(self, path: str) -> Any: ... -class VarInt(builtinatom): - little_endian: bool - lsb: bool - def __init__(self, little_endian: bool = ..., lsb: bool = ...) -> None: ... - def __set_byteorder__(self, byteorder: Endian) -> VarInt: ... +class lazy(builtinatom[_IT, _OT]): + always_lazy: bool + fn: Callable[[], atom[_IT, _OT]] + def __init__( + self, fn: Callable[[], atom[_IT, _OT]], always_lazy: bool = ... + ) -> None: ... + def __set_byteorder__(self, endian: Endian) -> lazy[_IT, _OT]: ... -class computed(builtinatom): - value: _ConstType - def __init__(self, value: _ConstType) -> None: ... +class lengthinfo: ... class objlayer(layer): obj: Context @@ -360,13 +386,11 @@ class objlayer(layer): ) -> None: ... def __context_getattr__(self, path: str) -> Any: ... -class lazy(builtinatom): - always_lazy: bool - fn: Callable[[], atom] - def __init__(self, fn: Callable[[], atom], always_lazy: bool = ...) -> None: ... - def __set_byteorder__(self, byteorder: Endian) -> lazy: ... +class octetstring(builtinatom[bytes, bytes]): + length: _LengthT + def __init__(self, length: _LengthT) -> None: ... -class patom(atom): +class patom(atom[_IT, _OT]): def __init__(self) -> None: ... def __floordiv__(self, other): ... def __getitem__(self, index): ... @@ -375,3 +399,162 @@ class patom(atom): def __rmatmul__(self, *args, **kwargs): ... def __rrshift__(self, other): ... def __rshift__(self, other): ... 
+ +class pstring(builtinatom[str, str]): + atom: atom[str, str] + encoding: str + errors: str + def __init__( + self, atom: atom[str, str], encoding: str = ..., errors: str = ... + ) -> None: ... + def __set_byteorder__(self, byteorder: Endian) -> pstring: ... + +class repeated(builtinatom[_IT, _OT]): + atom: atom[_IT, _OT] + length: _LengthT + def __init__(self, atom: atom[_IT, _OT], length: _LengthT) -> None: ... + def __set_byteorder__(self, byteorder: Endian) -> repeated[_IT, _OT]: ... + +class seqlayer(layer): + index: int + length: int + sequence: Collection + def __init__(self, *args, **kwargs) -> None: ... + +class string(builtinatom[str, str]): + encoding: str + errors: str + length: _LengthT + def __init__(self, length: _LengthT, encoding: str, errors: str = ...) -> None: ... + +class switch(builtinatom): + atom: atom + cases: Dict[Any, atom] + def __init__(self, *args, **kwargs) -> None: ... + def get_next(self, *args, **kwargs): ... + def __set_byteorder__(self, endian: Endian) -> None: ... + +class unaryexpr: + expr: int + value: Any | _CContextLambda + def __init__(self, *args, **kwargs) -> None: ... + def __add__(self, other): ... + def __and__(self, other): ... + def __call__(self, *args, **kwargs): ... + def __floordiv__(self, other): ... + def __hash__(self) -> int: ... + def __invert__(self): ... + def __lshift__(self, other): ... + def __matmul__(self, *args, **kwargs): ... + def __mod__(self, other): ... + def __mul__(self, other): ... + def __neg__(self): ... + def __or__(self, other): ... + def __pos__(self): ... + def __pow__(self, other): ... + def __radd__(self, other): ... + def __rand__(self, other): ... + def __rfloordiv__(self, other): ... + def __rlshift__(self, other): ... + def __rmatmul__(self, *args, **kwargs): ... + def __rmod__(self, other): ... + def __rmul__(self, other): ... + def __ror__(self, other): ... + def __rpow__(self, other): ... + def __rrshift__(self, other): ... + def __rshift__(self, other): ... 
+ def __rsub__(self, other): ... + def __rtruediv__(self, other): ... + def __rxor__(self, other): ... + def __sub__(self, other): ... + def __truediv__(self, other): ... + def __xor__(self, other): ... + +def typeof(obj: atom): ... +def pack_into(__obj: _IT, __struct: atom[_IT, _OT], __io: IO, **globals) -> None: ... +def pack(__obj: _IT, __struct: atom[_IT, _OT], **globals) -> bytes: ... +def sizeof(obj: atom[_IT, _OT], globals: Optional[dict | Context] = ...) -> int: ... +def unpack(__io: IO | bytes, __struct: atom[_IT, _OT], **globals) -> _OT: ... + +__all__ = [ + "Arch", + "Bool", + "Char", + "Context", + "ContextPath", + "DefaultOptionType", + "Endian", + "Float", + "Int", + "InvalidDefaultType", + "Option", + "Padding", + "State", + "Struct", + "VarInt", + "atoffset", + "atom", + "binaryexpr", + "builtinatom", + "catom", + "computed", + "condition", + "const", + "cstring", + "enumeration", + "fieldinfo", + "layer", + "lazy", + "lengthinfo", + "objlayer", + "octetstring", + "patom", + "pstring", + "repeated", + "seqlayer", + "string", + "switch", + "unaryexpr", + "typeof", + "pack_into", + "pack", + "sizeof", + "unpack", + "BIG_ENDIAN", + "DefaultOption", + "FIELD_OPTIONS", + "F_DYNAMIC", + "F_SEQUENTIAL", + "HOST_ARCH", + "InvalidDefault", + "LITTLE_ENDIAN", + "NATIVE_ENDIAN", + "STRUCT_OPTIONS", + "S_DISCARD_CONST", + "S_DISCARD_UNNAMED", + "S_EVAL_ANNOTATIONS", + "S_REPLACE_TYPES", + "S_SLOTS", + "S_UNION", + "TYPE_MAP", + "boolean", + "char", + "f16", + "f32", + "f64", + "i128", + "i16", + "i24", + "i32", + "i64", + "i8", + "lsbvarint", + "padding", + "u128", + "u16", + "u24", + "u32", + "u64", + "u8", + "varint", +] diff --git a/src/caterpillar/abc.py b/src/caterpillar/abc.py index f0a450ed..46c0f9ed 100755 --- a/src/caterpillar/abc.py +++ b/src/caterpillar/abc.py @@ -118,6 +118,9 @@ def __unpack__(self, context: _ContextLike) -> Any: def __pack__(self, obj: Any, context: _ContextLike) -> None: pass + +@runtime_checkable +class _SupportsType(Protocol): 
def __type__(self) -> Optional[Union[type, str]]: pass diff --git a/src/caterpillar/c.pyi b/src/caterpillar/c.pyi new file mode 100755 index 00000000..a4fe9785 --- /dev/null +++ b/src/caterpillar/c.pyi @@ -0,0 +1,28 @@ +from typing import Callable, Iterable, Type, TypeVar, overload +from caterpillar._C import * # noqa +from caterpillar._C import __all__ as _c_all + +_T = TypeVar("_T") + +@overload +def struct( + cls: None = None, + /, + options: Iterable[Option] = ..., + endian: Endian = ..., + arch: Arch = ..., + field_options: Iterable[Option] = ..., +) -> Callable[[Type[_T]], Type[_T]]: ... +@overload +def struct( + cls: Type[_T], + /, + options: Iterable[Option] = ..., + endian: Endian = ..., + arch: Arch = ..., + field_options: Iterable[Option] = ..., +) -> Type[_T]: ... + + +# pyright: reportUnsupportedDunderAll=false +__all__ = _c_all + ["struct"] diff --git a/src/caterpillar/context.pyi b/src/caterpillar/context.pyi index ec37afe7..882c3d1a 100755 --- a/src/caterpillar/context.pyi +++ b/src/caterpillar/context.pyi @@ -12,17 +12,14 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . -from dataclasses import dataclass from types import FrameType from typing import ( Any, Callable, Dict, List, - NewType, Optional, Self, - Tuple, Type, Union, dataclass_transform, diff --git a/src/caterpillar/shortcuts.py b/src/caterpillar/shortcuts.py index 3a2aad9b..332e6d8c 100644 --- a/src/caterpillar/shortcuts.py +++ b/src/caterpillar/shortcuts.py @@ -44,3 +44,40 @@ from .registry import to_struct from . 
import options as opt from .fields import Field as F + +__all__ = [ + "AARCH64", + "AMD", + "AMD64", + "ARM", + "ARM64", + "BigEndian", + "LittleEndian", + "PowerPC", + "PowerPC64", + "RISC_V", + "RISC_V64", + "x86", + "x86_64", + "bitfield", + "pack", + "pack_file", + "pack_into", + "struct", + "union", + "unpack", + "unpack_file", + "sizeof", + "Seq", + "typeof", + "getstruct", + "hasstruct", + "ContextPath", + "ctx", + "parent", + "this", + "lenof", + "to_struct", + "F", + "opt", +] diff --git a/src/caterpillar/shortcuts.pyi b/src/caterpillar/shortcuts.pyi index 63219055..332e6d8c 100755 --- a/src/caterpillar/shortcuts.pyi +++ b/src/caterpillar/shortcuts.pyi @@ -43,4 +43,41 @@ from .model import ( from .shared import typeof, getstruct, hasstruct from .registry import to_struct from . import options as opt -from .fields import Field as F \ No newline at end of file +from .fields import Field as F + +__all__ = [ + "AARCH64", + "AMD", + "AMD64", + "ARM", + "ARM64", + "BigEndian", + "LittleEndian", + "PowerPC", + "PowerPC64", + "RISC_V", + "RISC_V64", + "x86", + "x86_64", + "bitfield", + "pack", + "pack_file", + "pack_into", + "struct", + "union", + "unpack", + "unpack_file", + "sizeof", + "Seq", + "typeof", + "getstruct", + "hasstruct", + "ContextPath", + "ctx", + "parent", + "this", + "lenof", + "to_struct", + "F", + "opt", +] diff --git a/test/_C/test_context.py b/test/_C/test_context.py index ad3b9e64..ab5e4555 100644 --- a/test/_C/test_context.py +++ b/test/_C/test_context.py @@ -27,7 +27,7 @@ def test_context_getattr(): assert getattr(c, "foo") == 1 assert getattr(c, "bar") == 2 # This call will be transferred to the __context_getattr__ function - assert getattr(c, "foo.__class__") == int + assert getattr(c, "foo.__class__") is int with pytest.raises(AttributeError): # this class is strict when it comes to undefined diff --git a/test/_C/test_parsing.py b/test/_C/test_parsing.py index c143fff7..5d0a8068 100644 --- a/test/_C/test_parsing.py +++ 
b/test/_C/test_parsing.py @@ -4,7 +4,7 @@ import caterpillar # TODO: raise issue for fixes -if False: #caterpillar.native_support(): +if False:# caterpillar.native_support(): from caterpillar._C import atom, typeof, sizeof, patom, repeated from caterpillar._C import switch From a8d1655b1614b65e18beaaa1f0e3bb24b0d0a082 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Wed, 25 Jun 2025 17:26:50 +0200 Subject: [PATCH 18/41] Updated action to include recent Python versions --- .github/workflows/python-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index 0156440d..99e9c54f 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -9,7 +9,7 @@ jobs: fail-fast: true matrix: os: ["ubuntu-latest", "windows-latest", "macos-latest"] - python-version: ["3.12"] + python-version: ["3.12", "3.13"] steps: - name: Checkout source From 413a9c59a6c9cf3493b7bfdff6ef8005c5febf33 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Wed, 25 Jun 2025 17:32:11 +0200 Subject: [PATCH 19/41] Update caterpilllar C installatin candidate to reflect 3.14 updates --- src/caterpillar/include/caterpillar/macros.h | 1 + src/ccaterpillar/default.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/caterpillar/include/caterpillar/macros.h b/src/caterpillar/include/caterpillar/macros.h index 1ed7f31d..24221bb8 100644 --- a/src/caterpillar/include/caterpillar/macros.h +++ b/src/caterpillar/include/caterpillar/macros.h @@ -24,6 +24,7 @@ #include #define PY_3_13_PLUS (PY_VERSION_HEX >= 0x030d0000) +#define PY_3_14_PLUS (PY_VERSION_HEX >= 0x030e0000) #define _Cp_NameStr(x) ("caterpillar._C." 
x) diff --git a/src/ccaterpillar/default.c b/src/ccaterpillar/default.c index 442f3f26..d4e83295 100644 --- a/src/ccaterpillar/default.c +++ b/src/ccaterpillar/default.c @@ -2,7 +2,7 @@ #include "caterpillar/caterpillar.h" -#if PY_3_13_PLUS +#if PY_3_14_PLUS #define _Py_IMMORTAL_REFCNT _Py_IMMORTAL_INITIAL_REFCNT #endif From 9e3cf713e40fbc5eac7d72320cd1a0fd18639af9 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Thu, 26 Jun 2025 21:34:16 +0200 Subject: [PATCH 20/41] Split _ActionLike into separate protocols --- abc.py: + _ActionLike was removed and splitted into two separate protocols:_SupportsActionUnpack and _SupportsActionPack --- src/caterpillar/abc.py | 7 ++++--- src/caterpillar/abc.pyi | 13 ++++++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/caterpillar/abc.py b/src/caterpillar/abc.py index 46c0f9ed..9fbc0b90 100755 --- a/src/caterpillar/abc.py +++ b/src/caterpillar/abc.py @@ -63,12 +63,13 @@ def __call__(self, context: _ContextLike) -> Any: @runtime_checkable -class _ActionLike(Protocol): - @abstractmethod +class _SupportsActionUnpack(Protocol): def __action_pack__(self, context: _ContextLike) -> None: pass - @abstractmethod + +@runtime_checkable +class _SupportsActionPack(Protocol): def __action_unpack__(self, context: _ContextLike) -> None: pass diff --git a/src/caterpillar/abc.pyi b/src/caterpillar/abc.pyi index 75ce32bd..b306c327 100755 --- a/src/caterpillar/abc.pyi +++ b/src/caterpillar/abc.pyi @@ -61,9 +61,16 @@ class _ContainsStruct(Protocol[_IT_contra, _OT]): __struct__: _StructLike[_IT_contra, _OT] @runtime_checkable -class _ActionLike(Protocol): - def __action_pack__(self, context: _ContextLike) -> None: ... - def __action_unpack__(self, context: _ContextLike) -> None: ... 
+class _SupportsActionUnpack(Protocol): + def __action_pack__(self, context: _ContextLike) -> None: + pass + +@runtime_checkable +class _SupportsActionPack(Protocol): + def __action_unpack__(self, context: _ContextLike) -> None: + pass + +_ActionLike = Union[_SupportsActionPack, _SupportsActionUnpack] @runtime_checkable class _SupportsPack(Protocol[_IT_contra]): From 1c27b4f8749b81d4cafc962d5aec4197d44f142a Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Thu, 26 Jun 2025 21:39:40 +0200 Subject: [PATCH 21/41] Int and UInt field structs now implement a reasonable __repr__ format --- src/caterpillar/fields/common.py | 7 +++++++ src/caterpillar/fields/common.pyi | 8 ++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/caterpillar/fields/common.py b/src/caterpillar/fields/common.py index 44a60470..160d07b4 100755 --- a/src/caterpillar/fields/common.py +++ b/src/caterpillar/fields/common.py @@ -1242,6 +1242,13 @@ def __init__(self, bits: int, signed=True) -> None: raise ValueError(f"Invalid int size: {bits!r} - expected int") self.size = self.__bits__ // 8 + def __repr__(self) -> str: + name = "int" + if not self.signed: + name = f"u{name}" + + return f"<{name}{self.__bits__}>" + def __type__(self) -> type: """ Return the type associated with this Int field. diff --git a/src/caterpillar/fields/common.pyi b/src/caterpillar/fields/common.pyi index 0f044eab..ca209b2e 100755 --- a/src/caterpillar/fields/common.pyi +++ b/src/caterpillar/fields/common.pyi @@ -176,7 +176,7 @@ class Computed(Generic[_IT], FieldStruct[NoneType, _IT]): def pack_single(self, obj: NoneType, context: _ContextLike) -> None: ... def unpack_single(self, context: _ContextLike) -> _IT: ... -class Pass(FieldStruct[None, None]): +class _Pass(FieldStruct[None, None]): def __bits__(self) -> int: ... def __type__(self) -> Type[None]: ... def __pack__(self, obj: None, context: _ContextLike) -> None: ... 
@@ -185,6 +185,8 @@ class Pass(FieldStruct[None, None]): def pack_single(self, obj: None, context: _ContextLike) -> None: ... def unpack_single(self, context: _ContextLike) -> None: ... +Pass: _Pass + _PrefixIOT = TypeVar("_PrefixIOT", bound=SupportsLenAndGetItem, default=bytes) class Prefixed(Generic[_PrefixIOT], FieldStruct[_PrefixIOT, _PrefixIOT]): @@ -249,9 +251,11 @@ class Lazy(FieldStruct[_IT, _OT]): def pack_single(self, obj: _IT, context: _ContextLike) -> None: ... def unpack_single(self, context: _ContextLike) -> _OT: ... -class Uuid(FieldStruct[UUID, UUID]): +class _Uuid(FieldStruct[UUID, UUID]): def __type__(self) -> Type[UUID]: ... def __size__(self, context: _ContextLike) -> int: ... def __bits__(self) -> int: ... def __pack__(self, obj: UUID, context: _ContextLike) -> None: ... def __unpack__(self, context: _ContextLike) -> UUID: ... + +Uuid: _Uuid \ No newline at end of file From a87b71b450f8758f7ece433c7ed041640c61c6f0 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Thu, 26 Jun 2025 21:42:31 +0200 Subject: [PATCH 22/41] New bitfield concept based in int.{from,to}_bytes --- + BitField was renamed to Bitfield + Each Bitfield now stores a list of groups, which either represent a field or a list of entries + bitfield_example was updated --- examples/bitfield_example.py | 5 +- src/caterpillar/model/__init__.py | 22 +- src/caterpillar/model/_base.py | 2 +- src/caterpillar/model/_base.pyi | 1 + src/caterpillar/model/_bitfield.py | 754 +++++++++++++++++++--------- src/caterpillar/model/_bitfield.pyi | 122 +++-- src/caterpillar/options.py | 6 + src/caterpillar/options.pyi | 29 +- 8 files changed, 643 insertions(+), 298 deletions(-) diff --git a/examples/bitfield_example.py b/examples/bitfield_example.py index 039feb0d..ba8b35b5 100644 --- a/examples/bitfield_example.py +++ b/examples/bitfield_example.py @@ -1,4 +1,5 @@ # type: ignore +from caterpillar.fields.common import uint8 from caterpillar.py import 
bitfield, char, int8, unpack, pack try: @@ -16,10 +17,10 @@ class Format: _1: 3 # unnamed padding to the rest of the byte -print(Format.__struct__) +print(Format.__struct__.groups) obj = unpack(Format, b"\x80\x80") print(obj) -# prints: Format(b1=True, b2='2', b3=0, _1=0) +# prints: Format(b1=1, b2='2', b3=0, _1=0) # real_pos: 0123456701234567 # bit_pos: 7654321076543210 # ---------------- # right to left diff --git a/src/caterpillar/model/__init__.py b/src/caterpillar/model/__init__.py index 4f71136c..2d8b97d6 100644 --- a/src/caterpillar/model/__init__.py +++ b/src/caterpillar/model/__init__.py @@ -25,7 +25,18 @@ pack_file, sizeof, ) -from ._bitfield import BitField, bitfield, BitFieldGroup, issigned, getbits +from ._bitfield import ( + Bitfield, + bitfield, + BitfieldEntry, + BitfieldGroup, + BitfieldValueFactory, + issigned, + getbits, + NewGroup, + EndGroup, + SetAlignment, +) from ._template import istemplate, template, TemplateTypeVar, derive __all__ = [ @@ -40,13 +51,18 @@ "pack_into", "pack_file", "sizeof", - "BitField", + "Bitfield", "bitfield", - "BitFieldGroup", + "BitfieldGroup", "issigned", "getbits", "istemplate", "template", "TemplateTypeVar", "derive", + "NewGroup", + "EndGroup", + "SetAlignment", + "BitfieldEntry", + "BitfieldValueFactory", ] diff --git a/src/caterpillar/model/_base.py b/src/caterpillar/model/_base.py index c88d1c75..2f9b8315 100755 --- a/src/caterpillar/model/_base.py +++ b/src/caterpillar/model/_base.py @@ -424,7 +424,7 @@ def pack_one(self, obj, context) -> None: if name in self._member_map_: value = self.get_value(obj, name, field) else: - # REVISIT: this line might not be necessary if const fields alredy + # REVISIT: this line might not be necessary if const fields already # use their internal value. 
value = field.default if field.default != INVALID_DEFAULT else None field.__pack__(value, context) diff --git a/src/caterpillar/model/_base.pyi b/src/caterpillar/model/_base.pyi index a192be91..332a2af5 100755 --- a/src/caterpillar/model/_base.pyi +++ b/src/caterpillar/model/_base.pyi @@ -32,6 +32,7 @@ class Sequence(FieldMixin, Generic[_SeqModelT], _StructLike[_SeqModelT, _SeqMode options: set[Flag] field_options: set[Flag] is_union: bool + _member_map_: Dict[str, Field] def __init__( self, model: Optional[dict[str, Field]] = None, diff --git a/src/caterpillar/model/_bitfield.py b/src/caterpillar/model/_bitfield.py index 517f3424..52f4fabe 100755 --- a/src/caterpillar/model/_bitfield.py +++ b/src/caterpillar/model/_bitfield.py @@ -12,36 +12,141 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . -import struct as libstruct - -from dataclasses import dataclass, field as dcfield - -from caterpillar.abc import _StructLike -from caterpillar.shared import typeof, ATTR_BITS, ATTR_SIGNED +from caterpillar.fields.common import Int +from caterpillar.shared import ( + ATTR_ACTION_PACK, + ATTR_ACTION_UNPACK, + ATTR_BYTEORDER, + typeof, + ATTR_BITS, + ATTR_SIGNED, +) from caterpillar.byteorder import ( - byteorder, + LittleEndian, + SysNative, system_arch, ) from caterpillar.options import ( + B_GROUP_NEW, GLOBAL_BITFIELD_FLAGS, GLOBAL_STRUCT_OPTIONS, GLOBAL_UNION_OPTIONS, + B_OVERWRITE_ALIGNMENT, + B_GROUP_END, + B_GROUP_KEEP, + Flag, ) from caterpillar.fields import ( Field, - uint8, - uint16, - uint32, - uint64, - boolean, Pass, + INVALID_DEFAULT, ) -from caterpillar.exception import ValidationError, DelegationError -from caterpillar.context import Context, CTX_OBJECT, CTX_STREAM +from caterpillar.exception import StructException +from caterpillar.context import CTX_PATH, Context, CTX_OBJECT, CTX_STREAM + +from ._struct import Struct, sizeof + + +# --- BItfield Concept --- +# NEW REVISED CONCEPT +# Each Bitfield 
stores a sequence of so-called bitfield-groups, whereby each group stores +# a collection of sized fields. A bitfield-group may be represented by a collection of other +# fields (_StructLike) or a single _StructLike object. For instance, consider the following +# bitfield definition: +# +# @bitfield +# class Format: +# a1: 1 +# a2: 1 - boolean +# _ : 0 +# b1: char +# c1: uint32 +# +# The created Bitfield will store three formal bitfield-groups (marked with characters a to c +# here). By default, the bitfield uses 8bit alignment, which results in 1 byte for the first +# group: +# +# Group Pos Bits +# a 0x00 8 +# b 0x01 8 +# c 0x02 32 +# +# Internally, only the first group requires special parsing since the other two are structs +# without a specific bit number. This dynamic grouping model now allows us to use the power +# of struct class definitions in bitfields. +# +# This new approach enables more complect bitfield definitions. Therefore, the syntax will be +# extended: +# +# +---------------------------------------------------+--------------------------------+ +# 1.| : [ - ] | default definition | +# +---------------------------------------------------+--------------------------------+ +# 2.| : 0 | start new byte | +# +---------------------------------------------------+--------------------------------+ +# 3.| : | custom field (no bits used) | +# +---------------------------------------------------+--------------------------------+ +# 4.| : (,) | field with custom type factory | +# +---------------------------------------------------+--------------------------------+ +# 5.| : (,[,]) | bits with custom type factory | +# | : (,[]) | and options | +# +---------------------------------------------------+--------------------------------+ +# +# The generation process will follow some implications and rules derived from this extended +# syntax. These rules are as follows: +# +# For 1.: +# - The default alignment is one byte (8bits). If an annotation of rule no. 
2 follows, +# eight the current byte will be filled. +# - When the is given, typeof will be called to retrieve the and +# getbits+sizeof to retrieve the current alignment. The new alignment will be +# ignored if a custom alignment has been set in the constructor of the BItfield. +# If the stores the B_OVERWRITE_ALIGNMENT option, the current alignment +# will be overwritten. +# - If the B_GROUP_END option is present, the current group will be finalized and a +# new one will be started. +# +# For 2.: +# - This field definition will be removed regardless of the given name +# - The current configured alignment will be used to fill bits up to the alignment. +# - This field definition finalizes the current bitfield-group unless B_GROUP_KEEP +# has been configured on the bitfield. +# +# For 3.: +# - The same rules as for defining fields in struct classes apply here. +# - Additionally, this definition implies a definition of rule no. 2. The current group +# will be finalized regardless of whether B_GROUP_KEEP has been set. +# +# For 4.: +# - Extension of rule no. 1 that explicitly defines the conversion factory, which must be +# one of the following types: Type (such as int, bool) that supports the __int__ method, +# or a type or instance of a BitfieldValueFactory. +# +# For 5.: +# - Same processing as for rule no. 4 but defined options (either flags or options described +# below) can be present either as list or single element. Additional options are: +# NewGroup: Aligns the previous group to the current alignment and creates a new one +# while also adding the specified bitfield entry to the new group. +# EndGroup: Adds the bitfield entry to the current group and aligns to according to the +# current alignment. +# SetAlignment: Updates the current working alignment +# The order of specified options matters. 
-from ._struct import Struct +DEFAULT_ALIGNMENT = 8 -BitTuple = tuple +NewGroup = B_GROUP_NEW +EndGroup = B_GROUP_END + + +class SetAlignment: + def __init__(self, new_alignment: int) -> None: + self.alignment = new_alignment + + @staticmethod + def flag(new_alignment: int): + return Flag("bitfield.new_alignment", new_alignment) + + def __hash__(self) -> int: + return hash("SetAlignment") def getbits(obj) -> int: @@ -53,36 +158,103 @@ def issigned(obj) -> bool: return bool(getattr(obj, ATTR_SIGNED, None)) -@dataclass(init=False) -class BitFieldGroup: - size: int - pos: int - fmt: str - fields: dict = dcfield(default_factory=dict) - - def __init__(self, size: int, pos: int, fields=None) -> None: - self.size = size - self.pos = pos - self.fields = fields or {} - # this has to get refactored - if 8 < size <= 16: - self.fmt = "H" - elif 16 < size <= 32: - self.fmt = "I" - elif 32 < size <= 64: - self.fmt = "Q" - else: - self.fmt = "B" - - -class BitField(Struct): +class BitfieldValueFactory: + def __init__(self, target=None) -> None: + self.target = target or int + + def to_int(self, obj) -> int: + return int(obj) + + def from_int(self, value: int): + return self.target(value) + + +class BitfieldEntry: + def __init__( + self, bit: int, width: int, name: str, factory=None, action=None + ) -> None: + self.bit = bit + self.width = width + self.name = name + self.factory = factory or BitfieldValueFactory() + if isinstance(self.factory, type): + self.factory = self.factory() + self.action = action + self.low_mask = (1 << self.width) - 1 + + @staticmethod + def new_action(action): + return BitfieldEntry(0, 0, "", action=action) + + def shift(self, value_width: int) -> int: + bit_pos = max(0, value_width - self.bit) + return max(bit_pos - self.width, 0) + + def is_action(self) -> bool: + return self.action is not None + + def __repr__(self) -> str: + if self.is_action(): + return repr(self.action) + + r = f"" + + +class BitfieldGroup: + def __init__(self, bit_count: int) -> 
None: + self.bit_count = bit_count + self.entries = [] + + def is_field(self) -> bool: + return self.bit_count == -1 + + def get_field(self): + return self.entries[0] + + def set_field(self, field): + self.entries = [field] + self.bit_count = -1 + + def align_to(self, alignment: int): + if not self.is_field(): + pad = self.bit_count % alignment + if pad > 0: + self.bit_count += alignment - pad + + def is_empty(self) -> bool: + return len(self.entries) == 0 + + def get_size(self, context=None): + if self.is_field(): + field = self.get_field() + return field.__size__(context) if context else sizeof(field) + + return self.bit_count // 8 + + def get_bits(self, context=None): + return self.get_size(context) * 8 + + def __repr__(self) -> str: + if self.is_field(): + return repr(self.get_field()) + + return f"" + + +class Bitfield(Struct): __slots__ = ( "groups", + "alignment", "_bit_pos", - "_abs_bit_pos", "_current_group", - "__fmt__", - "__bits__", + "_current_alignment", ) def __init__( @@ -92,13 +264,14 @@ def __init__( arch=None, options=None, field_options=None, + alignment=None, ) -> None: - self.groups = [] + self.alignment = alignment or DEFAULT_ALIGNMENT # These fields remain private and will be deleted after processing + self._current_alignment = self.alignment + self._current_group = BitfieldGroup(self._current_alignment) self._bit_pos = 0 - self._abs_bit_pos = 0 - self._current_group = None - + self.groups = [self._current_group] super().__init__( model=model, order=order, @@ -109,225 +282,303 @@ def __init__( # Add additional options based on the struct's type self.options.difference_update(GLOBAL_STRUCT_OPTIONS, GLOBAL_UNION_OPTIONS) self.options.update(GLOBAL_BITFIELD_FLAGS) - self.__bits__ = sum(map(lambda x: x.size, self.groups)) - self.__fmt__ = "".join(map(lambda x: x.fmt, self.groups)) + # self.__bits__ = sum(map(lambda x: x.bit_count, self.groups)) + self.groups = [group for group in self.groups if not group.is_empty()] del self._bit_pos - del 
self._abs_bit_pos + del self._current_alignment del self._current_group - def __add__(self, other): - if not isinstance(other, BitField): - raise ValidationError( - f"Attempted to add a non-bitfield struct to a bitfield! (type={type(other)})" + def __add__(self, sequence): + if not isinstance(sequence, Bitfield): + raise TypeError( + f"Attempted to add a non-bitfield struct to a bitfield! (type={type(sequence)})" ) - # REVISIT: undefined bahaviour when parsing - return super(Struct, self).__add__(other) - - def _process_field(self, name: str, annotation, default): - """ - Process a field in the model. - - :param name: The name of the field. - :param annotation: The annotation of the field. - :param default: The default value of the field. - :return: The processed field. - """ - - # Fields can be defined as follows: - # name : bit_count [ - struct ] [ = default_value ] - # or - # name : struct [ = default_value ] - struct = None - field = None - - order = byteorder(annotation, self.order) - group = self._current_group - arch = self.arch or system_arch - width = 0 - if group is None: - # First, specify the current group and add it to the rest of - # the internal representation. - group = BitFieldGroup(8, self._abs_bit_pos) - self.groups.append(group) - self._current_group = group - - if isinstance(annotation, int): - # CASE 1: Only the bit amount is specified. We take the current group - # into consideration and decide whether we have to apply a padding. - # TODO: cleanup - struct = uint8 # this struct is only used to infer the right type - if 8 < annotation <= 16: - struct = uint16 - elif 16 < annotation <= 32: - struct = uint32 - elif 32 < annotation <= 64: - struct = uint64 - elif annotation == 1: - struct = boolean - if annotation != 0: - width = annotation - else: - # Special case: a zero indicates we have to start a new byte. To - # accomplish that, we simply create a new BitFieldGroup. 
The padding - # is generated by moving the internal absolute bit position forwards. - self._current_group = BitFieldGroup(8, self._abs_bit_pos) - width = 8 - self._abs_bit_pos % 8 - struct = Pass # NoneType will be inferred - - elif isinstance(annotation, Field): - # CASE 2: the field has been defined using a type specifier. - field = annotation - if field.bits is None: - # We have to use a special method to calculate the amount of this this - # field takes. - try: - width = getbits(field.struct) - except ValueError as exc: - raise ValidationError( - f"Field {name!r} does not specify a bit width!" - ) from exc - else: - width = field.bits - - elif isinstance(annotation, _StructLike): - # CASE 3: Only the struct is given without a bit width - struct = annotation - try: - # The same applies here: the struct MUST specify a bit width - width = getbits(struct) - except ValueError as exc: - raise ValidationError( - f"Field {name!r} does not specify a bit width!" - ) from exc - - if struct is not None: - field = Field(struct, order, arch=arch, default=default) - - if field is None: - raise ValidationError( - f"Field {name!r} could not be created: {annotation!r}" + self.groups.extend(sequence.groups) + return super(Struct, self).__add__(sequence) + + def _process_align(self, options) -> Field: + for option in options or []: + if self._process_alignment_option(option): + continue + + alignment = self._current_alignment + group = self._current_group + if option.name in (EndGroup.name, NewGroup.name): + # finalize current group (same effect for alignment statement) + group.align_to(alignment) + self._current_group = self._new_group(alignment) + + # 2.: the current group will be finalized + if not self.has_option(B_GROUP_KEEP): + self._current_group.align_to(self._current_alignment) + self._current_group = self._new_group(self._current_alignment) + + return Field(Pass) + + def _process_bits(self, name: str, bits: int, factory=None, options=None) -> Field: + if bits == 0: + 
return self._process_align(options) + + entry = BitfieldEntry(self._bit_pos, bits, name, factory) + self._bit_pos += bits + if not self._process_options(options, entry): + group = self._current_group + group.entries.append(entry) + # Adjust the size of the goup dynamically + group.bit_count = max(group.bit_count, self._bit_pos) + # this is only symbolic + return Field(Int(bits)) + + def _process_bits_field(self, name: str, field, options=None) -> Field: + if field.bits is None: + # we don't need to check for NewGroup and EndGroup options here as no + # bits are specified and the field gets its own group. + for option in options or []: + self._process_alignment_option(option) + + # bits not present -> treat defintion as simple field, which means we finalize + # the current group, create a new FIELD GROUP and another new one after that + self._current_group.align_to(self._current_alignment) + field_group = self._new_group(-1) + field_group.set_field(field) + self._current_group = self._new_group(self._current_alignment) + return field + + width = field.bits + if not isinstance(width, int): + raise TypeError( + f"Bitfield: field definition of {name!r} requires an integer as bits" ) - field.default = default - field.order = self.order or field.order - field.arch = self.arch or field.arch - field.bits = field.bits or width - field.flags.update({hash(x): x for x in self.field_options}) - - # Now, we have to check whether a new byte has to be started - if group.size - self._bit_pos < width: - if field.bits is None: - # Only type specified, we take its bit count - size = width - else: - leftover = width % 8 - size = width + ((8 - leftover) if leftover > 0 else 0) - group = BitFieldGroup(size, self._abs_bit_pos) - self._current_group = group - self.groups.append(group) - self._bit_pos = 0 - - type_ = typeof(field.struct) - bit_pos = max(group.size - 1 - self._bit_pos, 0) - # NOTE: I know, we're calling this method twice now, but it saves some - # iterations later on. 
- if self._included(name, default, annotation): - group.fields[(bit_pos, width, type_)] = field + + entry = BitfieldEntry( + self._bit_pos, width, name, BitfieldValueFactory(typeof(field)) + ) self._bit_pos += width - self._abs_bit_pos += width + if not self._process_options(options, entry): + group = self._current_group + group.entries.append(entry) + group.bit_count = max(group.bit_count, self._bit_pos) + + if field.has_flag(B_OVERWRITE_ALIGNMENT): + self._current_alignment = (sizeof(field) * 8) or DEFAULT_ALIGNMENT + self._current_group.align_to(self._current_alignment) + return field + def _process_options(self, options, entry=None) -> bool: + consumed = False + for option in options or []: + if self._process_alignment_option(option): + continue + + group = self._current_group + alignment = self._current_alignment + if option.name == EndGroup.name: + if entry: + group.entries.append(entry) + consumed = True + group.align_to(alignment) + + self._current_group = self._new_group(alignment) + elif option.name == NewGroup.name: + # finalize current group, create a new one and add the entry to the newly + # created group + group.align_to(alignment) + self._current_group = group = self._new_group(alignment) + + if entry: + group.entries.append(entry) + # position was reset to zero + self._bit_pos += entry.width + group.bit_count = max(group.bit_count, self._bit_pos) + consumed = True + return consumed + + def _new_group(self, alignment): + new_group = BitfieldGroup(alignment) + self.groups.append(new_group) + self._bit_pos = 0 + return new_group + + def _process_alignment_option(self, option): + if isinstance(option, SetAlignment): + # update current working alignment + self._current_alignment = option.alignment + return True + elif isinstance(option, Flag): + if option.name == "bitfield.new_alignment": + self._current_alignment = option.value or DEFAULT_ALIGNMENT + return True + + return False + + def _process_field(self, name: str, annotation, default): + arch = 
self.arch or system_arch + order = getattr(annotation, ATTR_BYTEORDER, self.order or SysNative) + match annotation: + case int(): + if annotation == 0: + return self._process_align(None) + else: + # 1. (without field) defines the width using the default value factory + return self._process_bits(name, annotation) + + case tuple(): # NEW EXTENDED DEFINITION + if len(annotation) == 0: + raise ValueError( + f"Extended field definition for {name!r} does not define any values!" + ) + + if len(annotation) == 1: + # definition: (, ) + (width,) = annotation + if width == 0: + raise ValueError( + f"Extended field {name!r} defines forbidden width of zero" + ) + + return self._process_bits(name, width) + + options = [] + factory = None + (width, factory_or_option, *extra_options) = annotation + if isinstance(factory_or_option, BitfieldValueFactory): + factory = factory_or_option + elif isinstance(factory_or_option, type) and issubclass( + factory_or_option, BitfieldValueFactory + ): + factory = factory_or_option() + else: + # treat as option + options = [factory_or_option] + options.extend(extra_options) + + if isinstance(width, int): + # rule no. 
5 + return self._process_bits(name, width, factory, options) + + field = width + if not isinstance(field, Field): + field = Field(field, order=order, arch=arch, default=default) + + return self._process_bits_field(name, field, options) + + case _: + # rule 1 (with field) or rule 3 + field = self._process_annotation(annotation, default, order, arch) + if not isinstance(field, Field): + field = Field(field, order=order, arch=arch, default=default) + return self._process_bits_field(name, field) + def _included(self, name: str, default, annotation) -> bool: if not super()._included(name, default, annotation): return False - if isinstance(annotation, int) and annotation == 0: - # padding should be ignored - return False - return True + width = annotation + if isinstance(annotation, tuple): + width, *_ = annotation - def group(self, bit_index: int): - grp = None - for candidate in self.groups: - if bit_index > candidate.pos: - break - grp = candidate - return grp + if isinstance(width, int): + return width != 0 + + if isinstance(width, Field): + return width.bits != 0 + return True def __size__(self, context) -> int: - # The size of a bitfield is alsways static - return self.__bits__ // 8 + # size is different as our model includes correct padding + return sum(map(lambda g: g.get_size(context), self.groups)) + + def __bits__(self) -> int: + return sum(map(lambda g: g.get_bits(), self.groups)) def unpack_one(self, context): - # At first, we define the object context where the parsed values - # will be stored init_data = Context() context[CTX_OBJECT] = Context(_parent=context) - values = libstruct.unpack( - f"{self.order.ch}{self.__fmt__}", - context[CTX_STREAM].read(self.__bits__ // 8), - ) - for i, group in enumerate(self.groups): - # each group specifies the fields we are about to unpack. 
But first, we have - # to read the bits from the stream - value = values[i] - for bit_info, field in group.fields.items(): - name: str = field.__name__ - # The field should be ignored if it is not within the - # member map (this usually means we have a padding field) - if name not in self._member_map_: - continue - # unnecessary: - # context[CTX_PATH] = f"{base_path}.<{i}>.{name}" - bit_pos, width, factory = bit_info - low_mask = (1 << width) - 1 - if width == 1: - field_value = bool(value & low_mask << bit_pos) - else: - shift = max(bit_pos + 1 - width, 0) - field_value: int = (value >> shift) & low_mask - if factory is not None: - field_value = factory(field_value) - # Finally, apply the new value - init_data[name] = field_value + base_path = context[CTX_PATH] + # REVISIT + endian = "little" if self.order == LittleEndian else "big" + for group in self.groups: + if group.is_field(): + # unpack using field instance + field = group.get_field() + name = field.__name__ + context[CTX_PATH] = f"{base_path}.{name}" + value = field.__unpack__(context) + context[CTX_OBJECT][name] = value + if name in self._member_map_: + init_data[name] = value + + else: + raw_data = context[CTX_STREAM].read(group.get_size()) + if not raw_data: + # set context path to next entry for debugging + context[CTX_PATH] = f"{base_path}.{group.entries[0].name}" + raise StructException( + f"Failed to parse group of size {group.bit_count}bits: unexpected EOF!", + context, + ) + raw_value = int.from_bytes(raw_data, endian) + for entry in group.entries: + # each entry may be an action + context[CTX_PATH] = f"{base_path}.{entry.name}" + if entry.is_action(): + func = getattr(entry.action, ATTR_ACTION_UNPACK, None) + if func: + func(context) + continue + + print(bin(raw_value), entry.shift(group.bit_count), bin(entry.low_mask)) + value = (raw_value >> entry.shift(group.bit_count)) & entry.low_mask + if entry.factory: + value = entry.factory.from_int(value) + + init_data[entry.name] = value return 
self.model(**init_data) def pack_one(self, obj, context) -> None: - # REVISIT: this function is very time consuming. should be do something - # about that? - stream = context[CTX_STREAM] - values = [] + base_path = context[CTX_PATH] + # REVISIT + endian = "little" if self.order == LittleEndian else "big" for group in self.groups: - # The same applies here, but we convert all values to int instead of reading - # them from the stream - value = 0 - for bit_info, field in group.fields.items(): - # Setup the field's context - name: str = field.__name__ - # Padding is translated into zeros - if name not in self._member_map_: - continue - - bit_pos, width, _ = bit_info - field_value = getattr(obj, name, 0) or 0 - shift = bit_pos + 1 - width - # Here's the tricky part: we have to convert all values to int - # without knowing their type. We make use of Python's data model, - # which defines a function particularly for this use-case: __int__ - # - # See https://docs.python.org/3/reference/datamodel.html#object.__int__ - try: - # REVISIT: what about the field's width - value |= int(field_value) << shift - except NotImplementedError as exc: - raise DelegationError( - f"Field {name!r} does not support to-int conversion!" - ) from exc - # REVISIT: is this cheating? 
- values.append(value) - stream.write(libstruct.pack(f"{self.order.ch}{self.__fmt__}", *values)) + if group.is_field(): + field = group.get_field() + name = field.__name__ + context[CTX_PATH] = f"{base_path}.{name}" + if name in self._member_map_: + value = self.get_value(obj, name, field) + else: + value = field.default if field.default != INVALID_DEFAULT else None + + field.__pack__(value, context) + else: + value = 0 + for entry in group.entries: + context[CTX_PATH] = f"{base_path}.{entry.name}" + if entry.is_action(): + func = getattr(entry.action, ATTR_ACTION_PACK, None) + if func: + func(context) + continue + + entry_value = self.get_value(obj, entry.name, None) + if entry.factory: + entry_value = entry.factory.to_int(entry_value) + + # silently ignore invalid values + value |= (entry_value & entry.low_mask) << entry.shift( + group.bit_count + ) + context[CTX_STREAM].write(value.to_bytes(group.bit_count // 8, endian)) + + def add_action(self, action) -> None: + self._current_group.entries.append( + BitfieldEntry(0, 0, "", action=action) + ) + return super().add_action(action) def _make_bitfield( @@ -338,9 +589,15 @@ def _make_bitfield( order=None, arch=None, field_options=None, + alignment=None, ) -> type: - _ = BitField( - cls, order=order, arch=arch, options=options, field_options=field_options + _ = Bitfield( + cls, + order=order, + arch=arch, + options=options, + field_options=field_options, + alignment=alignment, ) return cls @@ -353,15 +610,26 @@ def bitfield( order=None, arch=None, field_options=None, + alignment=None, ): def wrap(cls): return _make_bitfield( - cls, options=options, order=order, arch=arch, field_options=field_options + cls, + options=options, + order=order, + arch=arch, + field_options=field_options, + alignment=alignment, ) if cls is not None: return _make_bitfield( - cls, options=options, order=order, arch=arch, field_options=field_options + cls, + options=options, + order=order, + arch=arch, + field_options=field_options, + 
alignment=alignment, ) return wrap diff --git a/src/caterpillar/model/_bitfield.pyi b/src/caterpillar/model/_bitfield.pyi index 831ad5c5..2b630e59 100755 --- a/src/caterpillar/model/_bitfield.pyi +++ b/src/caterpillar/model/_bitfield.pyi @@ -12,73 +12,121 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . -from typing import Any, Callable, Dict, Iterable, Optional, Self, Type, TypeVar, overload +from typing import ( + Any, + Callable, + Dict, + Generic, + Iterable, + List, + Optional, + Self, + Type, + TypeVar, + overload, +) -from caterpillar.abc import _ContextLike +from caterpillar.abc import _ContextLike, _ActionLike from caterpillar.byteorder import Arch, ByteOrder -from caterpillar.options import Flag +from caterpillar.options import B_GROUP_END, B_GROUP_NEW, Flag from caterpillar.fields._base import Field from caterpillar.model._struct import Struct _ModelT = TypeVar("_ModelT") -BitTuple = tuple[int, int, type] -BITS_ATTR: str -SIGNED_ATTR: str +DEFAULT_ALIGNMENT: int def getbits(obj: Any) -> int: ... def issigned(obj: Any) -> bool: ... -class BitFieldGroup: - size: int - pos: int - fmt: str - fields: dict[BitTuple, Field] = ... +class SetAlignment: + alignment: int + def __init__(self, new_alignment: int) -> None: ... + @staticmethod + def flag(new_alignment: int) -> Flag[int]: ... + def __hash__(self) -> int: ... + +NewGroup = B_GROUP_NEW +EndGroup = B_GROUP_END + +_VT = TypeVar("_VT", default=int) + +class BitfieldValueFactory(Generic[_VT]): + def to_int(self, obj: _VT) -> int: ... + def from_int(self, value: int) -> _VT: ... + +class BoolFactory(BitfieldValueFactory[bool]): + def from_int(self, value: int) -> bool: ... 
+ +class BitfieldEntry: + bit: int + width: int + name: str + factory: Optional[BitfieldValueFactory] + action: Optional[_ActionLike] + low_mask: int + def __init__( self, - size: int, - pos: int, - fields: Optional[Dict[BitTuple, Field]] = None, + bit: int, + width: int, + name: str, + factory: BitfieldValueFactory | Type | None = ..., + action: _ActionLike | None = ..., ) -> None: ... + @staticmethod + def new_action(action: _ActionLike) -> BitfieldEntry: ... + def shift(self, value_width: int) -> int: ... + def is_action(self) -> bool: ... -class BitField(Struct[_ModelT]): - groups: list[BitFieldGroup] - __bits__: int - __fmt__: str +class BitfieldGroup: + bit_count: int + entries: List[BitfieldEntry | Field] + def __init__(self, bit_count: int) -> None: ... + def is_field(self) -> bool: ... + def get_field(self) -> Field: ... + def set_field(self, field: Field) -> None: ... + def align_to(self, alignment: int) -> None: ... + def is_empty(self) -> bool: ... + def get_size(self, context: _ContextLike | None = ...) -> int: ... + def get_bits(self, context: _ContextLike | None = ...) -> int: ... +class Bitfield(Struct[_ModelT]): + alignment: int + groups: List[BitfieldGroup] def __init__( self, - model: Type[_ModelT], - options: Optional[Iterable[Flag]] = None, - order: Optional[ByteOrder] = None, - arch: Optional[Arch] = None, - field_options: Optional[Flag] = None, + model, + order: ByteOrder | None = ..., + arch: Arch | None = ..., + options: Iterable[Flag] | None = ..., + field_options: Iterable[Flag] | None = ..., + alignment: int | None = ..., ) -> None: ... - def __add__(self, other: BitField) -> Self: ... + def __add__(self, sequence): ... def __size__(self, context: _ContextLike) -> int: ... - def group(self, bit_index: int) -> BitFieldGroup | None: ... - def unpack_one(self, context: _ContextLike) -> _ModelT: ... + def __bits__(self) -> int: ... + def unpack_one(self, context: _ContextLike): ... 
def pack_one(self, obj: _ModelT, context: _ContextLike) -> None: ... + def add_action(self, action: _ActionLike) -> None: ... @overload def bitfield( cls: None = None, /, *, - options: Iterable[Flag] | None = None, - order: ByteOrder | None = None, - arch: Arch | None = None, - field_options: Iterable[Flag] | None = None, + options: Iterable[Flag] | None = ..., + order: ByteOrder | None = ..., + arch: Arch | None = ..., + field_options: Iterable[Flag] | None = ..., ) -> Callable[[Type[_ModelT]], Type[_ModelT]]: ... - @overload def bitfield( cls: Type[_ModelT], /, *, - options: Iterable[Flag] | None = None, - order: ByteOrder | None = None, - arch: Arch | None = None, - field_options: Iterable[Flag] | None = None, -) -> Type[_ModelT]:... - + options: Iterable[Flag] | None = ..., + order: ByteOrder | None = ..., + arch: Arch | None = ..., + field_options: Iterable[Flag] | None = ..., +) -> Type[_ModelT]: ... diff --git a/src/caterpillar/options.py b/src/caterpillar/options.py index 497c1ad4..db82efaf 100755 --- a/src/caterpillar/options.py +++ b/src/caterpillar/options.py @@ -160,3 +160,9 @@ def get_flag(name: str, obj, attr=None): # value intentionally left blank O_ARRAY_FACTORY = Flag("option.array_factory", value=None) + +# bitfield options +B_OVERWRITE_ALIGNMENT = Flag("bitfield.overwrite_alignment") +B_GROUP_END = Flag("bitfield.group.end") +B_GROUP_NEW = Flag("bitfield.group.new") +B_GROUP_KEEP = Flag("bitfield.group.keep") \ No newline at end of file diff --git a/src/caterpillar/options.pyi b/src/caterpillar/options.pyi index b437a0d8..996892a2 100755 --- a/src/caterpillar/options.pyi +++ b/src/caterpillar/options.pyi @@ -15,7 +15,7 @@ from types import NoneType from typing import Any, Callable, Collection, Generic, Iterable, TypeVar -_VT = TypeVar("_VT") +_VT = TypeVar("_VT", default=NoneType) class Flag(Generic[_VT]): name: str @@ -37,15 +37,20 @@ def get_flags(obj: Any, attr: str | None = None) -> set[Flag] | None: ... 
def has_flag(flag: str | Flag, obj: Any, attr: str | None = None) -> bool: ... def get_flag(name: str, obj: Any, attr: str | None = None) -> Flag | None: ... -S_DISCARD_UNNAMED: Flag[NoneType] -S_DISCARD_CONST: Flag[NoneType] -S_UNION: Flag[NoneType] -S_REPLACE_TYPES: Flag[NoneType] -S_EVAL_ANNOTATIONS: Flag[NoneType] -S_ADD_BYTES: Flag[NoneType] -S_SLOTS: Flag[NoneType] -F_KEEP_POSITION: Flag[NoneType] -F_DYNAMIC: Flag[NoneType] -F_SEQUENTIAL: Flag[NoneType] -F_OFFSET_OVERRIDE: Flag[NoneType] +S_DISCARD_UNNAMED: Flag +S_DISCARD_CONST: Flag +S_UNION: Flag +S_REPLACE_TYPES: Flag +S_EVAL_ANNOTATIONS: Flag +S_ADD_BYTES: Flag +S_SLOTS: Flag +F_KEEP_POSITION: Flag +F_DYNAMIC: Flag +F_SEQUENTIAL: Flag +F_OFFSET_OVERRIDE: Flag O_ARRAY_FACTORY: Flag[Callable[[Iterable], Collection]] +B_OVERWRITE_ALIGNMENT: Flag + +B_GROUP_END: Flag +B_GROUP_NEW: Flag +B_GROUP_KEEP: Flag \ No newline at end of file From ceecbc7e58222f7955436bc3bd8e094b5ae8ed4f Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Fri, 27 Jun 2025 08:39:40 +0200 Subject: [PATCH 23/41] Added documentation comments and new option for Bitfield class --- + New option: B_NO_AUTO_BOOL to disable automatically assigning fields with one bit width a boolean factory --- src/caterpillar/model/_bitfield.py | 466 +++++++++++++++++++++++----- src/caterpillar/model/_bitfield.pyi | 24 ++ src/caterpillar/options.py | 21 +- src/caterpillar/options.pyi | 4 +- 4 files changed, 430 insertions(+), 85 deletions(-) diff --git a/src/caterpillar/model/_bitfield.py b/src/caterpillar/model/_bitfield.py index 52f4fabe..de8b5c92 100755 --- a/src/caterpillar/model/_bitfield.py +++ b/src/caterpillar/model/_bitfield.py @@ -34,6 +34,7 @@ B_OVERWRITE_ALIGNMENT, B_GROUP_END, B_GROUP_KEEP, + B_NO_AUTO_BOOL, Flag, ) from caterpillar.fields import ( @@ -49,10 +50,10 @@ # --- BItfield Concept --- # NEW REVISED CONCEPT -# Each Bitfield stores a sequence of so-called bitfield-groups, whereby each group 
stores -# a collection of sized fields. A bitfield-group may be represented by a collection of other -# fields (_StructLike) or a single _StructLike object. For instance, consider the following -# bitfield definition: +# Each Bitfield instance maintains a sequence of bitfield groups, where each group +# contains a collection of sized fields. A bitfield group may consist of either multiple +# entries (i.e., any types that can be converted to an integral type) or a single +# _StructLike object. For example, consider the following bitfield definition: # # @bitfield # class Format: @@ -62,87 +63,115 @@ # b1: char # c1: uint32 # -# The created Bitfield will store three formal bitfield-groups (marked with characters a to c -# here). By default, the bitfield uses 8bit alignment, which results in 1 byte for the first -# group: +# This Bitfield definition will generate three distinct bitfield groups (labeled here as +# groups a, b, and c). By default, bitfields use 8-bit alignment, leading to the following +# layout: # # Group Pos Bits # a 0x00 8 # b 0x01 8 # c 0x02 32 # -# Internally, only the first group requires special parsing since the other two are structs -# without a specific bit number. This dynamic grouping model now allows us to use the power -# of struct class definitions in bitfields. +# Internally, only the first group requires special bit-level parsing. The remaining groups +# (b and c) are treated as standard structures since they span full bytes or words without +# sub-byte alignment. This dynamic grouping mechanism allows leveraging full struct-like +# class definitions within bitfields. # -# This new approach enables more complect bitfield definitions. Therefore, the syntax will be -# extended: +# This new approach enables more complex and expressive bitfield definitions. 
The annotation +# syntax is therefore extended as follows: # -# +---------------------------------------------------+--------------------------------+ -# 1.| : [ - ] | default definition | -# +---------------------------------------------------+--------------------------------+ -# 2.| : 0 | start new byte | -# +---------------------------------------------------+--------------------------------+ -# 3.| : | custom field (no bits used) | -# +---------------------------------------------------+--------------------------------+ -# 4.| : (,) | field with custom type factory | -# +---------------------------------------------------+--------------------------------+ -# 5.| : (,[,]) | bits with custom type factory | -# | : (,[]) | and options | -# +---------------------------------------------------+--------------------------------+ +# +---------------------------------------------------+--------------------------------------+ +# 1.| : [ - ] | Standard field with optional type | +# +---------------------------------------------------+--------------------------------------+ +# 2.| : 0 | Aligns to the next byte boundary | +# +---------------------------------------------------+--------------------------------------+ +# 3.| : | Struct-like field (no bits consumed) | +# +---------------------------------------------------+--------------------------------------+ +# 4.| : (,) | Field with custom type factory | +# +---------------------------------------------------+--------------------------------------+ +# 5.| : (,[,]) | bits with custom type factory | +# | : (,[]) | and options | +# +---------------------------------------------------+--------------------------------------+ # -# The generation process will follow some implications and rules derived from this extended -# syntax. These rules are as follows: +# Processing Rules: # -# For 1.: -# - The default alignment is one byte (8bits). If an annotation of rule no. 2 follows, -# eight the current byte will be filled. 
-# - When the is given, typeof will be called to retrieve the and -# getbits+sizeof to retrieve the current alignment. The new alignment will be -# ignored if a custom alignment has been set in the constructor of the BItfield. -# If the stores the B_OVERWRITE_ALIGNMENT option, the current alignment -# will be overwritten. -# - If the B_GROUP_END option is present, the current group will be finalized and a -# new one will be started. +# Rule 1.: +# - Default alignment is 1 byte (8 bits). +# - If followed by a rule 2 declaration, the remaining bits in the current byte are padded. +# - If a is provided: +# - typeof() is used to infer the factory. +# - etbits() and sizeof() determine the field’s alignment. +# - If a custom alignment is configured in the Bitfield constructor, inferred alignment is +# ignored unless the field includes the B_OVERWRITE_ALIGNMENT option. +# - If the B_GROUP_END option is set, the current group is finalized and a new one is started. # -# For 2.: -# - This field definition will be removed regardless of the given name -# - The current configured alignment will be used to fill bits up to the alignment. -# - This field definition finalizes the current bitfield-group unless B_GROUP_KEEP -# has been configured on the bitfield. +# Rule 2.: +# - This rule forces alignment to the next byte boundary. +# - The field is ignored during final class generation (name is discarded). +# - The current group is finalized unless the bitfield is configured with B_GROUP_KEEP # -# For 3.: -# - The same rules as for defining fields in struct classes apply here. -# - Additionally, this definition implies a definition of rule no. 2. The current group -# will be finalized regardless of whether B_GROUP_KEEP has been set. +# Rule 3.: +# - Equivalent to struct-like class field definitions. +# - Automatically implies a rule 2 alignment. +# - Always finalizes the current group regardless of B_GROUP_KEEP. # -# For 4.: -# - Extension of rule no. 
1 that explicitly defines the conversion factory, which must be -# one of the following types: Type (such as int, bool) that supports the __int__ method, -# or a type or instance of a BitfieldValueFactory. +# Rule 4.: +# - Extension of Rule 1. +# - Explicitly defines a conversion factory for the field. +# - The factory must be: +# - A built-in type (e.g., int, bool) supporting __int__, or +# - A type or instance of BitfieldValueFactory. # -# For 5.: -# - Same processing as for rule no. 4 but defined options (either flags or options described -# below) can be present either as list or single element. Additional options are: -# NewGroup: Aligns the previous group to the current alignment and creates a new one -# while also adding the specified bitfield entry to the new group. -# EndGroup: Adds the bitfield entry to the current group and aligns to according to the -# current alignment. -# SetAlignment: Updates the current working alignment -# The order of specified options matters. - +# Rule 5.: +# - Builds upon Rule 4 with support for options. +# - Options can be passed as a list or single element. +# - Supported Options: +# - NewGroup: Aligns the current group, starts a new one, and adds the entry to it. +# - EndGroup: Adds the entry to the current group, then aligns it. +# - SetAlignment: Changes the current working alignment. +# - Note: Option order affects behavior and must be considered carefully. + +#: The default alignment (in bits) used for bitfield group boundaries DEFAULT_ALIGNMENT = 8 +#: Alias for the `B_GROUP_NEW` flag, used to indicate that a new bitfield group should be started. NewGroup = B_GROUP_NEW + +#: Alias for the `B_GROUP_END` flag, used to indicate that the current bitfield group +#: should be finalized. EndGroup = B_GROUP_END class SetAlignment: + """ + Instructional flag used to update the current bitfield alignment dynamically during + bitfield generation. 
+ + This class allows to explicitly set a new alignment boundary (in bits) for subsequent fields + or groups in a bitfield definition. This enables finer control over how bitfield groups are + organized and aligned. + + :param new_alignment: The alignment size in bits to be used from this point forward in the bitfield layout. + :type new_alignment: int + """ + def __init__(self, new_alignment: int) -> None: self.alignment = new_alignment @staticmethod def flag(new_alignment: int): + """Create a :class:`Flag` instance representing a request to set a new alignment. + + This method is intended for use where a generic :class:`Flag` is expected rather than a full + :class:`SetAlignment` object, e.g. for setting options for a :class:`Field`. + + >>> field = 5 - uint32 | SetAlignment.flag(32) + + :param new_alignment: The alignment size in bits. + :type new_alignment: int + :return: A `Flag` object with the key `"bitfield.new_alignment"` and the specified alignment as its value. + :rtype: Flag + """ return Flag("bitfield.new_alignment", new_alignment) def __hash__(self) -> int: @@ -150,26 +179,94 @@ def __hash__(self) -> int: def getbits(obj) -> int: + """Retrieve the bit-width of a given object. + + This function checks for a :py:func:`__bits__` attribute on the object. The object must either implement + the :class:`_SupportsBits` or :class:`_ContainsBits` protocol. + + >>> class A: + ... __bits__ = 3 + ... + >>> a = A() + >>> getbits(a) + 3 + + :param obj: The object for which the bit-width should be determined. It is expected to have an :attr:`ATTR_BITS` attribute. + :type obj: Any + :return: The number of bits used by the object. + :rtype: int + :raises AttributeError: If the object does not have an attribute defined by :attr:`ATTR_BITS`. + """ __bits__ = getattr(obj, ATTR_BITS) return __bits__() if callable(__bits__) else __bits__ def issigned(obj) -> bool: + """Determine whether a given object represents a signed field. 
+ + :param obj: The object for which signedness should be determined. + :type obj: Any + :return: :code:`True` if the field is marked as signed, :code:`False` otherwise. + :rtype: bool + """ return bool(getattr(obj, ATTR_SIGNED, None)) class BitfieldValueFactory: + """ + A generic factory class responsible for converting values between Python objects and integers + for use in bitfield entries. + + By default, the factory converts to and from Python's built-in :code:`int` type, but it can be customized + to support any type that accepts an integer in its constructor and implements :code:`__int__`. + + :param target: The target type to which integer values will be converted., defaults to None + :type target: type, optional + """ + def __init__(self, target=None) -> None: self.target = target or int def to_int(self, obj) -> int: + """Convert a Python object to an integer. + + :param obj: The object to convert. + :type obj: Any + :return: The integer representation of the object. + :rtype: int + """ return int(obj) def from_int(self, value: int): + """Convert an integer to the target object type. + + :param value: The integer to convert. + :type value: int + :return: The value converted to the target type. + :rtype: Any + """ return self.target(value) class BitfieldEntry: + """ + Represents a single entry in a bitfield, including its bit position, width, name, and + conversion behavior. + + May also represent a special action or directive instead of a field. + + :param bit: The starting bit position within its group. + :type bit: int + :param width: The number of bits used by this field. + :type width: int + :param name: The name of the field. + :type name: str + :param factory: A factory for type conversion. Defaults to BitfieldValueFactory. + :type factory: type or BitfieldValueFactory or None + :param action: Optional action object for special handling (e.g., alignment or padding). 
+ :type action: Any + """ + def __init__( self, bit: int, width: int, name: str, factory=None, action=None ) -> None: @@ -184,16 +281,44 @@ def __init__( @staticmethod def new_action(action): + """ + Create a new action-type entry (e.g., padding, control directive). + + :param action: The action object to encapsulate. + :type action: Any + :return: A BitfieldEntry instance with no bit-width, used for meta instructions. + :rtype: BitfieldEntry + """ return BitfieldEntry(0, 0, "", action=action) def shift(self, value_width: int) -> int: + """ + Calculate how much to shift the field when extracting it from a value. + + :param value_width: The total bit width of the container. + :type value_width: int + :return: The number of bits to shift. + :rtype: int + """ bit_pos = max(0, value_width - self.bit) return max(bit_pos - self.width, 0) def is_action(self) -> bool: + """ + Check whether this entry is an action (i.e., not a data field). + + :return: True if this is an action entry. + :rtype: bool + """ return self.action is not None def __repr__(self) -> str: + """ + Return a human-readable string representation of the bitfield entry. + + :return: String representation. + :rtype: str + """ if self.is_action(): return repr(self.action) @@ -201,37 +326,83 @@ def __repr__(self) -> str: if self.factory: type_ = type(self.factory) if type_ is BitfieldValueFactory: - type_ = int + type_ = self.factory.target r = f"{r}, factory={type_.__name__}" return f"{r}>" class BitfieldGroup: + """ + A group of one or more bitfield entries. Groups are used to organize fields within a single + alignment unit and may represent either packed fields or standalone fields. + + :param bit_count: The number of bits in the group, or -1 for single field representation. + :type bit_count: int + """ + def __init__(self, bit_count: int) -> None: self.bit_count = bit_count self.entries = [] def is_field(self) -> bool: + """ + Determine whether the group contains a single non-bitfield field. 
+ + :return: True if the group holds a single struct-like field. + :rtype: bool + """ return self.bit_count == -1 def get_field(self): + """ + Get the single field from this group. + + :return: The field object. + :rtype: BitfieldEntry + """ return self.entries[0] def set_field(self, field): + """ + Set the group to hold only the given field and mark it as a standalone field group. + + :param field: The field to store in this group. + :type field: BitfieldEntry + """ self.entries = [field] self.bit_count = -1 def align_to(self, alignment: int): + """ + Align the bit count of this group to the specified boundary. + + :param alignment: The number of bits to align to. + :type alignment: int + """ if not self.is_field(): pad = self.bit_count % alignment if pad > 0: self.bit_count += alignment - pad def is_empty(self) -> bool: + """ + Check if the group contains any entries. + + :return: True if the group is empty. + :rtype: bool + """ return len(self.entries) == 0 def get_size(self, context=None): + """ + Get the size of this group in bytes. + + :param context: Optional context used for size evaluation. + :type context: Any + :return: The size of the group in bytes. + :rtype: int + """ if self.is_field(): field = self.get_field() return field.__size__(context) if context else sizeof(field) @@ -239,6 +410,14 @@ def get_size(self, context=None): return self.bit_count // 8 def get_bits(self, context=None): + """ + Get the total number of bits in this group. + + :param context: Optional context used for size evaluation. + :type context: Any + :return: The number of bits. + :rtype: int + """ return self.get_size(context) * 8 def __repr__(self) -> str: @@ -249,12 +428,39 @@ def __repr__(self) -> str: class Bitfield(Struct): + """ + A Bitfield represents a packed structure composed of bit-level fields. 
This + class allows for the declarative definition of compact memory representations + where each field can occupy an arbitrary number of bits, not necessarily + aligned to byte boundaries. + + Core Implementation: + - Bitfields are organized into BitfieldGroups, which manage alignment and field aggregation. + - Entries can be individual bit widths or wrapped fields with explicit alignment. + - Special field options like NewGroup and EndGroup can control group layout. + - Supports value factories for type conversion and symbolic runtime actions. + + Available global options: + - :code:`B_NO_AUTO_BOOL`: disables automatically converting 1bit fields to boolean + - :code:`B_GROUP_KEEP`: disables finalizing groups when using the alignment definition syntax + + :param model: The model for the structure. + :type model: Any + :param order: Byte order of the structure. + :type order: Optional[str] + :param arch: Target architecture. + :type arch: Optional[str] + :param options: Global structure options. + :type options: Optional[set] + :param field_options: Field-specific options. + :type field_options: Optional[set] + :param alignment: Bit alignment size. + :type alignment: Optional[int] + """ + __slots__ = ( "groups", "alignment", - "_bit_pos", - "_current_group", - "_current_alignment", ) def __init__( @@ -282,15 +488,25 @@ def __init__( # Add additional options based on the struct's type self.options.difference_update(GLOBAL_STRUCT_OPTIONS, GLOBAL_UNION_OPTIONS) self.options.update(GLOBAL_BITFIELD_FLAGS) - # self.__bits__ = sum(map(lambda x: x.bit_count, self.groups)) self.groups = [group for group in self.groups if not group.is_empty()] + # REVISIT: should be enable modification after processing? del self._bit_pos del self._current_alignment del self._current_group def __add__(self, sequence): + """ + Append another Bitfield instance to this one. + + :param sequence: Another Bitfield instance. + :type sequence: Bitfield + :return: Combined Bitfield. 
+ :rtype: Bitfield + :raises TypeError: If sequence is not a Bitfield. + """ if not isinstance(sequence, Bitfield): + # REVISIT: we could just add each field as a group individually? raise TypeError( f"Attempted to add a non-bitfield struct to a bitfield! (type={type(sequence)})" ) @@ -299,6 +515,18 @@ def __add__(self, sequence): return super(Struct, self).__add__(sequence) def _process_align(self, options) -> Field: + """ + Process an alignment directive. + + .. code-block:: bnf + + : 0 + + :param options: A list of alignment-related options. + :type options: Optional[list] + :return: A placeholder field. + :rtype: Field + """ for option in options or []: if self._process_alignment_option(option): continue @@ -318,9 +546,26 @@ def _process_align(self, options) -> Field: return Field(Pass) def _process_bits(self, name: str, bits: int, factory=None, options=None) -> Field: + """ + Process a bitfield entry with a given width. + + :param name: Field name. + :type name: str + :param bits: Width in bits. + :type bits: int + :param factory: Optional value factory. + :type factory: Optional[BitfieldValueFactory] + :param options: Field-specific options. + :type options: Optional[list] + :return: Resulting Field. + :rtype: Field + """ if bits == 0: return self._process_align(options) + if not factory and bits == 1 and not self.has_option(B_NO_AUTO_BOOL): + factory = BitfieldValueFactory(bool) + entry = BitfieldEntry(self._bit_pos, bits, name, factory) self._bit_pos += bits if not self._process_options(options, entry): @@ -331,7 +576,24 @@ def _process_bits(self, name: str, bits: int, factory=None, options=None) -> Fie # this is only symbolic return Field(Int(bits)) - def _process_bits_field(self, name: str, field, options=None) -> Field: + def _process_bits_field( + self, name: str, field, options=None, factory=None + ) -> Field: + """ + Process a bitfield that wraps another field instance. + + :param name: Field name. + :type name: str + :param field: The field instance. 
+ :type field: Field + :param options: List of options. + :type options: Optional[list] + :param factory: Optional value factory. + :type factory: Optional[BitfieldValueFactory] + :return: Resulting Field. + :rtype: Field + :raises TypeError: If bit width is not an integer. + """ if field.bits is None: # we don't need to check for NewGroup and EndGroup options here as no # bits are specified and the field gets its own group. @@ -353,7 +615,7 @@ def _process_bits_field(self, name: str, field, options=None) -> Field: ) entry = BitfieldEntry( - self._bit_pos, width, name, BitfieldValueFactory(typeof(field)) + self._bit_pos, width, name, factory or BitfieldValueFactory(typeof(field)) ) self._bit_pos += width if not self._process_options(options, entry): @@ -362,7 +624,12 @@ def _process_bits_field(self, name: str, field, options=None) -> Field: group.bit_count = max(group.bit_count, self._bit_pos) if field.has_flag(B_OVERWRITE_ALIGNMENT): - self._current_alignment = (sizeof(field) * 8) or DEFAULT_ALIGNMENT + try: + field_bits = getbits(field.struct) + except Exception: + field_bits = sizeof(field) * 8 + + self._current_alignment = field_bits or DEFAULT_ALIGNMENT self._current_group.align_to(self._current_alignment) return field @@ -446,10 +713,12 @@ def _process_field(self, name: str, annotation, default): (width, factory_or_option, *extra_options) = annotation if isinstance(factory_or_option, BitfieldValueFactory): factory = factory_or_option - elif isinstance(factory_or_option, type) and issubclass( - factory_or_option, BitfieldValueFactory - ): - factory = factory_or_option() + elif isinstance(factory_or_option, type): + # here we enable just specifying types instead of a factory + if not issubclass(factory_or_option, BitfieldValueFactory): + factory = BitfieldValueFactory(factory_or_option) + else: + factory = factory_or_option() else: # treat as option options = [factory_or_option] @@ -463,7 +732,7 @@ def _process_field(self, name: str, annotation, default): if 
not isinstance(field, Field): field = Field(field, order=order, arch=arch, default=default) - return self._process_bits_field(name, field, options) + return self._process_bits_field(name, field, options, factory) case _: # rule 1 (with field) or rule 3 @@ -488,10 +757,24 @@ def _included(self, name: str, default, annotation) -> bool: return True def __size__(self, context) -> int: + """ + Calculate the total size of the bitfield structure. + + :param context: Packing context. + :type context: Any + :return: Total size in bytes. + :rtype: int + """ # size is different as our model includes correct padding return sum(map(lambda g: g.get_size(context), self.groups)) def __bits__(self) -> int: + """ + Compute the total number of bits in the structure. + + :return: Total bit count. + :rtype: int + """ return sum(map(lambda g: g.get_bits(), self.groups)) def unpack_one(self, context): @@ -530,7 +813,6 @@ def unpack_one(self, context): func(context) continue - print(bin(raw_value), entry.shift(group.bit_count), bin(entry.low_mask)) value = (raw_value >> entry.shift(group.bit_count)) & entry.low_mask if entry.factory: value = entry.factory.from_int(value) @@ -612,6 +894,44 @@ def bitfield( field_options=None, alignment=None, ): + """ + Decorator that transforms a class definition into a :class:`Bitfield` structure. + + This decorator enables defining bitfields using simple class syntax, + with support for custom alignment, ordering, architecture, and field options. + + :param cls: The user-defined class to transform. + :type cls: Optional[type] + :param options: A set of global or structure-specific options. + :type options: Optional[set] + :param order: Optional byte order for serialization (e.g., 'little' or 'big'). + :type order: Optional[str] + :param arch: Optional architecture string (e.g., 'x86', 'arm'). + :type arch: Optional[str] + :param field_options: Optional default options for fields. 
+ :type field_options: Optional[set] + :param alignment: Optional alignment in bits. + :type alignment: Optional[int] + :return: The decorated class, enhanced as a `Bitfield` structure. + :rtype: type + + .. code-block:: python + + from caterpillar.py import bitfield, SetAlignment, uint16 + + @bitfield + class Packet: + version : 3 + type : (5, SetAlignment(16)) + length : 10 + _ : 0 # align to 16bits + payload : uint16 + + # You can now pack/unpack Packet instances as compact binary bitfields + pkt = Packet(version=1, type=2, length=128, payload=0xABCD) + packed = pack(pkt) + unpacked = unpack(Packet, packed) + """ def wrap(cls): return _make_bitfield( cls, diff --git a/src/caterpillar/model/_bitfield.pyi b/src/caterpillar/model/_bitfield.pyi index 2b630e59..9b97a63f 100755 --- a/src/caterpillar/model/_bitfield.pyi +++ b/src/caterpillar/model/_bitfield.pyi @@ -109,6 +109,30 @@ class Bitfield(Struct[_ModelT]): def unpack_one(self, context: _ContextLike): ... def pack_one(self, obj: _ModelT, context: _ContextLike) -> None: ... def add_action(self, action: _ActionLike) -> None: ... + def _process_align( + self, options: Optional[Iterable[Flag | SetAlignment]] + ) -> Field: ... + def _process_bits( + self, + name: str, + bits: int, + factory: Optional[BitfieldValueFactory | Type] = ..., + options: Optional[Iterable[Flag | SetAlignment]] = ..., + ) -> Field: ... + def _process_bits_field( + self, + name: str, + field: Field, + factory: Optional[BitfieldValueFactory | Type] = ..., + options: Optional[Iterable[Flag | SetAlignment]] = ..., + ) -> Field: ... + def _process_options( + self, + options: Iterable[Flag | SetAlignment], + entry: Optional[BitfieldEntry] = ..., + ) -> bool: ... + def _new_group(self, alignment: int) -> BitfieldGroup: ... + def _process_alignment_option(self, option: Flag | SetAlignment) -> bool: ... 
@overload def bitfield( diff --git a/src/caterpillar/options.py b/src/caterpillar/options.py index db82efaf..322b41db 100755 --- a/src/caterpillar/options.py +++ b/src/caterpillar/options.py @@ -144,19 +144,19 @@ def get_flag(name: str, obj, attr=None): ############################################################################### # for structs and unions -S_DISCARD_UNNAMED = Flag("discard_unnamed") -S_DISCARD_CONST = Flag("discard_const") -S_UNION = Flag("union") -S_REPLACE_TYPES = Flag("replace_types") -S_EVAL_ANNOTATIONS = Flag("eval_annotations") +S_DISCARD_UNNAMED = Flag("struct.discard_unnamed") +S_DISCARD_CONST = Flag("struct.discard_const") +S_UNION = Flag("struct.union") +S_REPLACE_TYPES = Flag("struct.replace_types") +S_EVAL_ANNOTATIONS = Flag("struct.eval_annotations") S_ADD_BYTES = Flag("struct.bytes_method") S_SLOTS = Flag("struct.slots") # for fields -F_KEEP_POSITION = Flag("keep_position") -F_DYNAMIC = Flag("dynamic") -F_SEQUENTIAL = Flag("sequential") -F_OFFSET_OVERRIDE = Flag("offset_override") +F_KEEP_POSITION = Flag("field.keep_position") +F_DYNAMIC = Flag("field.dynamic") +F_SEQUENTIAL = Flag("field.sequential") +F_OFFSET_OVERRIDE = Flag("field.offset_override") # value intentionally left blank O_ARRAY_FACTORY = Flag("option.array_factory", value=None) @@ -165,4 +165,5 @@ def get_flag(name: str, obj, attr=None): B_OVERWRITE_ALIGNMENT = Flag("bitfield.overwrite_alignment") B_GROUP_END = Flag("bitfield.group.end") B_GROUP_NEW = Flag("bitfield.group.new") -B_GROUP_KEEP = Flag("bitfield.group.keep") \ No newline at end of file +B_GROUP_KEEP = Flag("bitfield.group.keep") +B_NO_AUTO_BOOL = Flag("bitfield.no_auto_bool") \ No newline at end of file diff --git a/src/caterpillar/options.pyi b/src/caterpillar/options.pyi index 996892a2..b03a48e5 100755 --- a/src/caterpillar/options.pyi +++ b/src/caterpillar/options.pyi @@ -50,7 +50,7 @@ F_SEQUENTIAL: Flag F_OFFSET_OVERRIDE: Flag O_ARRAY_FACTORY: Flag[Callable[[Iterable], Collection]] 
B_OVERWRITE_ALIGNMENT: Flag - B_GROUP_END: Flag B_GROUP_NEW: Flag -B_GROUP_KEEP: Flag \ No newline at end of file +B_GROUP_KEEP: Flag +B_NO_AUTO_BOOL: Flag \ No newline at end of file From d7f97521a054feb4184be406754770c424a2468e Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Fri, 27 Jun 2025 09:57:41 +0200 Subject: [PATCH 24/41] Updated bitfield behaviour to match concept and added tests --- + Updated imports in __init__ + Added two new value factories: CharFactory and EnumFactory + Bitfields now corrently populate documentation types if S_REPLACE_TYPES is active --- src/caterpillar/model/__init__.py | 6 + src/caterpillar/model/_bitfield.py | 122 ++++++++++++++++++- src/caterpillar/model/_bitfield.pyi | 14 ++- src/caterpillar/model/_struct.pyi | 5 +- test/_Py/model/test_bitfield.py | 175 ++++++++++++++++++++++++++++ 5 files changed, 314 insertions(+), 8 deletions(-) create mode 100644 test/_Py/model/test_bitfield.py diff --git a/src/caterpillar/model/__init__.py b/src/caterpillar/model/__init__.py index 2d8b97d6..7957f103 100644 --- a/src/caterpillar/model/__init__.py +++ b/src/caterpillar/model/__init__.py @@ -36,6 +36,9 @@ NewGroup, EndGroup, SetAlignment, + EnumFactory, + CharFactory, + DEFAULT_ALIGNMENT, ) from ._template import istemplate, template, TemplateTypeVar, derive @@ -65,4 +68,7 @@ "SetAlignment", "BitfieldEntry", "BitfieldValueFactory", + "EnumFactory", + "CharFactory", + "DEFAULT_ALIGNMENT", ] diff --git a/src/caterpillar/model/_bitfield.py b/src/caterpillar/model/_bitfield.py index de8b5c92..12e4f13f 100755 --- a/src/caterpillar/model/_bitfield.py +++ b/src/caterpillar/model/_bitfield.py @@ -224,6 +224,8 @@ class BitfieldValueFactory: :type target: type, optional """ + __slots__ = ("target",) + def __init__(self, target=None) -> None: self.target = target or int @@ -248,6 +250,89 @@ def from_int(self, value: int): return self.target(value) +class EnumFactory(BitfieldValueFactory): + """A value 
factory for enum-like types used in bitfields. + + This factory attempts to convert between integers and enumeration instances, + using the provided :code:`model` (which should support :code:`__int__`). It + can operate in strict or lenient mode: + + - In strict mode, a :class:`ValueError` is raised if conversion fails. + - In lenient mode, the raw integer is returned if the value is not in the enum. + + :param model: The enum model or mapping type to use. + :type model: Type + :param strict: Whether to raise an error on unknown values. + :type strict: bool + + .. code-block:: python + :caption: Example + + class Status(enum.IntEnum): + OK = 0 + ERROR = 1 + + factory = EnumFactory(Status, strict=True) + factory.from_int(0) # -> Status.OK + factory.from_int(2) # -> ValueError (strict mode) + """ + + def __init__(self, model, strict=False) -> None: + super().__init__(model) + self.strict = strict + + def from_int(self, value: int): + """ + Convert an integer into an enum instance or raw int. + + :param value: The integer to convert. + :type value: int + :return: Enum instance or raw int if not found (in non-strict mode). + :rtype: Any + :raises ValueError: If strict is enabled and value is not valid. + """ + try: + return super().from_int(value) + except ValueError: + if self.strict: + raise + return value + + +class CharFactory(BitfieldValueFactory): + """ + A value factory for handling single ASCII/Unicode characters as integers. + + This factory allows treating a character field as a one-byte integer and vice versa, + automatically converting during packing and unpacking. + """ + + def __init__(self) -> None: + super().__init__(str) + + def from_int(self, value: int): + """ + Convert an integer to its character representation. + + :param value: Integer ASCII or Unicode code point. + :type value: int + :return: Corresponding character. 
+ :rtype: str + """ + return chr(value) + + def to_int(self, obj) -> int: + """ + Convert a character to its integer (ordinal) representation. + + :param obj: The character to convert. + :type obj: str + :return: Corresponding integer value. + :rtype: int + """ + return ord(obj) + + class BitfieldEntry: """ Represents a single entry in a bitfield, including its bit position, width, name, and @@ -267,6 +352,8 @@ class BitfieldEntry: :type action: Any """ + __slots__ = ("bit", "width", "name", "factory", "action", "low_mask") + def __init__( self, bit: int, width: int, name: str, factory=None, action=None ) -> None: @@ -341,6 +428,8 @@ class BitfieldGroup: :type bit_count: int """ + __slots__ = ("entries", "bit_count") + def __init__(self, bit_count: int) -> None: self.bit_count = bit_count self.entries = [] @@ -527,6 +616,11 @@ def _process_align(self, options) -> Field: :return: A placeholder field. :rtype: Field """ + # 2.: the current group will be finalized + if not self.has_option(B_GROUP_KEEP): + self._current_group.align_to(self._current_alignment) + self._current_group = self._new_group(self._current_alignment) + for option in options or []: if self._process_alignment_option(option): continue @@ -538,11 +632,6 @@ def _process_align(self, options) -> Field: group.align_to(alignment) self._current_group = self._new_group(alignment) - # 2.: the current group will be finalized - if not self.has_option(B_GROUP_KEEP): - self._current_group.align_to(self._current_alignment) - self._current_group = self._new_group(self._current_alignment) - return Field(Pass) def _process_bits(self, name: str, bits: int, factory=None, options=None) -> Field: @@ -756,6 +845,19 @@ def _included(self, name: str, default, annotation) -> bool: return width.bits != 0 return True + def _replace_type(self, name: str, type_: type) -> None: + entry = self.get_entry(name) + if entry is not None: + if not entry.factory: + type_ = int + elif isinstance(entry.factory, BitfieldValueFactory): + 
type_ = entry.factory.target or object + else: + type_ = object + + # else: must be a field with a known type + return super()._replace_type(name, type_) + def __size__(self, context) -> int: """ Calculate the total size of the bitfield structure. @@ -862,6 +964,15 @@ def add_action(self, action) -> None: ) return super().add_action(action) + def get_entry(self, name: str): + for group in self.groups: + if group.is_field(): + continue + + for entry in group.entries: + if entry.name == name: + return entry + def _make_bitfield( cls: type, @@ -932,6 +1043,7 @@ class Packet: packed = pack(pkt) unpacked = unpack(Packet, packed) """ + def wrap(cls): return _make_bitfield( cls, diff --git a/src/caterpillar/model/_bitfield.pyi b/src/caterpillar/model/_bitfield.pyi index 9b97a63f..aa66a718 100755 --- a/src/caterpillar/model/_bitfield.pyi +++ b/src/caterpillar/model/_bitfield.pyi @@ -12,6 +12,7 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . +from enum import Enum from typing import ( Any, Callable, @@ -55,8 +56,16 @@ class BitfieldValueFactory(Generic[_VT]): def to_int(self, obj: _VT) -> int: ... def from_int(self, value: int) -> _VT: ... -class BoolFactory(BitfieldValueFactory[bool]): - def from_int(self, value: int) -> bool: ... +_EnumT = TypeVar("_EnumT", bound=Enum) + +class EnumFactory(Generic[_EnumT], BitfieldValueFactory[_EnumT | int]): + strict: bool + def __init__(self, model: Type[_EnumT], strict: bool = ...) -> None: ... + def from_int(self, value: int) -> _EnumT | int: ... + +class CharFactory(BitfieldValueFactory[str]): + def from_int(self, value: int) -> str: ... + def to_int(self, obj: str) -> int: ... class BitfieldEntry: bit: int @@ -109,6 +118,7 @@ class Bitfield(Struct[_ModelT]): def unpack_one(self, context: _ContextLike): ... def pack_one(self, obj: _ModelT, context: _ContextLike) -> None: ... def add_action(self, action: _ActionLike) -> None: ... 
+ def get_entry(self, name: str) -> Optional[BitfieldEntry]: ... def _process_align( self, options: Optional[Iterable[Flag | SetAlignment]] ) -> Field: ... diff --git a/src/caterpillar/model/_struct.pyi b/src/caterpillar/model/_struct.pyi index fc772f7d..e64d9d82 100755 --- a/src/caterpillar/model/_struct.pyi +++ b/src/caterpillar/model/_struct.pyi @@ -223,4 +223,7 @@ def unpack_file( as_field: bool = ..., **kwds, ) -> _OT: ... -def sizeof(obj: Union[_SupportsSize, _ContainsStruct, _StructLike], **kwds) -> int: ... +def sizeof( + obj: Union[_SupportsSize, _ContainsStruct[Any, Any], _StructLike[Any, Any], type], + **kwds, +) -> int: ... diff --git a/test/_Py/model/test_bitfield.py b/test/_Py/model/test_bitfield.py new file mode 100644 index 00000000..503a4ebe --- /dev/null +++ b/test/_Py/model/test_bitfield.py @@ -0,0 +1,175 @@ +# pyright: reportInvalidTypeForm=false, reportGeneralTypeIssues=false +import pytest +import enum + +from caterpillar.model import ( + Bitfield, + bitfield, + NewGroup, + EndGroup, + SetAlignment, + sizeof, + unpack, + CharFactory, + pack, +) +from caterpillar.options import ( + B_GROUP_END, + B_GROUP_KEEP, + B_GROUP_NEW, + B_NO_AUTO_BOOL, + B_OVERWRITE_ALIGNMENT, + S_REPLACE_TYPES, +) +from caterpillar.fields import uint16, uint24, uint32, Bytes, uint8 +from caterpillar.shared import getstruct + + +def test_bitfield_syntax__standard(): + # Syntax no. 1 + @bitfield + class FormatA: + a: 3 - uint16 + + groups = FormatA.__struct__.groups + assert len(groups) == 1 + assert not groups[0].is_empty() + # default alignment is 0x08 unless B_OVERWRITE_ALIGNMENT is set + assert groups[0].bit_count == 0x08 + assert groups[0].entries[0].width == 3 + + +def test_bitfield_syntax__align(): + # Syntax no. 1 + no. 
2 + @bitfield + class FormatA: + a: 3 + _: 0 + b: 4 + + groups = FormatA.__struct__.groups + assert len(groups) == 2 + + # The alignment syntax finalizes the first group and start a new one + a = groups[0] + b = groups[1] + assert a.bit_count == 0x08 and b.bit_count == 0x08 + assert a.entries[0].width == 3 + assert b.entries[0].width == 4 + + +def test_bitfield_syntax__struct(): + # syntax no. 3 (generic struct) + @bitfield + class FormatA: + a: uint16 + b: uint32 + c: uint24 + + # just like a @struct definition + assert sizeof(FormatA) == 2 + 4 + 3 + + +def test_bitfield_syntax__field_factory(): + # syntax no. 4 + @bitfield + class FormatA: + a: (uint16, int) # this won't work + b: (5 - uint8, str) + + struct = getstruct(FormatA) + assert struct is not None + + groups = struct.groups + assert len(groups) == 2 + # The first definition will revert to a simple field, because + # no bits are given + assert groups[0].is_field() is True + # conversion to string + assert groups[1].entries[0].factory.target is str + + +def test_bitfield_syntax__extended(): + class SimpleEnum(enum.IntEnum): + A = 0 + B = 1 + C = 2 + + # syntax no. 
5 + @bitfield + class FormatA: + # Explanation: + # 4bits converted to string + a1: (4, str) + # 2bits converted to Enum + a2: (2, SimpleEnum) + # alignment is 8 bits, finalize group and set alignment + # to 16bits for next group + _: (0, SetAlignment(16)) + # 10bits entry for current group, then finalize group + b1: (10, EndGroup) + # 12bits in new group + c1: 12 + + # We should see exactly three groups here + groups = FormatA.__struct__.groups + assert len(groups) == 3 + assert groups[0].bit_count == 8 + assert groups[1].bit_count == 16 + assert groups[2].bit_count == 16 + + assert len(groups[0].entries) == 2 + assert groups[0].entries[1].factory.target is SimpleEnum + + +def test_bitfield__replace_types(): + class SimpleEnum(enum.IntEnum): + A = 0 + B = 1 + C = 2 + + @bitfield(options={S_REPLACE_TYPES}) + class FormatA: + a1: (4, str) # a1: str + a2: (2, SimpleEnum) # a2: SimpleEnum + _: 0 + b1: Bytes(6) # b1: bytes + + annotations = FormatA.__annotations__ + assert annotations["a1"] is str + assert annotations["a2"] is SimpleEnum + assert annotations["b1"] is bytes + + +def test_bitfield__unpack(): + # same as before + class SimpleEnum(enum.IntEnum): + A = 0 + B = 1 + C = 2 + + @bitfield + class FormatA: + a1: (4, CharFactory) # a1: str + a2: (2, SimpleEnum) # a2: SimpleEnum + _: 0 + b1: Bytes(6) # b1: bytes + + data = 0b00110100.to_bytes() + b"12" * 3 + obj = unpack(FormatA, data) + assert obj.a1 == "\x03" + assert obj.a2 == SimpleEnum.B + assert obj.b1 == b"12" * 3 + + +def test_bitfield__pack(): + @bitfield + class FormatA: + a1: 1 + a2: 2 + a3: 3 + _: 0 + b1: uint16 + + obj = FormatA(a1=True, a2=3, a3=5, b1=0xFF00) + assert pack(obj) == 0b1_11_101_00.to_bytes() + b"\x00\xff" From 391f4cb9c707050567b2b67e621d5515755d3c6c Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Fri, 27 Jun 2025 10:07:00 +0200 Subject: [PATCH 25/41] Fix inonsistencies with automatically sizing a bitfield-group --- 
src/caterpillar/model/_bitfield.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/caterpillar/model/_bitfield.py b/src/caterpillar/model/_bitfield.py index 12e4f13f..62847d54 100755 --- a/src/caterpillar/model/_bitfield.py +++ b/src/caterpillar/model/_bitfield.py @@ -48,7 +48,7 @@ from ._struct import Struct, sizeof -# --- BItfield Concept --- +# --- Bitfield Concept --- # NEW REVISED CONCEPT # Each Bitfield instance maintains a sequence of bitfield groups, where each group # contains a collection of sized fields. A bitfield group may consist of either multiple @@ -734,6 +734,8 @@ def _process_options(self, options, entry=None) -> bool: if option.name == EndGroup.name: if entry: group.entries.append(entry) + self._bit_pos += entry.width + group.bit_count = max(group.bit_count, self._bit_pos) consumed = True group.align_to(alignment) From 291d7616f503cceb0f9fa6ae0277a39dbf461a7e Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Fri, 27 Jun 2025 10:20:06 +0200 Subject: [PATCH 26/41] Fixed bitfield issues with alignment calculation --- src/caterpillar/model/_bitfield.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/caterpillar/model/_bitfield.py b/src/caterpillar/model/_bitfield.py index 62847d54..30419949 100755 --- a/src/caterpillar/model/_bitfield.py +++ b/src/caterpillar/model/_bitfield.py @@ -656,11 +656,11 @@ def _process_bits(self, name: str, bits: int, factory=None, options=None) -> Fie factory = BitfieldValueFactory(bool) entry = BitfieldEntry(self._bit_pos, bits, name, factory) - self._bit_pos += bits if not self._process_options(options, entry): group = self._current_group group.entries.append(entry) # Adjust the size of the goup dynamically + self._bit_pos += entry.width group.bit_count = max(group.bit_count, self._bit_pos) # this is only symbolic return Field(Int(bits)) @@ -706,10 +706,10 @@ def _process_bits_field( entry = BitfieldEntry( 
self._bit_pos, width, name, factory or BitfieldValueFactory(typeof(field)) ) - self._bit_pos += width if not self._process_options(options, entry): group = self._current_group group.entries.append(entry) + self._bit_pos += width group.bit_count = max(group.bit_count, self._bit_pos) if field.has_flag(B_OVERWRITE_ALIGNMENT): @@ -737,8 +737,8 @@ def _process_options(self, options, entry=None) -> bool: self._bit_pos += entry.width group.bit_count = max(group.bit_count, self._bit_pos) consumed = True - group.align_to(alignment) + group.align_to(alignment) self._current_group = self._new_group(alignment) elif option.name == NewGroup.name: # finalize current group, create a new one and add the entry to the newly From 9d84f0270af96a7928a0e457004e64778f0c8dbb Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Fri, 27 Jun 2025 15:29:04 +0200 Subject: [PATCH 27/41] Removed Manifest file and version to 2.5.0-rc --- MANIFEST.in | 9 --------- examples/formats/caf.py | 2 +- pyproject.toml | 2 +- src/caterpillar/__init__.py | 2 +- src/caterpillar/model/_bitfield.pyi | 2 -- src/ccaterpillar/pyproject.toml | 2 +- 6 files changed, 4 insertions(+), 15 deletions(-) delete mode 100644 MANIFEST.in diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index a18fb49a..00000000 --- a/MANIFEST.in +++ /dev/null @@ -1,9 +0,0 @@ -include caterpillar/*.c -include caterpillar/*.h -include caterpillar/*.py -include caterpillar/*.pyi -include caterpillar/py.typed -include setup.py -include README.md -include LICENSE -include MANIFEST.in \ No newline at end of file diff --git a/examples/formats/caf.py b/examples/formats/caf.py index d914482c..a1636080 100644 --- a/examples/formats/caf.py +++ b/examples/formats/caf.py @@ -114,7 +114,7 @@ class CAFPacketTable: remainder_frames: int32 # The VarInt configuration doesn't have to be changed, as this file format uses # the default implementation. 
- table_data: VarInt[this.num_packets] + table_data: vint[this.num_packets] @struct(order=BigEndian) diff --git a/pyproject.toml b/pyproject.toml index 63c6a974..dd6a4d8e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ cmake.source-dir = "." [project] name = "caterpillar" -version = "2.4.5" +version = "2.5.0-rc" description="Library to pack and unpack structurized binary data." authors = [ diff --git a/src/caterpillar/__init__.py b/src/caterpillar/__init__.py index 7247e54e..a2ec8c48 100644 --- a/src/caterpillar/__init__.py +++ b/src/caterpillar/__init__.py @@ -14,7 +14,7 @@ # along with this program. If not, see . import warnings -__version__ = "2.4.5" +__version__ = "2.5.0-rc" __release__ = None __author__ = "MatrixEditor" diff --git a/src/caterpillar/model/_bitfield.pyi b/src/caterpillar/model/_bitfield.pyi index aa66a718..abd9304b 100755 --- a/src/caterpillar/model/_bitfield.pyi +++ b/src/caterpillar/model/_bitfield.pyi @@ -16,12 +16,10 @@ from enum import Enum from typing import ( Any, Callable, - Dict, Generic, Iterable, List, Optional, - Self, Type, TypeVar, overload, diff --git a/src/ccaterpillar/pyproject.toml b/src/ccaterpillar/pyproject.toml index 4b3983db..e411dc0b 100644 --- a/src/ccaterpillar/pyproject.toml +++ b/src/ccaterpillar/pyproject.toml @@ -18,7 +18,7 @@ CP_ENABLE_NATIVE = "1" [project] name = "caterpillar" -version = "2.4.5" +version = "2.5.0-rc" description="Library to pack and unpack structurized binary data." 
authors = [ From 67321348ab25139bcb4cf80264f032388be1b357 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Fri, 27 Jun 2025 15:48:48 +0200 Subject: [PATCH 28/41] Made root context an attribute of the current context --- + New context attribute: '_root' can be set to point to the root context instance + Removed type hints in exception.py --- src/caterpillar/_common.py | 2 ++ src/caterpillar/context.py | 10 ++-------- src/caterpillar/exception.py | 4 +--- src/caterpillar/model/_base.py | 13 +++++++++++-- src/caterpillar/model/_struct.py | 9 ++++++++- 5 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/caterpillar/_common.py b/src/caterpillar/_common.py index 3ad1289b..d8ff8a16 100755 --- a/src/caterpillar/_common.py +++ b/src/caterpillar/_common.py @@ -68,6 +68,7 @@ def unpack_seq(context, unpack_one) -> Collection: # the new context. The '_pos' attribute will be adjusted automatically. values = [] # always list (maybe add factory) seq_context = Context( + _root=context._root, _parent=context, _io=stream, _length=length, @@ -142,6 +143,7 @@ def pack_seq(seq, context, pack_one) -> None: # Special elements '_index' and '_length' can be referenced within # the new context. The '_pos' attribute will be adjusted automatically. 
seq_context = Context( + _root=context._root, _parent=context, _io=stream, _length=count, diff --git a/src/caterpillar/context.py b/src/caterpillar/context.py index ba15a74b..223e09f1 100755 --- a/src/caterpillar/context.py +++ b/src/caterpillar/context.py @@ -37,6 +37,7 @@ CTX_PATH = "_path" CTX_SEQ = "_is_seq" CTX_ARCH = "_arch" +CTX_ROOT = "_root" class Context(dict): @@ -92,14 +93,7 @@ def __context_setattr__(self, path: str, value: Any) -> None: @property def _root(self): - current = self - while CTX_PARENT in current: - # dict-like access is much faster - parent = current[CTX_PARENT] - if parent is None: - break - current = parent - return current + return self.get("_root", self) class ExprMixin: diff --git a/src/caterpillar/exception.py b/src/caterpillar/exception.py index 7aac8c19..c0b8bf52 100644 --- a/src/caterpillar/exception.py +++ b/src/caterpillar/exception.py @@ -13,13 +13,11 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
-from caterpillar.abc import _ContextLike - class StructException(Exception): """Base class for all struct-related exceptions""" - def __init__(self, message: str, context: _ContextLike | None = None) -> None: + def __init__(self, message: str, context=None) -> None: super().__init__(message) self.context = context if context: diff --git a/src/caterpillar/model/_base.py b/src/caterpillar/model/_base.py index 2f9b8315..af5e0636 100755 --- a/src/caterpillar/model/_base.py +++ b/src/caterpillar/model/_base.py @@ -45,7 +45,12 @@ Const, ) from caterpillar._common import unpack_seq, pack_seq -from caterpillar.shared import ATTR_ACTION_PACK, ATTR_ACTION_UNPACK, Action, ATTR_BYTEORDER +from caterpillar.shared import ( + ATTR_ACTION_PACK, + ATTR_ACTION_UNPACK, + Action, + ATTR_BYTEORDER, +) from caterpillar import registry @@ -387,7 +392,10 @@ def __unpack__(self, context): base_path = context[CTX_PATH] # REVISIT: the name 'this_context' is misleading here this_context = Context( - _parent=context, _io=context[CTX_STREAM], _path=base_path + _root=context._root, + _parent=context, + _io=context[CTX_STREAM], + _path=base_path, ) # See __pack__ for more information field: Optional[Field] = context.get("_field") @@ -450,6 +458,7 @@ def __pack__(self, obj, context) -> None: pack_seq(obj, context, self.pack_one) else: ctx = Context( + _root=context._root, _parent=context, _io=context[CTX_STREAM], _path=context[CTX_PATH], diff --git a/src/caterpillar/model/_struct.py b/src/caterpillar/model/_struct.py index d00c620f..e6817725 100755 --- a/src/caterpillar/model/_struct.py +++ b/src/caterpillar/model/_struct.py @@ -451,8 +451,15 @@ def pack_into( :raises TypeError: If no `struct` is specified and cannot be inferred from the object. """ offsets = OrderedDict() + # NOTE: we don't have to set _root here because the default root context + # will be this instance. 
context = Context( - _parent=None, _path="", _pos=0, _offsets=offsets, mode=MODE_PACK, **kwds + _parent=None, + _path="", + _pos=0, + _offsets=offsets, + mode=MODE_PACK, + **kwds, ) if struct is None: struct = getstruct(obj) From a1dbd6a1bdba9cfdcd9af437ba50e96e4592dcd2 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Fri, 27 Jun 2025 16:02:19 +0200 Subject: [PATCH 29/41] Update context stub file to reflect changes --- src/caterpillar/context.pyi | 1 + 1 file changed, 1 insertion(+) diff --git a/src/caterpillar/context.pyi b/src/caterpillar/context.pyi index 882c3d1a..d2741056 100755 --- a/src/caterpillar/context.pyi +++ b/src/caterpillar/context.pyi @@ -37,6 +37,7 @@ CTX_INDEX: str = ... CTX_PATH: str = ... CTX_SEQ: str = ... CTX_ARCH: str = ... +CTX_ROOT: str = ... class Context(dict, _ContextLike): def __setattr__(self, key: str, value: Any) -> None: ... From f212a365ef1a1499cfae0d9ff91d9f1c4192be0e Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Fri, 27 Jun 2025 16:12:55 +0200 Subject: [PATCH 30/41] Update project.toml definition --- pyproject.toml | 12 +++++++----- src/caterpillar/py.typed | 0 src/ccaterpillar/pyproject.toml | 32 ++++++++++++++------------------ 3 files changed, 21 insertions(+), 23 deletions(-) create mode 100644 src/caterpillar/py.typed diff --git a/pyproject.toml b/pyproject.toml index dd6a4d8e..db17b11d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,13 +13,15 @@ name = "caterpillar" version = "2.5.0-rc" description="Library to pack and unpack structurized binary data." 
-authors = [ - { name="MatrixEditor", email="not@supported.com" }, -] +authors = [{ name = "MatrixEditor" }] +maintainers = [{ name = "MatrixEditor" }] readme = "README.md" classifiers = [ - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: MIT License', + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", 'Programming Language :: Python :: 3.12', 'Programming Language :: Python :: 3.13', diff --git a/src/caterpillar/py.typed b/src/caterpillar/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/src/ccaterpillar/pyproject.toml b/src/ccaterpillar/pyproject.toml index e411dc0b..aa5f5543 100644 --- a/src/ccaterpillar/pyproject.toml +++ b/src/ccaterpillar/pyproject.toml @@ -20,17 +20,20 @@ CP_ENABLE_NATIVE = "1" name = "caterpillar" version = "2.5.0-rc" -description="Library to pack and unpack structurized binary data." -authors = [ - { name="MatrixEditor", email="not@supported.com" }, -] +description = "Library to pack and unpack structurized binary data." 
readme = "../../README.md" +authors = [{ name = "MatrixEditor" }] +maintainers = [{ name = "MatrixEditor" }] classifiers = [ - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: MIT License', + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", - 'Programming Language :: Python :: 3.12', - ] + 'Programming Language :: Python :: 3.12', + 'Programming Language :: Python :: 3.13', +] [project.urls] "Homepage" = "https://github.com/MatrixEditor/caterpillar" @@ -38,13 +41,6 @@ classifiers = [ [project.optional-dependencies] # compression -lzo = [ - "lzallright" -] -crypt = [ - "cryptography" -] -all = [ - "lzallright", - "cryptography" -] \ No newline at end of file +lzo = ["lzallright"] +crypt = ["cryptography"] +all = ["lzallright", "cryptography"] From cca98928f9edcf9ba4f9231b5ca9d565277df798 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sat, 28 Jun 2025 12:06:26 +0200 Subject: [PATCH 31/41] Added bitfield library and reference docs --- + Split up the datamodel reference into multiple files + New custom CSS for tables --- docs/sphinx/source/_static/css/custom.css | 51 ++ docs/sphinx/source/conf.py | 1 + docs/sphinx/source/library/model.rst | 14 +- docs/sphinx/source/library/model/bitfield.rst | 77 +++ docs/sphinx/source/reference/datamodel.rst | 449 +----------------- .../source/reference/datamodel/bitfield.rst | 286 +++++++++++ .../datamodel/processing_classes.rst | 134 ++++++ .../source/reference/datamodel/protocols.rst | 7 + .../source/reference/datamodel/sequence.rst | 64 +++ .../source/reference/datamodel/standard.rst | 17 + .../source/reference/datamodel/struct.rst | 102 ++++ .../source/reference/datamodel/templates.rst | 67 +++ .../source/reference/datamodel/union.rst | 41 ++ docs/sphinx/source/reference/index.rst | 1 + 14 files changed, 864 insertions(+), 
447 deletions(-) create mode 100644 docs/sphinx/source/_static/css/custom.css create mode 100644 docs/sphinx/source/library/model/bitfield.rst create mode 100644 docs/sphinx/source/reference/datamodel/bitfield.rst create mode 100644 docs/sphinx/source/reference/datamodel/processing_classes.rst create mode 100644 docs/sphinx/source/reference/datamodel/protocols.rst create mode 100644 docs/sphinx/source/reference/datamodel/sequence.rst create mode 100644 docs/sphinx/source/reference/datamodel/standard.rst create mode 100644 docs/sphinx/source/reference/datamodel/struct.rst create mode 100644 docs/sphinx/source/reference/datamodel/templates.rst create mode 100644 docs/sphinx/source/reference/datamodel/union.rst diff --git a/docs/sphinx/source/_static/css/custom.css b/docs/sphinx/source/_static/css/custom.css new file mode 100644 index 00000000..3225f29e --- /dev/null +++ b/docs/sphinx/source/_static/css/custom.css @@ -0,0 +1,51 @@ +table.t-stbl { + padding: 0em 1em 0.5em 1em; + flex-direction: column; + display: flex; +} + +.t-stbl td.t-hr { + border-top: 1px solid var(--pst-color-surface); + padding: 0 +} + +.t-stbl span.t-decl-id { + color: var(--pst-color-on-surface); + font-style: italic; +} + +.t-stbl span.t-decl-opt { + color: var(--pst-color-primary); + font-style: italic; + padding-left: 0; + font-size: small; +} + +.t-stbl td.t-no { + padding-left: 2em; +} + +table.t-par-begin { + flex-direction: column; + display: flex; +} + +tr.t-bar { + vertical-align: top; +} + +tr.t-par>td { + vertical-align: top; + padding: 0 1em 0 0; +} + +tr.t-par>td:nth-child(1) { + white-space: nowrap; + text-align: right; + font-weight: bold; + font-family: var(--pst-font-family-monospace-system); +} + +tr.t-par>td:nth-child(3) { + width: 100% +} \ No newline at end of file diff --git a/docs/sphinx/source/conf.py b/docs/sphinx/source/conf.py index a529bee2..774039a5 100644 --- a/docs/sphinx/source/conf.py +++ b/docs/sphinx/source/conf.py @@ -70,6 +70,7 @@ # 
https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output html_theme = "pydata_sphinx_theme" html_static_path = ["_static"] +html_css_files = ["css/custom.css"] html_theme_options = { "show_nav_level": 4, "navbar_end": ["navbar-icon-links", "theme-switcher"], diff --git a/docs/sphinx/source/library/model.rst b/docs/sphinx/source/library/model.rst index b7e81ce6..9b1ce8c5 100644 --- a/docs/sphinx/source/library/model.rst +++ b/docs/sphinx/source/library/model.rst @@ -6,6 +6,10 @@ Struct Model *TODO* +.. toctree:: + :maxdepth: 2 + + model/bitfield.rst Base classes ------------ @@ -20,12 +24,6 @@ Base classes .. autoclass:: caterpillar.model.Struct() :members: -.. autoclass:: caterpillar.model.BitFieldGroup - :members: - -.. autoclass:: caterpillar.model.BitField - :members: - .. autoclass:: caterpillar.model.UnionHook :members: :special-members: __model_init__, __model_setattr__ @@ -63,4 +61,6 @@ Templates .. autofunction:: caterpillar.model.template -.. autofunction:: caterpillar.model.derive \ No newline at end of file +.. autofunction:: caterpillar.model.derive + + diff --git a/docs/sphinx/source/library/model/bitfield.rst b/docs/sphinx/source/library/model/bitfield.rst new file mode 100644 index 00000000..ca6b2495 --- /dev/null +++ b/docs/sphinx/source/library/model/bitfield.rst @@ -0,0 +1,77 @@ +.. _library_model_bitfield: + +======== +Bitfield +======== + +.. versionchanged:: 2.5.0 + New revised concept since v2.5.0. + +.. py:currentmodule:: caterpillar.model + +Main Interface +-------------- + +.. autoclass:: Bitfield + :members: + + .. versionchanged:: 2.5.0 + Updated concept. See the _reference_ for more information. + +.. autoclass:: BitfieldGroup + :members: + + .. versionchanged:: 2.5.0 + Renamed from ``BitFieldGroup`` to ``BitfieldGroup`` + +.. autoclass:: BitfieldEntry + :members: + + .. versionadded:: 2.5.0 + + +.. autofunction:: getbits + +.. autofunction:: issigned + +.. autofunction:: bitfield + + .. 
versionchanged:: 2.5.0 + Added the ``alignment`` parameter. + +Default Factory Classes +----------------------- + +.. autoclass:: BitfieldValueFactory + :members: + + .. versionadded:: 2.5.0 + +.. autoclass:: CharFactory + :members: + + .. versionadded:: 2.5.0 + +.. autoclass:: EnumFactory + :members: + + .. versionadded:: 2.5.0 + + +Default Options +--------------- + +.. py:attribute:: EndGroup + + .. versionadded:: 2.5.0 + + Alias for the :attr:`B_GROUP_END` flag, used to indicate that the current bitfield group should be finalized. + + +.. py:attribute:: NewGroup + + Alias for the :attr:`B_GROUP_NEW` flag, used to indicate that a new bitfield group should be started. + + +.. autoclass:: SetAlignment + :members: diff --git a/docs/sphinx/source/reference/datamodel.rst b/docs/sphinx/source/reference/datamodel.rst index 7a737d30..b8c2ceb3 100644 --- a/docs/sphinx/source/reference/datamodel.rst +++ b/docs/sphinx/source/reference/datamodel.rst @@ -6,9 +6,6 @@ Data Model .. _objects: -Structs, Sequences and Fields -============================= - :dfn:`Structs` serve as the foundation of this library. All data within the framework undergoes the process of packing and unpacking using structs or :class:`~caterpillar.abc._StructLike` objects. There are three possible types of structs: @@ -31,384 +28,22 @@ objects. There are three possible types of structs: provide a modular approach for extending the library. Consideration of partial structs is essential when aiming to extend the capabilities of this framework. -Standard Types -============== - -Below is a list of types provided by *Caterpillar*. These types are designed to maintain -compatibility with older versions of the library, making them particularly important. - -Sequence --------- - -As previously explained, a sequence functions independently of fields. The library introduces -the :class:`~caterpillar.model.Sequence` as a named finite collection of :class:`~caterpillar.fields.Field` objects.
A *Sequence* -operates on a model, which is a string-to-field mapping by default. Later, we will discuss -the distinctions between a *Sequence* and a *Struct* regarding the model representation. - -A sequence definition entails the specification of a :class:`~caterpillar.model.Sequence` object by directly -indicating the model to use. Inheritance poses a challenge with sequences, as they are not -designed to operate on a type hierarchy. The default instantiation with all default options -involves passing the dictionary with all fields directly: - ->>> Format = Sequence({"a": uint8, "b": uint32}) - -.. admonition:: Programmers Note: - - All sequence types introduced by this library can also store so-called *unnamed* fields. - These fields are not visible in the unpacked result and are automatically packed, removing - concerns about them when the option ``S_DISCARD_UNNAMED`` is active. Their names usually - begin with an underscore and must solely contain numbers (e.g., :code:`_123`). - -The sequence follows the :class:`~caterpillar.fields.Field` configuration model, allowing sequence and -field-related options to be set. As mentioned earlier, the ``S_DISCARD_UNNAMED`` option can -be used for example to exclude all unnamed fields from the final representation. A complete -list of all configuration options and their impact can be found in :ref:`options`. - -All sequences store a configurable :class:`ByteOrder` and :class:`Arch` as architecture, -which are passed to **all** fields in the current model. For more information on why these -classes are not specified as an enum class, please refer to :ref:`byteorder`. - -Inheritance in sequences is intricate, as a :class:`~caterpillar.model.Sequence` is constructed from a dictionary -of elements. We can attempt to simulate a chain of extended *base sequences* using the -concatenation of two sequences. The :meth:`~sequence.__add__` method will *import* all fields -from the other specified sequence. 
The only disadvantage is the placement required by the -operator. For instance: - -.. code-block:: python - - >>> BaseFormat = Sequence({"magic": b"MAGIC", "a": uint8}) - >>> Format = Sequence({"b": uint32, "c": uint16}) + BaseFormat - -will result in the following field order: - -.. code-block:: python - - >>> list(Format.get_members()) - ['b', 'c', 'magic', 'a'] - -which is not the intended order. The correct order should be :code:`['magic', 'a', 'b', 'c']`. -This can be achieved by using the :code:`BaseFormat` instance as the first operand. - -.. warning:: - This will alter the *BaseFormat* sequence, making it unusable elsewhere as the *base* for - all sub-sequences. Therefore, it is not recommended to use inheritance within sequences. - The :class:`~caterpillar.model.Struct` class resolves this issue with ease. - -Nesting sequences is allowed by default and can be achieved by incorporating another -:class:`~caterpillar.model.Sequence` into the model. It is important to note that *nesting* is distinct from -*inheritance*, adding an additional layer of packing and unpacking. - ->>> Format = Sequence({"other": BaseFormat, "b": uint32}) - - -Struct -^^^^^^ - -A *struct* describes a finite collection of named fields. In contrast to a *sequence*, a *struct* -utilizes Python classes as its model. The annotation feature in Python enables the definition of -custom types as annotations, enabling this special struct class to create a model solely based on -class annotations. Additionally, it generates a ``dataclass`` of the provided model, offering a -standardized string representation. - -Several differences exist between a :class:`~caterpillar.model.Sequence` and a -:class:`~caterpillar.model.Struct`, with the most significant ones highlighted below: - - -.. 
list-table:: Behaviour of structs and sequences - :header-rows: 1 - :widths: 10, 15, 15 - :stub-columns: 1 - - * - - - Sequence - - Struct - * - Model Type - - dict - - type - * - Inheritance - - No - - Yes - * - Attribute Access - - :code:`x["name"]` - - :code:`getattr(x, "name", None)` - * - Unpacked Type (also needed to pack) - - dict [*]_ - - instance of model - * - Documentation - - No - - Yes - - -.. [*] The unpacked values are stored inside a :class:`~caterpillar.context.Context` instance, a direct subclass of a dictionary. - -As evident from the comparison, the :class:`~caterpillar.model.Struct` class introduces new features such as -inheritance and documentation support. It's crucial to note that inheritance uses -struct types exclusively. - -The :class:`~caterpillar.model.Sequence` class implements a specific process for creating an internal representation -of the given model. The :class:`~caterpillar.model.Struct` class enhances this process by handling default values, replacing -types for documentation purposes, or removing annotation fields directly from the model. Additionally, -this class adds :attr:`~class.__struct__` to the model afterward. - -.. admonition:: Implementation Note - - If you decide to use the ``annotation`` feature from the ``__future__`` module, it is necessary to - enable :attr:`~options.S_EVAL_ANNOTATIONS` since it "`Stringizes`_" all annotations. ``inspect`` then - evaluates all strings, introducing a potential security risk. Exercise with caution when evaluating code! - -Specifying structs is as simple as defining `Python Classes`_: - -.. code-block:: python - - >>> @struct - ... class BaseFormat: - ... magic: b"MAGIC" - ... a: uint8 - ... - -Internally, a representation with all required fields and their corresponding names is -created. As :code:`b"MAGIC"` or :code:`uint8` are instances of types, the type replacement -for documentation purposes should be enabled, as shown in :ref:`struct_type`. 
- -As described above, this class introduces an easy-to-use inheritance system using the method -resolution order of Python: - -.. code-block:: python - - >>> @struct - ... class Format(BaseFormat): - ... b: uint32 - ... c: uint16 - ... - >>> list(Format.__struct__.get_members()) - ['magic', 'a', 'b', 'c'] - -.. admonition:: Programmers Note - - As the :class:`~caterpillar.model.Struct` class is a direct subclass of :class:`~caterpillar.model.Sequence`, nesting is supported - by default. That means, so-called *anonymous inner* structs can be defined within a class - definition. - - .. code-block:: python - - >>> @struct - ... class Format: - ... a: uint32 - ... b: {"c": uint8} - ... - - It is not recommended to use this technique as the inner structs can't be used anywhere else. - Anonymous inner union definitions are tricky and are not officially supported yet. There are - workarounds to that problem, which are discussed in the API documentation of :class:`~caterpillar.model.Sequence`. - - -.. _union-reference: - -Union -^^^^^ - -Internally constructing unions in the library poses challenges. The current implementation uses -the predefined behavior of the :class:`~caterpillar.model.Sequence` class for union types. It selects the field with -the greatest length as its representational size. *Unions*, much like *BitFields*, must store a static -size. - -**In essence, they behave similarly to C unions.** A traditional function hook will be installed on -the model to capture field assignments. What that means will be illustrated by the following example: - -.. code-block:: python - - >>> @union - ... class Format: - ... foo: uint16 - ... bar: uint32 - ... baz: boolean - ... - >>> obj = Format() # union does not need any values - -Right now, all attributes store the default value (:code:`None`). If we assign a new value to one field, it -will be applied to all others. Hence, - ->>> obj.bar = 0xFF00FF00 - -will result in - -.. 
code-block:: python - - >>> obj - Format(foo=65280, bar=4278255360, baz=False) - - -.. admonition:: Implementation Detail - - The constructor is the only place where there is no synchronization between fields. Additionally, the current - implementation may produce some overhead, because every *refresh* will first pack the new value and then - executes *unpack* on all other fields. - -BitField -^^^^^^^^ - -A *BitField*, despite its name suggesting a field of bits, is a powerful structure designed for -detailed byte inspection. Similar to other structures, it is a finite collection of named fields. This -section will introduce potential challenges associated with the implementation of a :class:`~caterpillar.model.BitField` -and explains its behavior. - -.. caution:: - This class is still experimental, and caution is advised. For a list of known disadvantages or - problems, refer to the information provided below. -As mentioned earlier, a *BitField* allows the inspection of individual bits within parsed bytes. Its -internal model relies on a special function or attribute, namely :meth:`~object.__bits__`. Consequently, -a bitfield has a predefined length and will always possess a length that can be represented in bytes. +.. toctree:: + :caption: Standard Data Model -The :class:`~caterpillar.model.BitField` class not only stores the existing model representation with a name-to-field -mapping and a collection of all fields but also introduces a special organizational class: -:class:`~caterpillar.model.BitFieldGroup`. Each group defines its bit size, the absolute bit position in the bitfield, -and a mapping of fields to their relative bit position in the current group, along with the field's -width. In the following example, three groups are created: + datamodel/standard ->>> @bitfield -... class Format: -... a : uint8 # Group 1, pos=0, size=8 -... _ : 0 # Group 2, pos=8, size=8 -... b : 15 - uint16 # \ -... c : 1 # \ Group 3, pos=16, size=16 -... 
-- ``a``: The first field creates a group with a size of eight bits at position zero. -- ``_``: Next, a zero-sized field indicates that padding until the end of the current byte should be - added. As we start from bit position ``0``, one byte will be filled with zeros. -- ``b``: The third field only uses 15 bits of a 16-bit wide field (2 bytes inferred using :code:`uint16`) -- ``c``: The last field uses the final bit of our current group. +.. toctree:: + :caption: Caterpillar-specific Protocols -*TODO: describe process of collecting fields, packing and unpacking* + datamodel/protocols.rst +.. toctree:: + :hidden: -Field ------ - -The next core element of this library is the *Field*. It serves as a context storage to store configuration data -about a struct. Even sequences and structs can be used as fields. The process is straightforward: each custom operator -creates an instance of a :class:`~caterpillar.fields.Field` with the applied configuration value. Most of the time, this value can be -static or a :ref:`context_lambda`. A field implements basic behavior that should not be duplicated, such as -conditional execution, exception handling with default values, and support for a built-in switch-case structure. - -As mentioned earlier, some primitive structs depend on being linked to a :class:`~caterpillar.fields.Field`. This is because all -configuration elements are stored in a :class:`~caterpillar.fields.Field` instance rather than in the target struct instance. More -information about each supported configuration can be found in :ref:`operators`. - -.. _greedy: - -Greedy ------- - -This library provides direct support for *greedy* parsing. Leveraging Python's syntactic features, this special form -of parsing is enabled using the `Ellipsis`_ (:code:`...`). All previously introduced structs implement greedy parsing -when enabled. - ->>> field = uint8[...] - -This special type can be used in places where a length has to be specified. 
Therefore, it can be applied to all array -:code:`[]` declarations and constructors that take the length as an input argument, such as :class:`CString`, for -example. - -.. code-block:: python - - >>> field = Field(CString(...)) - >>> unpack(field, b"abcd\x00") - 'abcd' - -.. _prefixed: - -Prefixed --------- - -In addition to greedy parsing, this library supports prefixed packing and unpacking as well. With *prefixed*, we refer -to the length of an array of elements that should be parsed. In this library, the :code:`slice` class is to achieve a -prefix option. - ->>> field = CString[uint32::] - - -.. _context-reference: - -Context -------- - -The context is another core element of this framework, utilized to store all relevant variables needed during the -process of packing or unpacking objects. The top-level :meth:`~caterpillar.model.unpack` and :meth:`~caterpillar.model.pack` methods are designed to -create the context themselves with some pre-defined (internal) fields. - -.. admonition:: Implementation Note - - :class:`Context` objects are essentially :code:`dict` objects with enhanced capabilities. Therefore, all - operations supported on dictionaries are applicable. - -The context enables special attribute-like access using :code:`getattr` if the attribute wasn't defined in the -instance directly. All custom attributes are stored in the dictionary representation of the instance. - -.. attribute:: CTX_PARENT - :value: "_parent" - - All :class:`Context` instances *SHOULD* contain a reference to the parent context. If the returned reference is - :code:`None`, it can be assumed that the current context is the root context. If this attribute is set, it - *MUST* point to a :class:`Context` instance. - -.. attribute:: CTX_OBJECT - :value: "_obj" - - When packing or unpacking objects, the current object attributes are stored within the *object* context. This - is a special context that allows access to previously parsed fields or attributes of the input object. 
To - minimize the number of calls using this attribute, a shortcut named :code:`this` was defined, which - automatically inserts a path to the object context. - - -.. attribute:: CTX_STREAM - :value: "_io" - - The input or output stream *MUST* be set in each context instance to prevent access errors on missing stream - objects. - - .. seealso:: - Discussion on `Github `_ why this attribute has - to be set in every context instance. - -.. attribute:: CTX_PATH - :value: "_path" - - Although it is optional to provide the current parsing or building path, it is *recommended*. All nesting - structures implement a behavior that automatically adds a sub-path while packing or unpacking. Special - names are :code:`""` for the starting path and :code:`""` for greedy sequence elements. - -.. attribute:: CTX_FIELD - :value: "_field" - - In case a struct is linked to a field, the :class:`~caterpillar.fields.Field` instance will always set this context variable - to be accessible from within the underlying struct. - - -.. attribute:: CTX_INDEX - :value: "_index" - - When packing or unpacking collections of elements, the current working index is given under this context - variable. This variable is set only in this specific situation. - - -.. attribute:: CTX_VALUE - :value: "_value" - - In case a switch-case statement is activated in a field, the context will receive the parsed value in this - context variable temporarily. - -.. attribute:: CTX_POS - :value: "_pos" - - Currently undefined. - -.. attribute:: CTX_OFFSETS - :value: "_offsets" - - **Internal use only:** This special member is only set in the root context and stores all packed objects that - should be placed at an offset position. + datamodel/processing_classes.rst .. _context_lambda: @@ -454,73 +89,7 @@ way to access the context variables. A shortcut to access the object context of the parent context. -.. 
_ref-templates: - -Templates ---------- - -A specialized form of structs are *templates*, which are basically generic Python classes. Think of them -as blueprints for your final classes/structs that contain placeholders for actual types. As in C++, a -template needs type arguments, in this case we will name them :class:`~caterpillar.model.TemplateTypeVar`. - -Actually, there are two different types of type variables: - -* Required: - These variables are **required** when creating a new struct based on the template and they - can be used as positional arguments within the type derivation. - -* Positional: - These arguments are usable only as keyword arguments and are may be optional if a default value - is supplied. - -These template type variables can be created using simple variable definitions: - ->>> A = TemplateTypeVar("A") - -.. important:: - A template class is **not** a struct definition. It specifies a blueprint for the final class. - -A template class is defined like a struct, union or bitfield class, but without being a -dataclass nor storing a struct instance. - -.. code-block:: python - - >>> @template(A, "B") - ... class FormatTemplate: - ... foo: A - ... bar: B - ... baz: uint32 - ... - -The defined class then can be used to create new classes based on the provided class -structure. For instance, - -.. code-block:: python - - >>> Format = derive(FormatTemplate, A=uint32, B=uint8) - >>> Format - - -will return an anonymous class (in this case). Normally, *caterpillar* tries to infer the -variable name from the current module (if :code:`name=...`). In summary, every time -:meth:`~caterpillar.model.derive` is called, a new class will be created if not already -defined. - -The current implementation will place template information about the current class using -a special class attribute: :attr:`~class.__template__`. - -To support sub-classes of templates, we can declare a derived class as partial: - -.. 
code-block:: python - - >>> Format32 = derive(FormatTemplate, A=uint32, partial=True) - -Again, the resulting class is **not** a struct, but another template class. - -.. admonition:: Developer's note - By now, a template won't copy existing field documentation comments. Therefore, you - can't display inherited members using sphinx. Special method names ==================== diff --git a/docs/sphinx/source/reference/datamodel/bitfield.rst b/docs/sphinx/source/reference/datamodel/bitfield.rst new file mode 100644 index 00000000..3fbeda2c --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/bitfield.rst @@ -0,0 +1,286 @@ +.. _datamodel_standard_bitfield: + +Bit-field +========= + + +A *Bit-field*, despite its name suggesting a field of bits, is a powerful structure designed for +detailed byte inspection. Similar to other structures, it is a finite collection of named fields. This +section will introduce potential challenges associated with the implementation of a :class:`~caterpillar.model.Bitfield` +and explains its behavior. + +.. versionchanged:: 2.5.0 + Completely reworked the internal :class:`Bitfield` behaviour, model and processing. + +Concept +------- + +Each Bitfield instance maintains a sequence of bitfield groups, where each group +contains a collection of sized fields. A bitfield group may consist of either multiple +entries (i.e., any types that can be converted to an integral type) or a single +:class:`_StructLike` object. For example, consider the following bitfield definition: + +.. code-block:: python + + @bitfield + class Format: + a1: 2 + a2: 1 + _ : 0 + b1: char + c1: uint32 + +This Bitfield definition will generate three distinct bitfield groups (labeled here as +groups a, b, and c). By default, bitfields use 8-bit alignment, leading to the following +layout: + +.. code-block:: + + Group Pos Bits + a 0x00 8 + b 0x01 8 + c 0x02 32 + +Internally, only the first group requires special bit-level parsing. 
The remaining groups +(b and c) are treated as standard structures since they span full bytes or words without +sub-byte alignment. This dynamic grouping mechanism allows leveraging full struct-like +class definitions within bitfields. + +Syntax +------ + +This new approach enables more complex and expressive bitfield definitions. The annotation +syntax is therefore extended as follows: + +.. raw:: html + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ name + : + bits + - + field + (optional) + (1) + +
+
+ name + : + 0 + (2) + +
+
+ name + : + field-or-action + (3) + +
+
+ name + : + ( + field + , + factory + ) + (4) + +
+
+ name + : + ( + bits + , + factory + (optional) + , + options + (optional) + ) + (5) + +
+
+ +Each identifier named above is bound to certain constraints: + +.. raw:: html + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
name + - + Any valid Python attribute name. +
bits + - + Any valid non-negative integer (0 or greater). + 
field + - + + Any valid field definition for a struct resulting in a + _StructLike + that implements the subtract-operation resulting in a + Field + instance. +
field-or-action + - + + Any valid field definition for a struct, or an object implementing the + _ActionLike + protocol. + 
factory + - + + Either a target Python type to use as factory or an instance of + BitfieldValueFactory. +
options + - + + One or more options represented either as flags or instances of + SetAlignment. +
+ +What that syntax allows can be seen in the following example: + +.. code-block:: python + :caption: Bit-field definition using all syntax rules + + @bitfield + class Format: # -. + # Any annotation that can be converted by a TypeConverter | 1. Group + # into a _StructLike object is allowed. | (3 Bytes) + magic: b"foo" # bytes -' + + # Syntax according to (1) and (2) -. + version : 4 # int | 2. Group + state : 3 # int | (1 Byte / 8 Bits) + _ : 0 # ignored/removed -' + + # Extended Syntax (5) + flag1 : (1, SetAlignment(16)) # bool, new alignment -. + flag2 : 1 # bool | 3. Group + name : (12, CharFactory) # str | (2 Bytes / 16 Bits) + type : (2, SimpleEnum, EndGroup) # SimpleEnum -' + + +Processing Rules +---------------- + +Based on the previously defined syntax, some processing constraints were derived: + +- Rule 1.: + - Default alignment is 1 byte (8 bits). + - Zero (``0``) bits are prohibited. + - If followed by a (2) declaration, the remaining bits in the current byte are padded. + - If a ``<field>`` is provided: + - typeof(``<field>``) is used to infer the factory. + - :func:`~caterpillar.model.getbits` and :func:`~caterpillar.model.sizeof` determine the field's alignment. + - If a custom alignment is configured in the Bitfield constructor, inferred alignment is ignored unless the field includes the :attr:`~caterpillar.options.B_OVERWRITE_ALIGNMENT` option. + - If the :attr:`~caterpillar.options.B_GROUP_END` option is set, the current group is finalized and a new one is started. + +- Rule 2.: + - This rule forces alignment to the next byte boundary. + - The field is ignored during final class generation (name is discarded). + - The current group is finalized unless the bitfield is configured with :attr:`~caterpillar.options.B_GROUP_KEEP` + +- Rule 3.: + - Equivalent to struct-like class field definitions. + - Automatically implies a rule 2 alignment. + - Always finalizes the current group regardless of :attr:`~caterpillar.options.B_GROUP_KEEP`. 
+ +- Rule 4.: + - Extension of (1). + - Explicitly defines a conversion factory for the field. + - The factory must be: + - A built-in type (e.g., int, bool) supporting ``__int__``, or + - A type or instance of :class:`BitfieldValueFactory`. + +- Rule 5.: + - Builds upon (4) with support for options. + - Options can be passed as a list or single element. + - Supported Options: + - :attr:`~caterpillar.model.NewGroup`: Aligns the current group, starts a new one, and adds the entry to it. + - :attr:`~caterpillar.model.EndGroup`: Adds the entry to the current group, then aligns it. + - :class:`SetAlignment`: Changes the current working alignment. + - Note: Option order affects behavior and must be considered carefully. diff --git a/docs/sphinx/source/reference/datamodel/processing_classes.rst b/docs/sphinx/source/reference/datamodel/processing_classes.rst new file mode 100644 index 00000000..fe68ce40 --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/processing_classes.rst @@ -0,0 +1,134 @@ +.. _datamodel_processing: + +Processing-related Types +======================== + + +Field +----- + +The next core element of this library is the *Field*. It serves as a context storage to store configuration data +about a struct. Even sequences and structs can be used as fields. The process is straightforward: each custom operator +creates an instance of a :class:`~caterpillar.fields.Field` with the applied configuration value. Most of the time, this value can be +static or a :ref:`context_lambda`. A field implements basic behavior that should not be duplicated, such as +conditional execution, exception handling with default values, and support for a built-in switch-case structure. + +As mentioned earlier, some primitive structs depend on being linked to a :class:`~caterpillar.fields.Field`. This is because all +configuration elements are stored in a :class:`~caterpillar.fields.Field` instance rather than in the target struct instance. 
More +information about each supported configuration can be found in :ref:`operators`. + +.. _greedy: + +Greedy +------ + +This library provides direct support for *greedy* parsing. Leveraging Python's syntactic features, this special form +of parsing is enabled using the `Ellipsis`_ (:code:`...`). All previously introduced structs implement greedy parsing +when enabled. + +>>> field = uint8[...] + +This special type can be used in places where a length has to be specified. Therefore, it can be applied to all array +:code:`[]` declarations and constructors that take the length as an input argument, such as :class:`CString`, for +example. + +.. code-block:: python + + >>> field = Field(CString(...)) + >>> unpack(field, b"abcd\x00") + 'abcd' + +.. _prefixed: + +Prefixed +-------- + +In addition to greedy parsing, this library supports prefixed packing and unpacking as well. With *prefixed*, we refer +to the length of an array of elements that should be parsed. In this library, the :code:`slice` class is to achieve a +prefix option. + +>>> field = CString[uint32::] + + +.. _context-reference: + +Context +------- + +The context is another core element of this framework, utilized to store all relevant variables needed during the +process of packing or unpacking objects. The top-level :meth:`~caterpillar.model.unpack` and :meth:`~caterpillar.model.pack` methods are designed to +create the context themselves with some pre-defined (internal) fields. + +.. admonition:: Implementation Note + + :class:`Context` objects are essentially :code:`dict` objects with enhanced capabilities. Therefore, all + operations supported on dictionaries are applicable. + +The context enables special attribute-like access using :code:`getattr` if the attribute wasn't defined in the +instance directly. All custom attributes are stored in the dictionary representation of the instance. + +.. 
attribute:: CTX_PARENT + :value: "_parent" + + All :class:`Context` instances *SHOULD* contain a reference to the parent context. If the returned reference is + :code:`None`, it can be assumed that the current context is the root context. If this attribute is set, it + *MUST* point to a :class:`Context` instance. + +.. attribute:: CTX_OBJECT + :value: "_obj" + + When packing or unpacking objects, the current object attributes are stored within the *object* context. This + is a special context that allows access to previously parsed fields or attributes of the input object. To + minimize the number of calls using this attribute, a shortcut named :code:`this` was defined, which + automatically inserts a path to the object context. + + +.. attribute:: CTX_STREAM + :value: "_io" + + The input or output stream *MUST* be set in each context instance to prevent access errors on missing stream + objects. + + .. seealso:: + Discussion on `Github `_ why this attribute has + to be set in every context instance. + +.. attribute:: CTX_PATH + :value: "_path" + + Although it is optional to provide the current parsing or building path, it is *recommended*. All nesting + structures implement a behavior that automatically adds a sub-path while packing or unpacking. Special + names are :code:`""` for the starting path and :code:`""` for greedy sequence elements. + +.. attribute:: CTX_FIELD + :value: "_field" + + In case a struct is linked to a field, the :class:`~caterpillar.fields.Field` instance will always set this context variable + to be accessible from within the underlying struct. + + +.. attribute:: CTX_INDEX + :value: "_index" + + When packing or unpacking collections of elements, the current working index is given under this context + variable. This variable is set only in this specific situation. + + +.. 
attribute:: CTX_VALUE + :value: "_value" + + In case a switch-case statement is activated in a field, the context will receive the parsed value in this + context variable temporarily. + +.. attribute:: CTX_POS + :value: "_pos" + + Currently undefined. + +.. attribute:: CTX_OFFSETS + :value: "_offsets" + + **Internal use only:** This special member is only set in the root context and stores all packed objects that + should be placed at an offset position. + +.. _Ellipsis: https://docs.python.org/3/library/constants.html#Ellipsis \ No newline at end of file diff --git a/docs/sphinx/source/reference/datamodel/protocols.rst b/docs/sphinx/source/reference/datamodel/protocols.rst new file mode 100644 index 00000000..9f917935 --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/protocols.rst @@ -0,0 +1,7 @@ +.. _reference_datamodel_protocols: + +Caterpillar's Protocols +======================= + +.. toctree:: + :caption: Protocols \ No newline at end of file diff --git a/docs/sphinx/source/reference/datamodel/sequence.rst b/docs/sphinx/source/reference/datamodel/sequence.rst new file mode 100644 index 00000000..94c9f2dd --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/sequence.rst @@ -0,0 +1,64 @@ +.. _datamodel_standard_sequence: + +Sequence +======== + +A sequence functions independently of fields. The library introduces the :class:`~caterpillar.model.Sequence` +as a named finite collection of :class:`~caterpillar.fields.Field` objects. A *Sequence* +operates on a model, which is a string-to-field mapping by default. Later, we will discuss +the distinctions between a *Sequence* and a *Struct* regarding the model representation. + +A sequence definition entails the specification of a :class:`~caterpillar.model.Sequence` object by directly +indicating the model to use. Inheritance poses a challenge with sequences, as they are not +designed to operate on a type hierarchy. 
The default instantiation with all default options +involves passing the dictionary with all fields directly: + +>>> Format = Sequence({"a": uint8, "b": uint32}) + +.. admonition:: Programmers Note: + + All sequence types introduced by this library can also store so-called *unnamed* fields. + These fields are not visible in the unpacked result and are automatically packed, removing + concerns about them when the option ``S_DISCARD_UNNAMED`` is active. Their names usually + begin with an underscore and must solely contain numbers (e.g., :code:`_123`). + +The sequence follows the :class:`~caterpillar.fields.Field` configuration model, allowing sequence and +field-related options to be set. As mentioned earlier, the ``S_DISCARD_UNNAMED`` option can +be used for example to exclude all unnamed fields from the final representation. A complete +list of all configuration options and their impact can be found in :ref:`options`. + +All sequences store a configurable :class:`ByteOrder` and :class:`Arch` as architecture, +which are passed to **all** fields in the current model. For more information on why these +classes are not specified as an enum class, please refer to :ref:`byteorder`. + +Inheritance in sequences is intricate, as a :class:`~caterpillar.model.Sequence` is constructed from a dictionary +of elements. We can attempt to simulate a chain of extended *base sequences* using the +concatenation of two sequences. The :meth:`~sequence.__add__` method will *import* all fields +from the other specified sequence. The only disadvantage is the placement required by the +operator. For instance: + +.. code-block:: python + + >>> BaseFormat = Sequence({"magic": b"MAGIC", "a": uint8}) + >>> Format = Sequence({"b": uint32, "c": uint16}) + BaseFormat + +will result in the following field order: + +.. code-block:: python + + >>> list(Format.get_members()) + ['b', 'c', 'magic', 'a'] + +which is not the intended order. The correct order should be :code:`['magic', 'a', 'b', 'c']`. 
+This can be achieved by using the :code:`BaseFormat` instance as the first operand. + +.. warning:: + This will alter the *BaseFormat* sequence, making it unusable elsewhere as the *base* for + all sub-sequences. Therefore, it is not recommended to use inheritance within sequences. + The :class:`~caterpillar.model.Struct` class resolves this issue with ease. + +Nesting sequences is allowed by default and can be achieved by incorporating another +:class:`~caterpillar.model.Sequence` into the model. It is important to note that *nesting* is distinct from +*inheritance*, adding an additional layer of packing and unpacking. + +>>> Format = Sequence({"other": BaseFormat, "b": uint32}) diff --git a/docs/sphinx/source/reference/datamodel/standard.rst b/docs/sphinx/source/reference/datamodel/standard.rst new file mode 100644 index 00000000..9a883112 --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/standard.rst @@ -0,0 +1,17 @@ +.. _datamodel_standards: + +Standard Types +============== + +Below is a list of types provided by *Caterpillar*. These types are designed to maintain +compatibility with older versions of the library, making them particularly important. + +.. toctree:: + :caption: Standard Types + + sequence.rst + struct + union + bitfield + templates + diff --git a/docs/sphinx/source/reference/datamodel/struct.rst b/docs/sphinx/source/reference/datamodel/struct.rst new file mode 100644 index 00000000..37f58e2c --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/struct.rst @@ -0,0 +1,102 @@ +.. _datamodel_standard_struct: + +Struct +====== + +A *struct* describes a finite collection of named fields. In contrast to a *sequence*, a *struct* +utilizes Python classes as its model. The annotation feature in Python enables the definition of +custom types as annotations, enabling this special struct class to create a model solely based on +class annotations. 
Additionally, it generates a ``dataclass`` of the provided model, offering a +standardized string representation. + +Several differences exist between a :class:`~caterpillar.model.Sequence` and a +:class:`~caterpillar.model.Struct`, with the most significant ones highlighted below: + + +.. list-table:: Behaviour of structs and sequences + :header-rows: 1 + :widths: 10, 15, 15 + :stub-columns: 1 + + * - + - Sequence + - Struct + * - Model Type + - dict + - type + * - Inheritance + - No + - Yes + * - Attribute Access + - :code:`x["name"]` + - :code:`getattr(x, "name", None)` + * - Unpacked Type (also needed to pack) + - dict [*]_ + - instance of model + * - Documentation + - No + - Yes + + +.. [*] The unpacked values are stored inside a :class:`~caterpillar.context.Context` instance, a direct subclass of a dictionary. + +As evident from the comparison, the :class:`~caterpillar.model.Struct` class introduces new features such as +inheritance and documentation support. It's crucial to note that inheritance uses +struct types exclusively. + +The :class:`~caterpillar.model.Sequence` class implements a specific process for creating an internal representation +of the given model. The :class:`~caterpillar.model.Struct` class enhances this process by handling default values, replacing +types for documentation purposes, or removing annotation fields directly from the model. Additionally, +this class adds :attr:`~class.__struct__` to the model afterward. + +.. admonition:: Implementation Note + + If you decide to use the ``annotations`` feature from the ``__future__`` module, it is necessary to + enable :attr:`~options.S_EVAL_ANNOTATIONS` since it "`Stringizes`_" all annotations. ``inspect`` then + evaluates all strings, introducing a potential security risk. Exercise caution when evaluating code! + +Specifying structs is as simple as defining `Python Classes`_: + +.. code-block:: python + + >>> @struct + ... class BaseFormat: + ... magic: b"MAGIC" + ... a: uint8 + ... 
+ +Internally, a representation with all required fields and their corresponding names is +created. As :code:`b"MAGIC"` or :code:`uint8` are instances of types, the type replacement +for documentation purposes should be enabled, as shown in :ref:`struct_type`. + +As described above, this class introduces an easy-to-use inheritance system using the method +resolution order of Python: + +.. code-block:: python + + >>> @struct + ... class Format(BaseFormat): + ... b: uint32 + ... c: uint16 + ... + >>> list(Format.__struct__.get_members()) + ['magic', 'a', 'b', 'c'] + +.. admonition:: Programmers Note + + As the :class:`~caterpillar.model.Struct` class is a direct subclass of :class:`~caterpillar.model.Sequence`, nesting is supported + by default. That means, so-called *anonymous inner* structs can be defined within a class + definition. + + .. code-block:: python + + >>> @struct + ... class Format: + ... a: uint32 + ... b: {"c": uint8} + ... + + It is not recommended to use this technique as the inner structs can't be used anywhere else. + Anonymous inner union definitions are tricky and are not officially supported yet. There are + workarounds to that problem, which are discussed in the API documentation of :class:`~caterpillar.model.Sequence`. + diff --git a/docs/sphinx/source/reference/datamodel/templates.rst b/docs/sphinx/source/reference/datamodel/templates.rst new file mode 100644 index 00000000..efae8015 --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/templates.rst @@ -0,0 +1,67 @@ +.. _ref-templates: + +Templates +========= + +A specialized form of structs are *templates*, which are basically generic Python classes. Think of them +as blueprints for your final classes/structs that contain placeholders for actual types. As in C++, a +template needs type arguments, in this case we will name them :class:`~caterpillar.model.TemplateTypeVar`. 
+ +Actually, there are two different types of type variables: + +* Required: + These variables are **required** when creating a new struct based on the template and they + can be used as positional arguments within the type derivation. + +* Positional: + These arguments are usable only as keyword arguments and may be optional if a default value + is supplied. + +These template type variables can be created using simple variable definitions: + +>>> A = TemplateTypeVar("A") + +.. important:: + A template class is **not** a struct definition. It specifies a blueprint for the final class. + +A template class is defined like a struct, union or bitfield class, but without being a +dataclass nor storing a struct instance. + +.. code-block:: python + + >>> @template(A, "B") + ... class FormatTemplate: + ... foo: A + ... bar: B + ... baz: uint32 + ... + +The defined class then can be used to create new classes based on the provided class +structure. For instance, + +.. code-block:: python + + >>> Format = derive(FormatTemplate, A=uint32, B=uint8) + >>> Format + + +will return an anonymous class (in this case). Normally, *caterpillar* tries to infer the +variable name from the current module (if :code:`name=...`). In summary, every time +:meth:`~caterpillar.model.derive` is called, a new class will be created if not already +defined. + +The current implementation will place template information about the current class using +a special class attribute: :attr:`~class.__template__`. + +To support sub-classes of templates, we can declare a derived class as partial: + +.. code-block:: python + + >>> Format32 = derive(FormatTemplate, A=uint32, partial=True) + +Again, the resulting class is **not** a struct, but another template class. + +.. admonition:: Developer's note + + By now, a template won't copy existing field documentation comments. Therefore, you + can't display inherited members using sphinx. 
\ No newline at end of file diff --git a/docs/sphinx/source/reference/datamodel/union.rst b/docs/sphinx/source/reference/datamodel/union.rst new file mode 100644 index 00000000..118d2bbc --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/union.rst @@ -0,0 +1,41 @@ +.. _union-reference: + +Union +===== + +Internally constructing unions in the library poses challenges. The current implementation uses +the predefined behavior of the :class:`~caterpillar.model.Sequence` class for union types. It selects the field with +the greatest length as its representational size. *Unions*, much like *BitFields*, must store a static +size. + +**In essence, they behave similarly to C unions.** A traditional function hook will be installed on +the model to capture field assignments. What that means will be illustrated by the following example: + +.. code-block:: python + + >>> @union + ... class Format: + ... foo: uint16 + ... bar: uint32 + ... baz: boolean + ... + >>> obj = Format() # union does not need any values + +Right now, all attributes store the default value (:code:`None`). If we assign a new value to one field, it +will be applied to all others. Hence, + +>>> obj.bar = 0xFF00FF00 + +will result in + +.. code-block:: python + + >>> obj + Format(foo=65280, bar=4278255360, baz=False) + + +.. admonition:: Implementation Detail + + The constructor is the only place where there is no synchronization between fields. Additionally, the current + implementation may produce some overhead, because every *refresh* will first pack the new value and then + executes *unpack* on all other fields. \ No newline at end of file diff --git a/docs/sphinx/source/reference/index.rst b/docs/sphinx/source/reference/index.rst index 5fdf64aa..f9000f45 100644 --- a/docs/sphinx/source/reference/index.rst +++ b/docs/sphinx/source/reference/index.rst @@ -14,6 +14,7 @@ documentation. .. 
toctree:: :maxdepth: 2 + :numbered: :caption: Python Reference introduction.rst From 1b1cd5fc3b39b6da7482f2461966c23a56238973 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sun, 29 Jun 2025 06:43:23 +0200 Subject: [PATCH 32/41] Add changelog and update library docs --- + Add v2.5.0 changelog to docs + Split up model library docs into several files + All sections are now numbered + Remove caterpillar.abc library docs + Add caterpillar.shared library docs --- docs/sphinx/source/conf.py | 8 +- docs/sphinx/source/development/changelog.rst | 101 +++++++++++++++++- docs/sphinx/source/development/index.rst | 1 - docs/sphinx/source/index.rst | 1 + docs/sphinx/source/library/abc.rst | 41 ------- docs/sphinx/source/library/index.rst | 12 ++- docs/sphinx/source/library/model.rst | 59 +--------- docs/sphinx/source/library/model/bitfield.rst | 4 +- docs/sphinx/source/library/model/sequence.rst | 10 ++ docs/sphinx/source/library/model/struct.rst | 42 ++++++++ docs/sphinx/source/library/model/template.rst | 18 ++++ docs/sphinx/source/library/options.rst | 8 +- docs/sphinx/source/library/shared.rst | 82 ++++++++++++++ docs/sphinx/source/reference/datamodel.rst | 2 - .../datamodel/processing_classes.rst | 8 ++ .../source/reference/datamodel/struct.rst | 2 + docs/sphinx/source/reference/index.rst | 2 - docs/sphinx/source/tutorial/index.rst | 1 - 18 files changed, 286 insertions(+), 116 deletions(-) delete mode 100644 docs/sphinx/source/library/abc.rst create mode 100644 docs/sphinx/source/library/model/sequence.rst create mode 100644 docs/sphinx/source/library/model/struct.rst create mode 100644 docs/sphinx/source/library/model/template.rst create mode 100644 docs/sphinx/source/library/shared.rst diff --git a/docs/sphinx/source/conf.py b/docs/sphinx/source/conf.py index 774039a5..1f3c7271 100644 --- a/docs/sphinx/source/conf.py +++ b/docs/sphinx/source/conf.py @@ -29,7 +29,7 @@ "sphinx_design", "breathe", "c_annotations", - 
"sphinx_copybutton" + "sphinx_copybutton", ] templates_path = ["_templates"] @@ -63,8 +63,8 @@ # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False -refcount_file = '../../../src/capi.dat' -autodoc_member_order = 'bysource' +refcount_file = "../../../src/capi.dat" +autodoc_member_order = "bysource" # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output @@ -82,7 +82,7 @@ "announcement": "https://raw.githubusercontent.com/MatrixEditor/caterpillar/master/docs/sphinx/source/_templates/announcement.html", } html_sidebars = { - "installing/index": [], + "installing/index": [], } # -- Options for C++ Docs ----------------------------------------------------- diff --git a/docs/sphinx/source/development/changelog.rst b/docs/sphinx/source/development/changelog.rst index e15e69a5..8e101194 100644 --- a/docs/sphinx/source/development/changelog.rst +++ b/docs/sphinx/source/development/changelog.rst @@ -4,4 +4,103 @@ Changelog ********* -*Entries will be added in the future.* \ No newline at end of file +*More entries will be added in the future.* + +.. _changelog_2.5.0: + +2.5.0 +===== + +This version introduces massive changes due to the addition of stub files. Most of the type hints in the Python +file are ported into several stub files. Additionally, the bitfield concept was completely renewed to be more +flexible and dynamic. 
+ +Added +----- + +*caterpillar.abc* +^^^^^^^^^^^^^^^^^ + +- :class:`_SupportsBits` protocol +- :class:`_ContainsBits` protocol +- :class:`_SupportsType` protocol + +*caterpillar.shortcuts* +^^^^^^^^^^^^^^^^^^^^^^^ + +- New shortcuts: :func:`typeof`, :func:`to_struct`, :func:`hasstruct`, :func:`getstruct` and :func:`sizeof` + +*caterpillar.shared* +^^^^^^^^^^^^^^^^^^^^ + +- New constants from other modules: :attr:`ATTR_BYTEORDER`, :attr:`ATTR_TYPE`, :attr:`ATTR_BITS`, :attr:`ATTR_SIGNED`, :attr:`ATTR_TEMPLATE` + +*caterpillar.context* +^^^^^^^^^^^^^^^^^^^^^ + +- New context attribute: `_root` can be set to point to the root context instance. Internally, instead of a for-loop that iterates through parent context instances, a simple :code:`self.get(...)` call is made. + +.. raw:: html + +
+ +Removed +------- + +*caterpillar.abc* +^^^^^^^^^^^^^^^^^ + +- ``_Action`` protocol; it is replaced by two separate protocols that together form:: + + _ActionLike = _SupportsActionUnpack | _SupportsActionPack + +- ``__type__`` requirement from :class:`_StructLike` +- **Breaking:** ``_EnumLike``, ``_ContextPathStr`` + +*caterpillar.model* +^^^^^^^^^^^^^^^^^^^ + +- Unused ``getformat`` function + +*caterpillar.fields.common* +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Unused ``__fmt__`` function in :class:`Transformer` + +.. raw:: html + +
+ +Changed +------- + +*caterpillar.abc* +^^^^^^^^^^^^^^^^^ + +- Rename ``_Switch`` protocol to :attr:`_SwitchLike` +- Move the following attributes and methods into *caterpillar.shared*: rename ``STRUCT_FIELD`` to :attr:`ATTR_STRUCT`, :func:`hasstruct`, :func:`getstruct` and :func:`typeof` + +*caterpillar.byteorder* +^^^^^^^^^^^^^^^^^^^^^^^ + +- Move ``BYTEORDER_FIELD`` to *caterpillar.shared* as :attr:`ATTR_BYTEORDER` + + +*caterpillar.model* +^^^^^^^^^^^^^^^^^^^ + +- :func:`sizeof` now checks if the provided object implements the :class:`_SupportsSize` protocol +- New :class:`Bitfield` concept with enhanced syntax + +.. raw:: html + +
+ +Fixed +----- + +*caterpillar.model* +^^^^^^^^^^^^^^^^^^^ + +- when parsing union objects with an unbound stream object +- field options defined in Sequences and Structs were not populated when creating fields. diff --git a/docs/sphinx/source/development/index.rst b/docs/sphinx/source/development/index.rst index 8a5a340f..134bba07 100644 --- a/docs/sphinx/source/development/index.rst +++ b/docs/sphinx/source/development/index.rst @@ -7,7 +7,6 @@ Development *TODO* .. toctree:: - :numbered: :maxdepth: 2 roadmap.rst diff --git a/docs/sphinx/source/index.rst b/docs/sphinx/source/index.rst index 4fbbf083..d5f2547d 100644 --- a/docs/sphinx/source/index.rst +++ b/docs/sphinx/source/index.rst @@ -125,6 +125,7 @@ what configuration options can be used. Alternatively you can follow the :ref:`t .. toctree:: :maxdepth: 3 :hidden: + :numbered: 4 :caption: Contents: installing/index.rst diff --git a/docs/sphinx/source/library/abc.rst b/docs/sphinx/source/library/abc.rst deleted file mode 100644 index f80ac8d8..00000000 --- a/docs/sphinx/source/library/abc.rst +++ /dev/null @@ -1,41 +0,0 @@ -.. _abc: - -********************* -Abstract base classes -********************* - -Struct ------- - -.. autoclass:: caterpillar.abc._SupportsPack - :members: - -.. autoclass:: caterpillar.abc._SupportsUnpack - :members: - -.. autoclass:: caterpillar.abc._SupportsSize - :members: - -.. autoclass:: caterpillar.abc._StructLike - :members: - -.. autoclass:: caterpillar.abc._ContainsStruct - :members: - -Context -------- - -.. autoclass:: caterpillar.abc._ContextLike - :members: - -.. autoclass:: caterpillar.abc._ContextLambda - :members: - - -Other base classes ------------------- - -.. autoclass:: caterpillar.abc._SwitchLike - :members: - - diff --git a/docs/sphinx/source/library/index.rst b/docs/sphinx/source/library/index.rst index c7241f3e..5661f7eb 100644 --- a/docs/sphinx/source/library/index.rst +++ b/docs/sphinx/source/library/index.rst @@ -1,4 +1,4 @@ -.. 
_library-index: +:noindex: ******* Library @@ -7,19 +7,25 @@ Library *descriptions are WIP* .. toctree:: - :maxdepth: 2 + :maxdepth: 1 :caption: Python API byteorder.rst options.rst - abc.rst context.rst exceptions.rst model.rst registry + shared fields/index.rst +.. _library-index-capi: + +*********** +C Extension +*********** + .. toctree:: :maxdepth: 2 :caption: C API Python Types diff --git a/docs/sphinx/source/library/model.rst b/docs/sphinx/source/library/model.rst index 9b1ce8c5..cb09b03d 100644 --- a/docs/sphinx/source/library/model.rst +++ b/docs/sphinx/source/library/model.rst @@ -9,58 +9,7 @@ Struct Model .. toctree:: :maxdepth: 2 - model/bitfield.rst - -Base classes ------------- - -.. autoclass:: caterpillar.py.Action - - .. versionadded:: 2.4.0 - -.. autoclass:: caterpillar.model.Sequence() - :members: - -.. autoclass:: caterpillar.model.Struct() - :members: - -.. autoclass:: caterpillar.model.UnionHook - :members: - :special-members: __model_init__, __model_setattr__ - - -Standard functions ------------------- - -.. autofunction:: caterpillar.model.struct - -.. autofunction:: caterpillar.model.union - -.. autofunction:: caterpillar.model.pack - -.. autofunction:: caterpillar.model.pack_into - -.. autofunction:: caterpillar.model.pack_file - -.. autofunction:: caterpillar.model.unpack - -.. autofunction:: caterpillar.model.unpack_file - -.. autofunction:: caterpillar.model.bitfield - - -Templates ---------- - -.. autoclass:: caterpillar.model.TemplateTypeVar - :members: - -.. autofunction:: caterpillar.model.istemplate - - -.. autofunction:: caterpillar.model.template - - -.. 
autofunction:: caterpillar.model.derive - - + model/sequence + model/struct + model/bitfield + model/template diff --git a/docs/sphinx/source/library/model/bitfield.rst b/docs/sphinx/source/library/model/bitfield.rst index ca6b2495..f013e40b 100644 --- a/docs/sphinx/source/library/model/bitfield.rst +++ b/docs/sphinx/source/library/model/bitfield.rst @@ -61,14 +61,14 @@ Default Factory Classes Default Options --------------- -.. py:attr:: EndGroup +.. py:attribute:: EndGroup .. versionadded:: 2.5.0 Alias for the :attr:`B_GROUP_NEW` flag, used to indicate that a new bitfield group should be started. -.. py:attr:: NewGroup +.. py:attribute:: NewGroup Alias for the :attr:`B_GROUP_END` flag, used to indicate that the current bitfield group should be finalized. diff --git a/docs/sphinx/source/library/model/sequence.rst b/docs/sphinx/source/library/model/sequence.rst new file mode 100644 index 00000000..3e1812dc --- /dev/null +++ b/docs/sphinx/source/library/model/sequence.rst @@ -0,0 +1,10 @@ +.. _library_model_base: + +======== +Sequence +======== + +.. py:currentmodule:: caterpillar.model + +.. autoclass:: Sequence + :members: \ No newline at end of file diff --git a/docs/sphinx/source/library/model/struct.rst b/docs/sphinx/source/library/model/struct.rst new file mode 100644 index 00000000..6ebd8048 --- /dev/null +++ b/docs/sphinx/source/library/model/struct.rst @@ -0,0 +1,42 @@ +.. _library_model_struct: + +====== +Struct +====== + +.. py:currentmodule:: caterpillar.model + + +The *Struct* class +------------------ + +.. autoclass:: caterpillar.model.Struct + :members: + + +Unions +------ + +.. autofunction:: caterpillar.model.union + +.. autoclass:: caterpillar.model.UnionHook + :members: + :special-members: __model_init__, __model_setattr__ + + +Standard Interface +------------------ + +.. autofunction:: caterpillar.model.struct + +.. autofunction:: caterpillar.model.union + +.. autofunction:: caterpillar.model.pack + +.. 
autofunction:: caterpillar.model.pack_into + +.. autofunction:: caterpillar.model.pack_file + +.. autofunction:: caterpillar.model.unpack + +.. autofunction:: caterpillar.model.unpack_file \ No newline at end of file diff --git a/docs/sphinx/source/library/model/template.rst b/docs/sphinx/source/library/model/template.rst new file mode 100644 index 00000000..5d5f3704 --- /dev/null +++ b/docs/sphinx/source/library/model/template.rst @@ -0,0 +1,18 @@ +.. _library_model_template: + +========= +Templates +========= + +.. py:currentmodule:: caterpillar.model + +.. autoclass:: caterpillar.model.TemplateTypeVar + :members: + +.. autofunction:: caterpillar.model.istemplate + + +.. autofunction:: caterpillar.model.template + + +.. autofunction:: caterpillar.model.derive diff --git a/docs/sphinx/source/library/options.rst b/docs/sphinx/source/library/options.rst index 16cc057e..05c5c52e 100644 --- a/docs/sphinx/source/library/options.rst +++ b/docs/sphinx/source/library/options.rst @@ -1,12 +1,12 @@ .. _options: -******* -Options -******* +**************** +Options / Flags +**************** *TODO* -Options by type +Options by Type --------------- Global options diff --git a/docs/sphinx/source/library/shared.rst b/docs/sphinx/source/library/shared.rst new file mode 100644 index 00000000..bcc57b52 --- /dev/null +++ b/docs/sphinx/source/library/shared.rst @@ -0,0 +1,82 @@ +.. _lib_shared: + +Shared Concepts +=============== + +.. automodule:: caterpillar.shared + +.. py:attribute:: ATTR_STRUCT + :value: "__struct__" + + All models annotated with either ``@struct`` or ``@bitfield`` are struct + containers. Thus, they store the additional class attribute :code:`__struct__`. + + Internally, any types utilizing this attribute can be employed within a + struct, bitfield, or sequence definition. The type of the stored value + must be conforming to the :class:`_StructLike` protocol. + + .. 
versionchanged:: 2.5.0 + This attribute is now used when calling :func:`getstruct` or :func:`hasstruct`. + + +.. py:attribute:: ATTR_BYTEORDER + :value: "__byteorder__" + + .. versionadded:: 2.5.0 + Moved from *caterpillar.byteorder*. + + +.. py:attribute:: ATTR_TYPE + :value: "__type__" + + .. versionadded:: 2.5.0 + + +.. py:attribute:: ATTR_BITS + :value: "__bits__" + + .. versionadded:: 2.5.0 + Moved from *caterpillar.model._bitfield*. + + +.. py:attribute:: ATTR_SIGNED + :value: "__signed__" + + .. versionadded:: 2.5.0 + Moved from *caterpillar.model._bitfield*. + + +.. py:attribute:: ATTR_TEMPLATE + :value: "__template__" + + .. versionadded:: 2.5.0 + Moved from *caterpillar.model._template*. + + +.. py:attribute:: ATTR_ACTION_PACK + :value: "__action_pack__" + + .. versionadded:: 2.4.0 + + +.. py:attribute:: ATTR_ACTION_UNPACK + :value: "__action_unpack__" + + .. versionadded:: 2.4.0 + + +.. py:attribute:: MODE_PACK + :value: 0 + + +.. py:attribute:: MODE_UNPACK + :value: 1 + + +.. autofunction:: getstruct + +.. autofunction:: hasstruct + +.. autofunction:: typeof + +.. autoclass:: Action diff --git a/docs/sphinx/source/reference/datamodel.rst b/docs/sphinx/source/reference/datamodel.rst index b8c2ceb3..30cb0ab2 100644 --- a/docs/sphinx/source/reference/datamodel.rst +++ b/docs/sphinx/source/reference/datamodel.rst @@ -259,6 +259,4 @@ Modifying fields .. _struct: https://docs.python.org/3/library/struct.html .. _sphinx-autodoc: https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html -.. _Stringizes: https://docs.python.org/3/howto/annotations.html#manually-un-stringizing-stringized-annotations -.. _Python Classes: https://docs.python.org/3/reference/compound_stmts.html#class .. 
_Ellipsis: https://docs.python.org/3/library/constants.html#Ellipsis \ No newline at end of file diff --git a/docs/sphinx/source/reference/datamodel/processing_classes.rst b/docs/sphinx/source/reference/datamodel/processing_classes.rst index fe68ce40..60afa125 100644 --- a/docs/sphinx/source/reference/datamodel/processing_classes.rst +++ b/docs/sphinx/source/reference/datamodel/processing_classes.rst @@ -131,4 +131,12 @@ instance directly. All custom attributes are stored in the dictionary representa **Internal use only:** This special member is only set in the root context and stores all packed objects that should be placed at an offset position. +.. attribute:: CTX_ROOT + :value: "_root" + + .. versionadded:: 2.5.0 + + Special attribute set to specify the root context. If this attribute is not present, the current ``Context`` instance + will be returned. + .. _Ellipsis: https://docs.python.org/3/library/constants.html#Ellipsis \ No newline at end of file diff --git a/docs/sphinx/source/reference/datamodel/struct.rst b/docs/sphinx/source/reference/datamodel/struct.rst index 37f58e2c..ba1baed7 100644 --- a/docs/sphinx/source/reference/datamodel/struct.rst +++ b/docs/sphinx/source/reference/datamodel/struct.rst @@ -100,3 +100,5 @@ resolution order of Python: Anonymous inner union definitions are tricky and are not officially supported yet. There are workarounds to that problem, which are discussed in the API documentation of :class:`~caterpillar.model.Sequence`. +.. _Stringizes: https://docs.python.org/3/howto/annotations.html#manually-un-stringizing-stringized-annotations +.. _Python Classes: https://docs.python.org/3/reference/compound_stmts.html#class \ No newline at end of file diff --git a/docs/sphinx/source/reference/index.rst b/docs/sphinx/source/reference/index.rst index f9000f45..eb0e6f64 100644 --- a/docs/sphinx/source/reference/index.rst +++ b/docs/sphinx/source/reference/index.rst @@ -14,7 +14,6 @@ documentation. .. 
toctree:: :maxdepth: 2 - :numbered: :caption: Python Reference introduction.rst @@ -25,7 +24,6 @@ If you want to switch to the C API instead, please refer to the sections describ .. toctree:: :maxdepth: 2 - :numbered: :caption: C API Reference capi/extension diff --git a/docs/sphinx/source/tutorial/index.rst b/docs/sphinx/source/tutorial/index.rst index 5487508e..81d6a97a 100644 --- a/docs/sphinx/source/tutorial/index.rst +++ b/docs/sphinx/source/tutorial/index.rst @@ -23,7 +23,6 @@ is an important resource to start from. .. toctree:: :caption: Python - :numbered: :maxdepth: 4 first_steps/index.rst From b7a1de13bbfb29d556da6ba46135130f387c9caf Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sun, 29 Jun 2025 08:03:47 +0200 Subject: [PATCH 33/41] Add caterpillar's protocols to the reference --- docs/sphinx/source/library/index.rst | 2 +- docs/sphinx/source/reference/capi/context.rst | 17 +- docs/sphinx/source/reference/datamodel.rst | 210 ------------------ .../datamodel/processing_classes.rst | 28 +++ .../source/reference/datamodel/protocols.rst | 11 +- .../datamodel/protocols/actionlike.rst | 45 ++++ .../protocols/bitfield_extensions.rst | 36 +++ .../protocols/byteorder_extensions.rst | 24 ++ .../datamodel/protocols/contextlambda.rst | 23 ++ .../datamodel/protocols/contextlike.rst | 47 ++++ .../datamodel/protocols/structlike.rst | 157 +++++++++++++ 11 files changed, 372 insertions(+), 228 deletions(-) create mode 100644 docs/sphinx/source/reference/datamodel/protocols/actionlike.rst create mode 100644 docs/sphinx/source/reference/datamodel/protocols/bitfield_extensions.rst create mode 100644 docs/sphinx/source/reference/datamodel/protocols/byteorder_extensions.rst create mode 100644 docs/sphinx/source/reference/datamodel/protocols/contextlambda.rst create mode 100644 docs/sphinx/source/reference/datamodel/protocols/contextlike.rst create mode 100644 docs/sphinx/source/reference/datamodel/protocols/structlike.rst 
diff --git a/docs/sphinx/source/library/index.rst b/docs/sphinx/source/library/index.rst index 5661f7eb..24fede34 100644 --- a/docs/sphinx/source/library/index.rst +++ b/docs/sphinx/source/library/index.rst @@ -1,4 +1,4 @@ -:noindex: +.. _library-index: ******* Library diff --git a/docs/sphinx/source/reference/capi/context.rst b/docs/sphinx/source/reference/capi/context.rst index dcdfc029..2f97e731 100644 --- a/docs/sphinx/source/reference/capi/context.rst +++ b/docs/sphinx/source/reference/capi/context.rst @@ -6,19 +6,4 @@ Context Protocol **************** -Caterpillar provides a special protocol for working with contexts. It is -designed to enable access to context variables while packing or unpacking -data. This procotol is implemented by *all* context-related classes in -this package. - - -.. py:method:: object.__context_getattr__(self, path) -> object - :noindex: - - This function is used to retrieve a value from the context. It is implementation - dependent whether nested paths are supported. By default, multiple path elements - are sperated by a single dot. - - For example, consider the following path: :code:`"foo.bar"`. The context - implementation should first resolve the value of :code:`"foo"` and then - retrieve the value of :code:`"bar"` from the result. +*Moved to* :ref:`ref_datamodel_protocol_contextlike` \ No newline at end of file diff --git a/docs/sphinx/source/reference/datamodel.rst b/docs/sphinx/source/reference/datamodel.rst index 30cb0ab2..8f8c880f 100644 --- a/docs/sphinx/source/reference/datamodel.rst +++ b/docs/sphinx/source/reference/datamodel.rst @@ -45,216 +45,6 @@ objects. There are three possible types of structs: datamodel/processing_classes.rst -.. _context_lambda: - -Context lambda -^^^^^^^^^^^^^^ - -Dynamic sized structs are supported by this library using the power of so-called *context lambdas*. 
This library -introduces a special callable :class:`_ContextLambda`, that takes a :class:`Context` instance and returns the -desired result. To mimic a *context lambda*, the :meth:`__call__` method has to be implemented. - -Dynamic-sized structs are supported by this library using the power of so-called *context lambdas*. This library -introduces a special callable :class:`_ContextLambda` that takes a :class:`Context` instance and returns the # -desired result. To mimic a *context lambda*, the :meth:`__call__` method has to be implemented. - -.. function:: object.__call__(self, context) - - This library does not distinguish between callable objects and *context lambdas*. They are treated as the - same class (this aspect is under subject to changes). - - -Context path -^^^^^^^^^^^^ - -The path of a context is a specialized form of a :ref:`context_lambda` and supports lazy evaluation of most -operators (conditional ones excluded). Once called, they try to retrieve the requested value from within -the given :class:`Context` instance. Below is a list of default paths designed to provide a relatively easy -way to access the context variables. - -.. attribute:: ctx - :value: "" - - This special path acts as a wrapper to access all variables within the top-level :class:`Context` object. - -.. attribute:: this - :value: "_obj" - - As described before, a special *object context* is created when packing or unpacking structs that store - more than one field. - -.. attribute:: parent - :value: "_parent._obj" - - A shortcut to access the object context of the parent context. - - - - -Special method names -==================== - -A class can either extend :class:`_StructLike` or implement the special methods needed -to act as a struct. The subsequent sections provide an overview of all special methods -and attributes introduced by this library. Further insights into extending structs with -custom operators can be found in :ref:`operators`. 
- -Emulating Struct Types ----------------------- - -.. method:: object.__pack__(self, obj, context) - - Invoked to serialize the given object into an output stream, :meth:`~object.__pack__` - is designed to implement the behavior necessary for packing a collection of elements - or a single element. Accordingly, the input obj may be an :code:`Iterable` or a - singular element. - - The absence of a standardized implementation for deserializing a collection of elements - is deliberate. For example, all instances of the :class:`FormatField` utilize the Python - library `struct`_ internally to pack and unpack data. To optimize execution times, a - collection of elements is packed and unpacked in a single call, rather than handling each - element individually. - - The context must incorporate specific members, mentioned in :ref:`context`. Any data - input verification is implemented by the corresponding class. - - :meth:`~__pack__` is invoked by the :code:`pack()` method defined within this library. - Its purpose is to dictate how input objects are written to the stream. It is crucial - to note that the outcome of this function is ignored. - - .. versionchanged:: beta - The *stream* parameter has been removed and was instead moved into the context. - -.. method:: object.__unpack__(self, context) - - Called to desersialize objects from an input stream (the stream is stored in the given context). - The result of :meth:`~object.__unpack__` is not going to be ignored. - - Every implementation is tasked with the decision of whether to support the deserialization - of multiple elements concurrently. By default, the :class:`~caterpillar.fields.Field` class stores all essential - attributes required to determine the length of elements set for unpacking. 
The :meth:`~__unpack__` - method is activated through the :code:`unpack()` operation, integrated with the default - struct classes — namely, :class:`~caterpillar.model.Sequence`, :class:`~caterpillar.model.Struct`, and :class:`~caterpillar.fields.Field`. - - .. versionchanged:: beta - The *stream* parameter has been removed and was instead moved into the context. - -.. method:: object.__size__(self, context) - - This method serves the purpose of determining the space occupied by this struct, - expressed in bytes. The availability of a context enables the execution of a - :class:`_ContextLambda`, offering support for dynamically sized structs. Furthermore, - for the explicit definition of dynamic structs, the option to raise a :class:`DynamicSizeError` - is provided. - - -.. _struct_type: - -Customizing the struct's type -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. method:: object.__type__(self) - - The configuration of *Structs* incorporates type replacement before a dataclass is - created. This feature was specifically introduced for documentation purposes. - The optional :meth:`~object.__type__` method allows for the specification of a - type, with the default being :code:`Any` if not explicitly defined. - - .. note:: - The implementation of the :meth:`~object.__type__` method is optional and, - therefore, not mandatory as per the library's specifications. - - The following example demonstrates the use of the `sphinx-autodoc`_ extension to document - struct classes with the :code:`S_REPLACE_TYPE` option enabled. Only documented members - are displayed. - - .. code-block:: rst - - .. autoclass:: examples.formats.nibarchive.NIBHeader() - :members: - - Will be displayed as: - - .. autoclass:: examples.formats.nibarchive.NIBHeader() - :members: - :no-undoc-members: - - In this illustration, the extra parentheses at the end are included to prevent the - automatic creation of constructors. - - -Struct containers -^^^^^^^^^^^^^^^^^ - -.. 
attribute:: class.__struct__ - - All models annotated with either :code:`@struct` or :code:`@bitfield` fall into the - category of *struct containers*. These containers store the additional class attribute - :func:`~class.__struct__`. - - Internally, any types utilizing this attribute can be employed within a struct, bitfield, - or sequence definition. The type of the stored value must be a subclass of :class:`_StructLike`. - - -Template Containers -^^^^^^^^^^^^^^^^^^^ - -.. attribute:: class.__template__ - - All template classes store information about the used template type variables. Whether they - are required or just positional. In addition, default inferred types are stored as well. - -BitField specific methods -------------------------- - -The introduced :class:`~caterpillar.model.BitField` class is special in many different ways. One key -attribute is its fixed size. To determine the size of a struct, it leverages a special -member, which can be either a function or an attribute. - -.. method:: object.__bits__(self) - - Called to measure the bit count of the current object. :meth:`~object.__bits__` - serves as the sole requirement for the defined fields in the current implementation - of the :class:`~caterpillar.model.BitField` class. - - .. note:: - This class member can also be expressed as an attribute. The library automatically - adapts to the appropriate representation based on the context. - - -Customizing the object's byteorder ----------------------------------- - -.. attribute:: object.__byteorder__ - - The byteorder of a struct can be temporarily configured using the corresponding - operator. It is important to note that this attribute is utilized internally and - should not be used elsewhere. - - .. code-block:: python - - >>> struct = BigEndian | struct # Automatically sets __byteorder__ - - -.. 
method:: object.__set_byteorder__(self, byteorder) - - In contrast to the attribute :attr:`~object.__byteorder__`, the :meth:`~object.__set_byteorder__` - method is invoked to apply the current byteorder to a struct. The default behavior, - as described in :class:`FieldMixin`, is to return a new :class:`~caterpillar.fields.Field` instance with - the byteorder applied. Note the use of another operator here. - - >>> field = BigEndian + struct - - -Modifying fields ----------------- - -.. attribute:: field.__name__ - - The name of a regular field is not explicitly specified in a typical attribute but is - instead set using a dedicated one. This naming convention is automatically applied by - all default :class:`~caterpillar.model.Sequence` implementations. The name can be retrieved through the - use of :code:`field.__name__`. .. _struct: https://docs.python.org/3/library/struct.html diff --git a/docs/sphinx/source/reference/datamodel/processing_classes.rst b/docs/sphinx/source/reference/datamodel/processing_classes.rst index 60afa125..c3967de6 100644 --- a/docs/sphinx/source/reference/datamodel/processing_classes.rst +++ b/docs/sphinx/source/reference/datamodel/processing_classes.rst @@ -139,4 +139,32 @@ instance directly. All custom attributes are stored in the dictionary representa Special attribute set to specify the root context. If this attribute is not present, the current ``Context`` instance will be returned. + +Context path +------------ + +The path of a context is a specialized form of a :ref:`context_lambda` and supports lazy evaluation of most +operators (conditional ones excluded). Once called, they try to retrieve the requested value from within +the given :class:`Context` instance. Below is a list of default paths designed to provide a relatively easy +way to access the context variables. + +.. attribute:: ctx + :value: "" + + This special path acts as a wrapper to access all variables within the top-level :class:`Context` object. + +.. 
attribute:: this + :value: "_obj" + + As described before, a special *object context* is created when packing or unpacking structs that store + more than one field. + +.. attribute:: parent + :value: "_parent._obj" + + A shortcut to access the object context of the parent context. + + + + .. _Ellipsis: https://docs.python.org/3/library/constants.html#Ellipsis \ No newline at end of file diff --git a/docs/sphinx/source/reference/datamodel/protocols.rst b/docs/sphinx/source/reference/datamodel/protocols.rst index 9f917935..830e1816 100644 --- a/docs/sphinx/source/reference/datamodel/protocols.rst +++ b/docs/sphinx/source/reference/datamodel/protocols.rst @@ -4,4 +4,13 @@ Caterpillar's Protocols ======================= .. toctree:: - :caption: Protocols \ No newline at end of file + :caption: Protocols + :maxdepth: 1 + + protocols/actionlike + protocols/contextlike + protocols/contextlambda + protocols/structlike + protocols/bitfield_extensions + protocols/byteorder_extensions + diff --git a/docs/sphinx/source/reference/datamodel/protocols/actionlike.rst b/docs/sphinx/source/reference/datamodel/protocols/actionlike.rst new file mode 100644 index 00000000..1507edba --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/protocols/actionlike.rst @@ -0,0 +1,45 @@ +.. _ref_datamodel_protocol_actionlike: + +Action-like Objects +=================== + +Action-like objects provide a flexible mechanism for performing custom operations +during data processing. Rather than directly reading, writing, or storing a value, +actions are defined to modify or interact with the data at various stages +of serialization or deserialization. + +There are generally two kinds of actions that can be implemented: + +.. method:: object.__action_pack__(self, context) + + Invoked when data is serialized. This method can be used for tasks + such as calculating checksums, logging, etc. + +.. method:: object.__action_unpack__(self, context) + + Invoked when data is deserialized. 
This method is typically used + for validation, verification, or any other operation that should run during the + unpacking process. + +.. note:: + + To implement an action-like object, only one of these methods needs to be defined; + defining both is optional. + + +Derived Protocols +----------------- + +.. py:class:: _ActionLike + + .. py:function:: __action_pack__(self, context: _ContextLike) -> None + __action_unpack__(self, context: _ContextLike) -> None + +.. py:class:: _SupportsActionUnpack + + .. py:function:: __action_unpack__(self, context: _ContextLike) -> None + + +.. py:class:: _SupportsActionPack + + .. py:function:: __action_pack__(self, context: _ContextLike) -> None diff --git a/docs/sphinx/source/reference/datamodel/protocols/bitfield_extensions.rst b/docs/sphinx/source/reference/datamodel/protocols/bitfield_extensions.rst new file mode 100644 index 00000000..e750d487 --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/protocols/bitfield_extensions.rst @@ -0,0 +1,36 @@ +.. _ref_datamodel_protocol_bitfield_ext: + +Bit-field Extensions +==================== + +The introduced :class:`~caterpillar.model.Bitfield` class is special in many different ways. One key +attribute is its fixed size. To determine the size of a struct, it leverages a special +member, which can be either a function or an attribute. + +.. method:: object.__bits__(self) + + Called to measure the bit count of the current object. :meth:`~object.__bits__` + serves as the sole requirement for the defined fields in the current implementation + of the :class:`~caterpillar.model.Bitfield` class. + + .. note:: + This class member can also be expressed as an attribute. The library automatically + adapts to the appropriate representation based on the context. + +.. method:: object.__signed__(self) + + *Proposed for future use - currently unused* + +Derived Protocols +----------------- + +.. py:class:: _SupportsBits + + .. py:function:: __bits__(self) -> int + + +.. 
py:class:: _ContainsBits + + .. py:attribute:: __bits__ + :type: int + diff --git a/docs/sphinx/source/reference/datamodel/protocols/byteorder_extensions.rst b/docs/sphinx/source/reference/datamodel/protocols/byteorder_extensions.rst new file mode 100644 index 00000000..9d14c695 --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/protocols/byteorder_extensions.rst @@ -0,0 +1,24 @@ +.. _ref_datamodel_protocol_byteorder_ext: + +Byteorder Extensions +==================== + +.. attribute:: object.__byteorder__ + + The byteorder of a struct can be temporarily configured using the corresponding + operator. It is important to note that this attribute is utilized internally and + should not be used elsewhere. + + .. code-block:: python + + >>> struct = BigEndian | struct # Automatically sets __byteorder__ + + +.. method:: object.__set_byteorder__(self, byteorder) + + In contrast to the attribute :attr:`~object.__byteorder__`, the :meth:`~object.__set_byteorder__` + method is invoked to apply the current byteorder to a struct. The default behavior, + as described in :class:`FieldMixin`, is to return a new :class:`~caterpillar.fields.Field` instance with + the byteorder applied. Note the use of another operator here. + + >>> field = BigEndian + struct diff --git a/docs/sphinx/source/reference/datamodel/protocols/contextlambda.rst b/docs/sphinx/source/reference/datamodel/protocols/contextlambda.rst new file mode 100644 index 00000000..b118047f --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/protocols/contextlambda.rst @@ -0,0 +1,23 @@ +.. _context_lambda: + +Context Lambdas +=============== + +Dynamic-sized structs are supported by this library through the use of so-called +*context lambdas*. The library introduces a special callable, :class:`_ContextLambda`, +which accepts a :class:`_ContextLike` instance and returns the desired result. + +To implement a *context lambda*, a class must define the :meth:`__call__` method. + +.. 
method:: object.__call__(self, context) + + The library does not distinguish between general callable objects and *context lambdas*; + both are treated as instances of the same class. + + +Derived Protocols +----------------- + +.. py:class:: _ContextLambda[_RT] + + .. py:function:: __call__(self, context: _ContextLike) -> _RT diff --git a/docs/sphinx/source/reference/datamodel/protocols/contextlike.rst b/docs/sphinx/source/reference/datamodel/protocols/contextlike.rst new file mode 100644 index 00000000..75303f19 --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/protocols/contextlike.rst @@ -0,0 +1,47 @@ +.. _ref_datamodel_protocol_contextlike: + +Context-like Objects +==================== + +Caterpillar provides a dedicated protocol for working with context-like objects. +This protocol enables seamless access to context variables during data packing +and unpacking operations. It is consistently implemented by all context-related +classes within this package. + +.. method:: object.__context_getattr__(self, path) + + Retrieves a value from the context based on the given path. It is + implementation-dependent whether nested paths are supported; by default, + multiple path elements are separated by a single dot. + + For example, given the path :code:`"foo.bar"`, the context implementation + should first resolve the value associated with :code:`"foo"`, then retrieve + the :code:`"bar"` attribute from that result. + +.. method:: object.__context_setattr__(self, path, value) + + Sets the value of a context variable specified by the given path. + Similar to :meth:`__context_getattr__`, multiple path elements are + separated by a dot by default. Implementations should resolve the + intermediate path components and update the target variable with the + provided value. + +.. attribute:: object._root + + References the root context object. 
This attribute provides access to the + top-level context, which can be useful for resolving global context + variables or for operations that require awareness of the entire + context hierarchy. If no root context has been set, the current instance + will be returned. + + +Derived Protocols +----------------- + +.. py:class:: _ContextLike + + .. py:attribute:: _root: _ContextLike | None + .. py:function:: __context_getattr__(self, path: str) -> Any: ... + __context_setattr__(self, path: str, value: Any) -> None: ... + __getitem__(self, key, /) -> Any: ... + __setitem__(self, key, value: Any, /) -> None: ... \ No newline at end of file diff --git a/docs/sphinx/source/reference/datamodel/protocols/structlike.rst b/docs/sphinx/source/reference/datamodel/protocols/structlike.rst new file mode 100644 index 00000000..42669998 --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/protocols/structlike.rst @@ -0,0 +1,157 @@ +.. _ref_datamodel_protocol_structlike: + +Struct-Like Objects +=================== + + +The :class:`_StructLike` protocol can be used to emulate struct types. Even though :func:`pack` +and :func:`unpack` allow so-called *partial* struct-like objects, there won't be a conversion +within struct class definitions. It is always recommended to implement all methods conforming +to the :code:`_StructLike` protocol. + +Special Methods for Struct-Like objects +--------------------------------------- + +.. method:: object.__pack__(self, obj, context) -> None + + Invoked to serialize the given object into an output stream, :meth:`~object.__pack__` + is designed to implement the behavior necessary for packing a collection of elements + or a single element. Accordingly, the input obj may be an :code:`Iterable` or a + singular element. + + The absence of a standardized implementation for deserializing a collection of elements + is deliberate. 
For example, all instances of the :class:`PyStructFormattedField` utilize the Python + library `struct`_ internally to pack and unpack data. To optimize execution times, a + collection of elements is packed and unpacked in a single call, rather than handling each + element individually. + + The context must incorporate specific members, mentioned in :ref:`context`. Any data + input verification is implemented by the corresponding class. + + :meth:`~__pack__` is invoked by the :code:`pack()` method defined within this library. + Its purpose is to dictate how input objects are written to the stream. It is crucial + to note that the outcome of this function is ignored. + + .. versionchanged:: beta + The *stream* parameter has been removed and was instead moved into the context. + + +.. method:: object.__unpack__(self, context) + + Called to deserialize objects from an input stream (the stream is stored in the given context). + The result of :meth:`~object.__unpack__` is not going to be ignored. + + Every implementation is tasked with the decision of whether to support the deserialization + of multiple elements concurrently. By default, the :class:`~caterpillar.fields.Field` class stores all essential + attributes required to determine the length of elements set for unpacking. The :meth:`~__unpack__` + method is activated through the :code:`unpack()` operation, integrated with the default + struct classes — namely, :class:`~caterpillar.model.Sequence`, :class:`~caterpillar.model.Struct`, and + :class:`~caterpillar.fields.Field`. + + .. versionchanged:: beta + The *stream* parameter has been removed and was instead moved into the context. + + +.. method:: object.__size__(self, context) + + This method serves the purpose of determining the space occupied by this struct, + expressed in bytes. The availability of a context enables the execution of a + :class:`_ContextLambda`, offering support for dynamically sized structs. 
Furthermore, + for the explicit definition of dynamic structs, the option to raise a :class:`DynamicSizeError` + is provided. + +.. _struct_type: + +.. method:: object.__type__(self) + + The configuration of *Structs* incorporates type replacement before a dataclass is + created. This feature was specifically introduced for documentation purposes. + The optional :meth:`~object.__type__` method allows for the specification of a + type, with the default being :code:`Any` if not explicitly defined. + + .. note:: + The implementation of the :meth:`~object.__type__` method is optional and, + therefore, not mandatory as per the library's specifications. + + The following example demonstrates the use of the `sphinx-autodoc`_ extension to document + struct classes with the :code:`S_REPLACE_TYPES` option enabled. Only documented members + are displayed. + + .. code-block:: rst + + .. autoclass:: examples.formats.nibarchive.NIBHeader() + :members: + + Will be displayed as: + + .. autoclass:: examples.formats.nibarchive.NIBHeader() + :members: + :no-undoc-members: + + In this illustration, the extra parentheses at the end are included to prevent the + automatic creation of constructors. + + +Struct containers +^^^^^^^^^^^^^^^^^ + +.. attribute:: class.__struct__ + + All models annotated with either :code:`@struct` or :code:`@bitfield` fall into the + category of *struct containers*. These containers store the additional class attribute + :func:`~class.__struct__`. + + Internally, any types utilizing this attribute can be employed within a struct, bitfield, + or sequence definition. The type of the stored value must conform to the :class:`_StructLike` protocol. + + +Template Containers +^^^^^^^^^^^^^^^^^^^ + +.. attribute:: class.__template__ + + All template classes store information about the used template type variables, including whether they + are required or just positional. In addition, default inferred types are stored as well. 
+ + +Protocols for Struct-like objects +--------------------------------- + +To represent a :code:`_StructLike` object, all previously described methods must be implemented: + +.. py:class:: _StructLike[_IT, _OT] + + .. py:function:: __pack__(self, obj: _IT, context: _ContextLike) -> None + __unpack__(self, context: _ContextLike) -> _OT + __size__(self, context: _ContextLike) -> int + + +.. py:class:: _ContainsStruct[_IT, _OT] + + .. py:attribute:: __struct__ + :type: _StructLike[_IT, _OT] + + +.. py:class:: _SupportsPack[_IT] + + .. py:function:: __pack__(self, obj: _IT, context: _ContextLike) -> None + + +.. py:class:: _SupportsUnpack[_OT] + + .. py:function:: __unpack__(self, context: _ContextLike) -> _OT + + +.. py:class:: _SupportsSize + + .. py:function:: __size__(self, context: _ContextLike) -> int + + +.. py:class:: _SupportsType + + .. py:function:: __type__(self) -> Optional[type | str] + + + +.. _struct: https://docs.python.org/3/library/struct.html +.. _sphinx-autodoc: https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html \ No newline at end of file From 651e0429d41e3540d591c2e4c82107fd5fa0c74d Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sun, 29 Jun 2025 08:56:42 +0200 Subject: [PATCH 34/41] Update library docs --- + Update Changelog title + Create CHANGELOG.md --- CHANGELOG.md | 56 +++ docs/sphinx/source/development/changelog.rst | 10 +- docs/sphinx/source/index.rst | 6 + docs/sphinx/source/library/byteorder.rst | 46 ++- docs/sphinx/source/library/context.rst | 4 +- docs/sphinx/source/library/fields/common.rst | 79 ++-- .../source/library/fields/compression.rst | 10 +- docs/sphinx/source/library/fields/crypto.rst | 8 +- .../source/library/fields/field_model.rst | 12 +- docs/sphinx/source/library/model/struct.rst | 10 +- docs/sphinx/source/library/options.rst | 388 +++++++++++------- 11 files changed, 421 insertions(+), 208 deletions(-) create mode 100644 CHANGELOG.md diff --git 
a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..2ba7c69c --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,56 @@ +# Changelog + +## [2.5.0] - Minor Release + +### Added +- **caterpillar.abc** + - Add `_SupportsBits` protocol. + - Add `_ContainsBits` protocol. + - Add `_SupportsType` protocol. + +- **caterpillar.shortcuts** + - Add new shortcuts: `typeof`, `to_struct`, `hasstruct`, `getstruct`, and `sizeof`. + +- **caterpillar.shared** + - Add new constants from other modules: `ATTR_BYTEORDER`, `ATTR_TYPE`, `ATTR_BITS`, `ATTR_SIGNED`, `ATTR_TEMPLATE`. + +- **caterpillar.context** + - Add context attribute `_root` to point to the root context instance. Internal parent iteration now uses `self.get(...)`. + +--- + +### Removed +- **caterpillar.abc** + - Remove `_Action` protocol; split into `_ActionLike = _SupportsActionUnpack | _SupportsActionPack`. + - Remove `__type__` requirement from `_StructLike`. + - **Breaking:** Remove `_EnumLike` and `_ContextPathStr`. + +- **caterpillar.model** + - Remove unused `getformat` function. + +- **caterpillar.fields.common** + - Remove unused `__fmt__` function in `Transformer`. + +--- + +### Changed +- **caterpillar.abc** + - Rename `_Switch` protocol to `_SwitchLike`. + - Move `STRUCT_FIELD` to `caterpillar.shared` as `ATTR_STRUCT`; move `hasstruct`, `getstruct`, and `typeof` to `caterpillar.shared`. + +- **caterpillar.byteorder** + - Move `BYTEORDER_FIELD` to `caterpillar.shared` as `ATTR_BYTEORDER`. + +- **caterpillar.model** + - Update `sizeof` to check if the object implements `_SupportsSize` protocol. + - Renew `Bitfield` concept with enhanced syntax. + +- **Documentation** + - Update reference and library docs; improve section numbering. + +--- + +### Fixed +- **caterpillar.model** + - Fix parsing of union objects with an unbound stream. + - Fix field options in Sequences and Structs not being populated when creating fields. 
diff --git a/docs/sphinx/source/development/changelog.rst b/docs/sphinx/source/development/changelog.rst index 8e101194..afab34fa 100644 --- a/docs/sphinx/source/development/changelog.rst +++ b/docs/sphinx/source/development/changelog.rst @@ -8,8 +8,8 @@ Changelog .. _changelog_2.5.0: -2.5.0 -===== +[2.5.0] - Minor Release +======================= This version introduces massive changes due to the addition of stub files. Most of the type hints in the Python file are ported into several stub files. Additionally, the bitfield concept was completely renewed to be more @@ -92,6 +92,12 @@ Changed - :func:`sizeof` now checks if the provided object implements the :class:`_SupportsSize` protocol - New :class:`Bitfield` concept with enhanced syntax + +*Documentation* +^^^^^^^^^^^^^^^ + +- Update reference and library docs as well as section numbering + .. raw:: html
diff --git a/docs/sphinx/source/index.rst b/docs/sphinx/source/index.rst index d5f2547d..2546b4df 100644 --- a/docs/sphinx/source/index.rst +++ b/docs/sphinx/source/index.rst @@ -132,8 +132,14 @@ what configuration options can be used. Alternatively you can follow the :ref:`t tutorial/index.rst reference/index.rst library/index.rst + +.. toctree:: + :maxdepth: 3 + :hidden: + development/index.rst + .. seealso:: * `Github Source `_ * `Github Issues `_ diff --git a/docs/sphinx/source/library/byteorder.rst b/docs/sphinx/source/library/byteorder.rst index e2da36ab..7ca757d1 100644 --- a/docs/sphinx/source/library/byteorder.rst +++ b/docs/sphinx/source/library/byteorder.rst @@ -4,12 +4,19 @@ Byteorder and Architecture ************************** +.. py:currentmodule:: caterpillar.byteorder + Byteorder --------- -.. autoclass:: caterpillar.byteorder.ByteOrder +.. autoclass:: ByteOrder :members: +.. autofunction:: byteorder(obj, default: Optional[ByteOrder] = None) -> ByteOrder + +Standard Byteorder Instances +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + .. autoattribute:: caterpillar.byteorder.Native .. autoattribute:: caterpillar.byteorder.BigEndian @@ -20,13 +27,46 @@ Byteorder .. autoattribute:: caterpillar.byteorder.SysNative -.. autofunction:: caterpillar.byteorder.byteorder + Architecture ------------ -.. autoclass:: caterpillar.byteorder.Arch +.. autoclass:: Arch :members: .. autoattribute:: caterpillar.byteorder.system_arch + +Standard Architectures +~~~~~~~~~~~~~~~~~~~~~~ + +.. autoattribute:: caterpillar.byteorder.x86 + +.. autoattribute:: caterpillar.byteorder.x86_64 + +.. autoattribute:: caterpillar.byteorder.ARM + +.. autoattribute:: caterpillar.byteorder.ARM64 + +.. autoattribute:: caterpillar.byteorder.AARCH64 + +.. autoattribute:: caterpillar.byteorder.PowerPC + +.. autoattribute:: caterpillar.byteorder.PowerPC64 + +.. autoattribute:: caterpillar.byteorder.MIPS + +.. autoattribute:: caterpillar.byteorder.MIPS64 + +.. autoattribute:: caterpillar.byteorder.SPARC + +.. 
autoattribute:: caterpillar.byteorder.SPARC64 + +.. autoattribute:: caterpillar.byteorder.RISC_V64 + +.. autoattribute:: caterpillar.byteorder.RISC_V + +.. autoattribute:: caterpillar.byteorder.AMD + +.. autoattribute:: caterpillar.byteorder.AMD64 diff --git a/docs/sphinx/source/library/context.rst b/docs/sphinx/source/library/context.rst index e38bdb85..47308974 100644 --- a/docs/sphinx/source/library/context.rst +++ b/docs/sphinx/source/library/context.rst @@ -10,7 +10,7 @@ Context classes --------------- .. autoclass:: caterpillar.context.Context - :members: + :members: __context_getattr__, __context_setattr__, _root .. autoclass:: caterpillar.context.ContextPath :members: @@ -53,6 +53,8 @@ Special Attributes .. autoattribute:: caterpillar.context.CTX_SEQ +.. autoattribute:: caterpillar.context.CTX_ROOT + Expressions ----------- diff --git a/docs/sphinx/source/library/fields/common.rst b/docs/sphinx/source/library/fields/common.rst index 5b0a302d..8ebb977d 100644 --- a/docs/sphinx/source/library/fields/common.rst +++ b/docs/sphinx/source/library/fields/common.rst @@ -7,121 +7,124 @@ Common Structs Numeric Structs --------------- -.. autoclass:: caterpillar.py.PyStructFormattedField +.. autoclass:: caterpillar.fields.PyStructFormattedField :members: .. versionchanged:: 2.4.0 :code:`FormatField` renamed to :code:`PyStructFormattedField` -.. autoattribute:: caterpillar.py.uint8 +.. autoattribute:: caterpillar.fields.uint8 -.. autoattribute:: caterpillar.py.int8 +.. autoattribute:: caterpillar.fields.int8 -.. autoattribute:: caterpillar.py.uint16 +.. autoattribute:: caterpillar.fields.uint16 -.. autoattribute:: caterpillar.py.int16 +.. autoattribute:: caterpillar.fields.int16 -.. autoattribute:: caterpillar.py.uint32 +.. autoattribute:: caterpillar.fields.uint32 -.. autoattribute:: caterpillar.py.int32 +.. autoattribute:: caterpillar.fields.int32 -.. autoattribute:: caterpillar.py.uint64 +.. autoattribute:: caterpillar.fields.uint64 -.. 
autoattribute:: caterpillar.py.int64 +.. autoattribute:: caterpillar.fields.int64 -.. autoattribute:: caterpillar.py.size_t +.. autoattribute:: caterpillar.fields.size_t -.. autoattribute:: caterpillar.py.ssize_t +.. autoattribute:: caterpillar.fields.ssize_t -.. autoattribute:: caterpillar.py.float16 +.. autoattribute:: caterpillar.fields.float16 -.. autoattribute:: caterpillar.py.float32 +.. autoattribute:: caterpillar.fields.float32 -.. autoattribute:: caterpillar.py.float64 +.. autoattribute:: caterpillar.fields.float64 -.. autoattribute:: caterpillar.py.void_ptr +.. autoattribute:: caterpillar.fields.void_ptr -.. autoattribute:: caterpillar.py.char +.. autoattribute:: caterpillar.fields.char -.. autoattribute:: caterpillar.py.boolean +.. autoattribute:: caterpillar.fields.boolean -.. autoattribute:: caterpillar.py.padding +.. autoattribute:: caterpillar.fields.padding -.. autoclass:: caterpillar.py.Int +.. autoclass:: caterpillar.fields.Int :members: -.. autoclass:: caterpillar.py.UInt +.. autoclass:: caterpillar.fields.UInt :members: -.. autoattribute:: caterpillar.py.vint +.. autoattribute:: caterpillar.fields.vint -.. autoclass:: caterpillar.py.VarInt +.. autoclass:: caterpillar.fields.VarInt :members: Bytes, Strings -------------- -.. autoclass:: caterpillar.py.Memory +.. autoclass:: caterpillar.fields.Memory :members: .. versionchanged:: 2.4.0 Removed :code:`encoding` argument -.. autoclass:: caterpillar.py.Bytes +.. autoclass:: caterpillar.fields.Bytes :members: -.. autoclass:: caterpillar.py.String +.. autoclass:: caterpillar.fields.String :members: -.. autoclass:: caterpillar.py.Prefixed +.. autoclass:: caterpillar.fields.Prefixed :members: .. versionadded:: 2.4.0 Added support for arbitrary structs. **Warning: the second parameter is now a struct instead of the encoding string.** -.. autoclass:: caterpillar.py.CString +.. autoclass:: caterpillar.fields.CString :members: -.. autoclass:: caterpillar.py.ConstString +.. 
autoclass:: caterpillar.fields.ConstString :members: -.. autoclass:: caterpillar.py.ConstBytes +.. autoclass:: caterpillar.fields.ConstBytes :members: Special Structs --------------- -.. autoattribute:: caterpillar.py.Pass +.. autoattribute:: caterpillar.fields.Pass See source code for details -.. autoclass:: caterpillar.py.Aligned +.. autoclass:: caterpillar.fields.Aligned :members: .. versionadded:: 2.4.0 -.. autofunction:: caterpillar.py.align +.. autofunction:: caterpillar.fields.align .. versionadded:: 2.4.0 -.. autoclass:: caterpillar.py.Computed +.. autoclass:: caterpillar.fields.Computed :members: -.. autoclass:: caterpillar.py.Transformer +.. autoclass:: caterpillar.fields.Transformer :members: -.. autoclass:: caterpillar.py.Enum + .. versionchanged:: 2.5.0 + Removed ``__fmt__`` method + +.. autoclass:: caterpillar.fields.Enum :members: -.. autoclass:: caterpillar.py.Const +.. autoclass:: caterpillar.fields.Const :members: -.. autoclass:: caterpillar.py.Lazy +.. autoclass:: caterpillar.fields.Lazy :members: -.. autoclass:: caterpillar.py.Uuid +.. autoclass:: caterpillar.fields.Uuid :members: .. versionchanged:: 2.4.0 diff --git a/docs/sphinx/source/library/fields/compression.rst b/docs/sphinx/source/library/fields/compression.rst index cf7296ad..6fbbaf43 100644 --- a/docs/sphinx/source/library/fields/compression.rst +++ b/docs/sphinx/source/library/fields/compression.rst @@ -7,16 +7,16 @@ Compression Structs Basic structs ------------- -.. autoclass:: caterpillar.py.Compressed +.. autoclass:: caterpillar.fields.Compressed :members: Supported compression types --------------------------- -.. autofunction:: caterpillar.py.ZLibCompressed +.. autofunction:: caterpillar.fields.ZLibCompressed -.. autofunction:: caterpillar.py.Bz2Compressed +.. autofunction:: caterpillar.fields.Bz2Compressed -.. autofunction:: caterpillar.py.LZMACompressed +.. autofunction:: caterpillar.fields.LZMACompressed -.. autofunction:: caterpillar.py.LZOCompressed +.. 
autofunction:: caterpillar.fields.LZOCompressed diff --git a/docs/sphinx/source/library/fields/crypto.rst b/docs/sphinx/source/library/fields/crypto.rst index 92ad1865..e00c4b7f 100644 --- a/docs/sphinx/source/library/fields/crypto.rst +++ b/docs/sphinx/source/library/fields/crypto.rst @@ -8,14 +8,14 @@ Cryptographic Structs Hashes ------ -.. autoclass:: caterpillar.py.Algorithm +.. autoclass:: caterpillar.fields.Algorithm -.. autoclass:: caterpillar.py.Digest +.. autoclass:: caterpillar.fields.Digest -.. autoclass:: caterpillar.py.DigestField +.. autoclass:: caterpillar.fields.DigestField -.. autoclass:: caterpillar.py.DigestFieldAction +.. autoclass:: caterpillar.fields.DigestFieldAction Ciphers diff --git a/docs/sphinx/source/library/fields/field_model.rst b/docs/sphinx/source/library/fields/field_model.rst index e737cc09..c146e533 100644 --- a/docs/sphinx/source/library/fields/field_model.rst +++ b/docs/sphinx/source/library/fields/field_model.rst @@ -4,24 +4,24 @@ Field Model *********** -.. autoclass:: caterpillar.py.Field() +.. autoclass:: caterpillar.fields.Field :members: :private-members: :no-undoc-members: -.. autoclass:: caterpillar.py.FieldMixin +.. autoclass:: caterpillar.fields.FieldMixin :special-members: -.. autoclass:: caterpillar.py.FieldStruct +.. autoclass:: caterpillar.fields.FieldStruct :members: :special-members: -.. autoclass:: caterpillar.py.Chain +.. autoclass:: caterpillar.fields.Chain :members: :special-members: -.. autoclass:: caterpillar.py.If +.. autoclass:: caterpillar.fields.If :members: -.. autoclass:: caterpillar.py.ElseIf +.. autoclass:: caterpillar.fields.ElseIf :members: \ No newline at end of file diff --git a/docs/sphinx/source/library/model/struct.rst b/docs/sphinx/source/library/model/struct.rst index 6ebd8048..b9af5a03 100644 --- a/docs/sphinx/source/library/model/struct.rst +++ b/docs/sphinx/source/library/model/struct.rst @@ -29,8 +29,6 @@ Standard Interface .. autofunction:: caterpillar.model.struct -.. 
autofunction:: caterpillar.model.union - .. autofunction:: caterpillar.model.pack .. autofunction:: caterpillar.model.pack_into @@ -39,4 +37,10 @@ Standard Interface .. autofunction:: caterpillar.model.unpack -.. autofunction:: caterpillar.model.unpack_file \ No newline at end of file +.. autofunction:: caterpillar.model.unpack_file + +.. autofunction:: caterpillar.model.sizeof + + .. versionchanged:: 2.5.0 + + Now checks if the provided object implements the :class:`_SupportsSize` protocol \ No newline at end of file diff --git a/docs/sphinx/source/library/options.rst b/docs/sphinx/source/library/options.rst index 05c5c52e..499c56d3 100644 --- a/docs/sphinx/source/library/options.rst +++ b/docs/sphinx/source/library/options.rst @@ -4,214 +4,310 @@ Options / Flags **************** -*TODO* +This library provides a flexible system of options and flags to control various aspects +of data structures, serialization, and deserialization behaviors. Options can be set +globally or applied to specific types to fine-tune performance, memory usage, and +structure representation. Options by Type --------------- -Global options -^^^^^^^^^^^^^^ - -.. data:: caterpillar.options.O_ARRAY_FACTORY - - To control the returned array type, a factory class or method can be set - in this option using its attached value. For instance, we can incorporate - the :code:`numpy.ndarray` into our unpacked objects: - - .. code-block:: python - - from caterpillar.options import O_ARRAY_FACTORY - from numpy import array - - # just set the option's value - O_ARRAY_FACTORY.value = array +Options are grouped by the type of object they affect. +Global Options +^^^^^^^^^^^^^^ - With the new configuration applied, your unpacked objects will occupy less - memory space. The following table shows the size of unpacked objects in bytes: +.. attribute:: caterpillar.options.O_ARRAY_FACTORY + + Specifies a factory function or class for creating array instances. 
This allows you + to replace the default array type with a more efficient or specialized implementation. + For example, you can use :code:`numpy.ndarray` for large numeric arrays to reduce + memory usage: + + .. code-block:: python + + from caterpillar.options import O_ARRAY_FACTORY + from numpy import array + + # Set the array factory globally + O_ARRAY_FACTORY.value = array + + With this configuration, unpacked arrays will use the specified factory, potentially + leading to significant memory savings. The following table demonstrates the memory + footprint of unpacked objects under different configurations: + + .. list-table:: Object sizes for different configuration options + :header-rows: 1 + :stub-columns: 1 + :widths: 10, 15, 15 + + * - Configuration + - :code:`formats/nibarchive` + - :code:`formats/caf` [*]_ + * - Default configuration + - 26520 + - 10608 + * - :code:`__slots__` classes + - 14240 + - 3848 + * - Default configuration with :code:`numpy.ndarray` + - 7520 + - 1232 + * - :code:`__slots__` classes with :code:`numpy.ndarray` + - 6152 + - 384 + * - Original file size + - **1157** + - **5433** + +.. [*] A CAF audio file may include a chunk that contains only zeroed data. By ignoring + this chunk during unpacking, the in-memory size can be smaller than the original file. + +Sequence Options +^^^^^^^^^^^^^^^^ - .. list-table:: Object sizes between different configuration options - :header-rows: 1 - :stub-columns: 1 - :widths: 10, 15, 15 +.. note:: - * - Configuration - - :code:`formats/nibarchive` - - :code:`formats/caf` [*]_ - * - Default configuration - - 26520 - - 10608 - * - :code:`__slots__` classes - - 14240 - - 3848 - * - Default configuration and :code:`numpy.ndarray` - - 7520 - - 1232 - * - :code:`__slots__` classes and :code:`numpy.ndarray` - - 6152 - - 384 - * - Original filesize - - **1157** - - **5433** + All sequence-related options also apply to structs. -.. [*] A CAF audio comes with a special chunk type that stores only zeros. 
By ingoring - the data in this chunk, we can achieve less bytes in memory than the file originally used. +.. attribute:: caterpillar.options.S_DISCARD_UNNAMED -Sequence options -^^^^^^^^^^^^^^^^ + When enabled, this option discards all *unnamed* fields from the final unpacked result. + An *unnamed* field must follow the convention: -.. note:: + .. code-block:: bnf - All sequence-related configuration options are applied to structs as well. + := '_' [0-9]* + This allows you to include padding or other non-essential fields without polluting + the output. For example: -.. data:: caterpillar.options.S_DISCARD_UNNAMED + .. code-block:: python + :caption: Sequence with an unnamed field - Using this option, all *unnamed* fields will be discarded and won't be - visible in the final result object. An *unnamed* field must follow the - following naming convention: + >>> schema = Seq({ + ... "a": uint8, + ... "_": padding[10] + ... }, options={opt.S_DISCARD_UNNAMED}) + >>> data = b"\xFF" + bytes(10) + >>> unpack(schema, data) + {'a': 255} + >>> pack(_, schema) + b'\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - .. code-block:: bnf +.. data:: caterpillar.options.S_DISCARD_CONST - := '_' [0-9]* + Discards all constant fields from the final result. This is useful for fields + that serve only validation or padding purposes. - Therefore, it is possible to include more than one unnamed field, for - example: +Struct Options +^^^^^^^^^^^^^^ - .. code-block:: python - :caption: Simple sequence with an unnamed field +.. data:: caterpillar.options.S_SLOTS - >>> schema = Seq({ - ... "a": uint8, - ... "_": padding[10] - ... }, options={opt.S_DISCARD_UNNAMED}) - >>> data = b"\xFF" + bytes(10) - >>> unpack(schema, data) - {'a': 255} - >>> pack(_, schema) - b'\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + When enabled, this option generates a struct class with a :code:`__slots__` declaration. 
+ Using :code:`__slots__` significantly reduces the per-object memory overhead by preventing + the creation of an instance :code:`__dict__`. For example: + .. code-block:: python -.. data:: caterpillar.options.S_DISCARD_CONST + @struct + class DictClass: + a: uint8 + b: uint8 + c: uint8 + d: uint8 + e: uint8 + f: uint8 + g: uint8 + + @struct(options={opt.S_SLOTS}) + class SlotsClass: + a: uint8 + b: uint8 + c: uint8 + d: uint8 + e: uint8 + f: uint8 + g: uint8 - This option will only discard constant fields. + Comparing memory usage using `pympler `_: + .. code-block:: python + :caption: Memory usage difference with :code:`__slots__` + >>> o1 = DictClass(*[0xFF]*7) + >>> asizeof.asizeof(o1) + 712 + >>> o2 = SlotsClass(*[0xFF]*7) + >>> asizeof.asizeof(o2) + 120 -Struct options -^^^^^^^^^^^^^^ + The class definition itself also occupies less memory: -.. data:: caterpillar.options.S_SLOTS + .. code-block:: python - Feature option that will create a new class with the :code:`__slots__` attribute - to lower required space. Take the following two structs into consideration: - - .. code-block:: python - - @struct - class DictClass: - a: uint8 - b: uint8 - c: uint8 - d: uint8 - e: uint8 - f: uint8 - g: uint8 - - @struct(options={opt.S_SLOTS}) - class SlotsClass: - a: uint8 - b: uint8 - c: uint8 - d: uint8 - e: uint8 - f: uint8 - g: uint8 - - Each struct stores seven fields in total, whereby :code:`Format2` uses :code:`__slots__` - to store each member. We used `pympler `_ to - retrieve the amount of occupied bytes per object: - - .. code-block:: python - :caption: Size difference between a :code:`__slots__` class and a normal Python class - - >>> o1 = DictClass(*[0xFF]*7) - >>> asizeof.asizeof(o1) - 712 - >>> o2 = SlotsClass(*[0xFF]*7) - >>> asizeof.asizeof(o2) - 120 - - In addition, the overall used memory will be reduced, because the defined type will - also occupy less memory: - - .. 
code-block:: python - - >>> from sys import getsizeof - >>> getsizeof(DictClass) - 1704 - >>> getsizeof(SlotsClass) - 936 + >>> from sys import getsizeof + >>> getsizeof(DictClass) + 1704 + >>> getsizeof(SlotsClass) + 936 .. data:: caterpillar.options.S_REPLACE_TYPES - This option was designed for documentation purposes only and should be - used in that context only. It will alter the class' annotations and remove - all :class:`caterpillar.fields.Field` instances. + Replaces field types in the class annotations with their native Python equivalents. + This option is primarily intended for documentation purposes. It removes all + :class:`caterpillar.fields.Field` instances from annotations. For example: - Consider the following struct: - - .. code-block:: python + .. code-block:: python @struct class Format: a: uint8 b: String(10) - c: uuid + c: Uuid - # use the following line to enable type replacement globally + # Enable type replacement globally opt.set_struct_flags(opt.S_REPLACE_TYPES) - # otherwise, just add options={opt.S_REPLACE_TYPES} to the - # @struct call. + # Or apply it directly: + @struct(options={opt.S_REPLACE_TYPES}) - You will notice the difference in the following output on disabled - type replacement (1) and enabled replacement (2): + Comparison of annotations: - .. code-block:: python + .. code-block:: python - >>> Format.__annotations__ # (1) - {'a': , 'b': , 'c': } - >>> Format.__annotations__ # (2) + >>> Format.__annotations__ # Without replacement + {'a': , 'b': , 'c': } + >>> Format.__annotations__ # With replacement {'a': , 'b': , 'c': } - .. data:: caterpillar.options.S_EVAL_ANNOTATIONS - If you decide to use :code:`from __future__ import annotations`, you have to set this - option for each struct in the scope of this import, because it will stringify all - placed annotations. Therefore, they need to be executed before analyzed. + Ensures that annotations are evaluated at runtime if you use + :code:`from __future__ import annotations`. 
When enabled, stringified annotations + are evaluated before analysis. - .. caution:: + .. caution:: - Use this option with caution! It may result in execution of untrusted code, be aware! + Use with care! Evaluating annotations can lead to the execution of untrusted code. .. data:: caterpillar.options.S_UNION - Internal option to add union behaviour to the :code:`caterpillar.model.Struct` class. - - + Internal option that enables union behavior for the :class:`caterpillar.model.Struct` class. -Field options +Field Options ^^^^^^^^^^^^^ .. attribute:: caterpillar.options.F_KEEP_POSITION + When enabled, this option retains the position information of each field in the + serialized data. This is the default behavior. + .. attribute:: caterpillar.options.F_DYNAMIC + Marks the field as dynamic, indicating that its size or format is determined at + runtime rather than being statically defined. This is used internally to support + advanced features like variable-length fields. + .. attribute:: caterpillar.options.F_SEQUENTIAL + Indicates that this field should be processed sequentially relative to other + fields, ensuring that order-dependent parsing or packing logic is respected. + This is primarily for internal use in complex layouts. + .. attribute:: caterpillar.options.F_OFFSET_OVERRIDE + Allows you to override the calculated offset for this field within its parent + structure. This means, the offset used by the :meth:`struct.__matmul__` + operation will be used from here on. + +Bit-field Options +^^^^^^^^^^^^^^^^^ + +Bit-field options provide fine-grained control over the alignment, grouping, +and interpretation of individual fields and entire bit-field classes. + + +.. attribute:: caterpillar.options.B_OVERWRITE_ALIGNMENT + + Replaces the current alignment with the alignment explicitly defined by the field. + This option is applicable only to fields within a bit-field class. 
+ + For example, the following structure overrides the default alignment of 8 bits + with a 32-bit alignment for the specified field: + + .. code-block:: python + + @bitfield + class Format: + # Override alignment from 8 bits to 32 bits + a: 4 - uint32 | B_OVERWRITE_ALIGNMENT + + + .. versionadded:: 2.5.0 + +.. attribute:: caterpillar.options.B_GROUP_END + + Adds the annotated field to the current bit-field group and immediately aligns + the group's total size according to the active alignment constraints. + This option is applicable only to fields. + + For example, to avoid automatic alignment to the next multiple of 16 bits: + + .. code-block:: python + + @bitfield + class Format: + a1: 4 + a2: 3 + # Prevent automatic alignment to 16 bits: + a3: (1, B_GROUP_END) + b1: 1 + ... + + .. versionadded:: 2.5.0 + +.. attribute:: caterpillar.options.B_GROUP_NEW + + Finalizes the current bit-field group by aligning it, then starts a new group + and adds the annotated field to this new group. + This option is applicable only to fields. + + Example usage where a new group is started with its own alignment: + + .. code-block:: python + + @bitfield + class Format: + a1: 4 + a2: 4 + # Finalize current group and start new one with 8-bit alignment: + b1: (1, B_GROUP_NEW) + + .. versionadded:: 2.5.0 + +.. attribute:: caterpillar.options.B_GROUP_KEEP + + *Applicable only to classes.* + + When applied at the class level, this option instructs the bit-field structure + to preserve existing group alignments throughout parsing and packing. + It affects how alignment statements are interpreted within the class body. + + .. versionadded:: 2.5.0 + +.. attribute:: caterpillar.options.B_NO_AUTO_BOOL + + *Applicable only to classes.* + + Prevents the automatic assignment of a boolean type factory for fields + that are exactly one bit in size. 
By default, one-bit fields are treated + as boolean values; enabling this option disables that behavior, preserving + the raw integer representation instead. + + .. versionadded:: 2.5.0 + + Interface From c7c913ce7a970270d5ee5bd217b84d0ff412bee8 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sun, 29 Jun 2025 09:07:26 +0200 Subject: [PATCH 35/41] Fix bitfield behaviour for B_GROUP_KEEP Option. --- + Remove annotations from shared.py + Change warning for conditional context to be displayed only when entering the condition with __enter__ + Update pack and unpack examples in doc-comments --- src/caterpillar/context.py | 10 +++++----- src/caterpillar/fields/conditional.py | 10 ---------- src/caterpillar/model/_bitfield.py | 4 +++- src/caterpillar/model/_struct.py | 4 ++++ src/caterpillar/shared.py | 7 +------ 5 files changed, 13 insertions(+), 22 deletions(-) diff --git a/src/caterpillar/context.py b/src/caterpillar/context.py index 223e09f1..fe8fd1c6 100755 --- a/src/caterpillar/context.py +++ b/src/caterpillar/context.py @@ -233,11 +233,6 @@ class Format: __slots__ = "func", "annotations", "namelist", "depth" def __init__(self, condition, depth=2): - if (sys.version_info.major, sys.version_info.minor) >= (3, 14): - warnings.warn( - "Python3.14 breaks support for Contitional fields. Conditional " - "statements must be defined manually until a fix has been released." - ) self.func = condition self.annotations = None self.namelist = None @@ -250,6 +245,11 @@ def getframe(self, num: int, msg=None) -> FrameType: raise StructException(msg) from exc def __enter__(self) -> Self: + if (sys.version_info.major, sys.version_info.minor) >= (3, 14): + warnings.warn( + "Python3.14 breaks support for Contitional fields. Conditional " + "statements must be defined manually until a fix has been released." 
+ ) frame = self.getframe(self.depth, "Could not enter condition context!") # keep track of all annotations try: diff --git a/src/caterpillar/fields/conditional.py b/src/caterpillar/fields/conditional.py index e9bb5a4c..d16b82b0 100755 --- a/src/caterpillar/fields/conditional.py +++ b/src/caterpillar/fields/conditional.py @@ -12,13 +12,8 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . -import sys -import warnings - from typing import Union, Any from typing import Optional -from caterpillar.abc import _ContextLambda, _StructLike -from caterpillar.abc import _ContextLike from caterpillar.context import ConditionContext from caterpillar.exception import ValidationError from caterpillar.shared import typeof @@ -42,11 +37,6 @@ class definition. While this class can't be used in class definitions, __slots__ = "chain", "conditions" def __init__(self, struct, condition) -> None: - if (sys.version_info.major, sys.version_info.minor) >= (3, 14): - warnings.warn( - "Python3.14 breaks support for Contitional fields. Conditional " - "statements must be defined manually until a fix has been released." 
- ) self.chain = {} self.conditions = [] self.add(struct, condition) diff --git a/src/caterpillar/model/_bitfield.py b/src/caterpillar/model/_bitfield.py index 30419949..8363bd51 100755 --- a/src/caterpillar/model/_bitfield.py +++ b/src/caterpillar/model/_bitfield.py @@ -617,9 +617,11 @@ def _process_align(self, options) -> Field: :rtype: Field """ # 2.: the current group will be finalized + self._current_group.align_to(self._current_alignment) if not self.has_option(B_GROUP_KEEP): - self._current_group.align_to(self._current_alignment) self._current_group = self._new_group(self._current_alignment) + else: + self._bit_pos = self._current_group.bit_count for option in options or []: if self._process_alignment_option(option): diff --git a/src/caterpillar/model/_struct.py b/src/caterpillar/model/_struct.py index e6817725..f5350fd0 100755 --- a/src/caterpillar/model/_struct.py +++ b/src/caterpillar/model/_struct.py @@ -425,6 +425,7 @@ def pack_into( data is written to the `buffer`. Example 1: Packing an object into a bytes buffer + >>> buffer = BytesIO() >>> my_obj = SomeObject() # Assume SomeObject is a valid object to be packed >>> pack_into(my_obj, buffer, struct=SomeStruct()) # Using a specific struct @@ -432,10 +433,12 @@ def pack_into( b"..." Example 2: Packing into a file-like stream (e.g., file) + >>> with open('packed_data.bin', 'wb') as f: ... pack_into(my_obj, f, struct=SomeStruct()) # Pack into a file Example 3: Using `as_field` to wrap the struct in a Field before packing + >>> buffer = BytesIO() >>> pack_into(42, buffer, struct=uint8, as_field=True) >>> buffer.getvalue() @@ -546,6 +549,7 @@ def unpack( context as attributes. 
Example: + >>> buffer = b'\\x00\\x01\\x02\\x03' >>> struct = SomeStruct() >>> unpack(struct, buffer) diff --git a/src/caterpillar/shared.py b/src/caterpillar/shared.py index 4a2c78e6..c21e4df7 100644 --- a/src/caterpillar/shared.py +++ b/src/caterpillar/shared.py @@ -99,12 +99,7 @@ class Action: __slots__ = (ATTR_ACTION_PACK, ATTR_ACTION_UNPACK) - def __init__( - self, - pack: _ContextLambda | None = None, - unpack: _ContextLambda | None = None, - both: _ContextLambda | None = None, - ) -> None: + def __init__(self, pack=None, unpack=None, both=None) -> None: self.__action_pack__ = pack self.__action_unpack__ = unpack if both is not None: From 2cc030d27abf2fa73f9438b6eec93638e5ab7cc6 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sun, 29 Jun 2025 11:41:42 +0200 Subject: [PATCH 36/41] Update bitfield tutorial --- + Update README text + Change bitfield example to use extended syntax --- README.md | 32 +++-- docs/sphinx/source/conf.py | 1 - .../datamodel/protocols/structlike.rst | 4 +- .../source/tutorial/advanced/bitfield.rst | 133 +++++++++++++----- examples/bitfield_example.py | 7 +- 5 files changed, 123 insertions(+), 54 deletions(-) diff --git a/README.md b/README.md index c7470933..4a22ef5e 100644 --- a/README.md +++ b/README.md @@ -10,15 +10,8 @@ > [!WARNING] > This project is still in beta/testing phase. Expect bugs, naming changes and errors while using this -> library. C API Reference is WIP, C extensions are supported since v2.1.0. - -> [!NOTE] -> Python 3.14 breaks `with` statements in class definitions since `__annotations__` are added at the end -> of a class definition. Therefore, `Digest` and conditional statements **ARE NOT SUPPORTED** in using the `with` syntax Python 3.14+. -> As of version `2.4.5` the `Digest` class has a counterpart (`DigestField`), which can be used to manually specify a digest without -> the need of a `ẁith` statement. - - +> library. 
C API Reference is WIP, C extensions are supported since v2.1.0. The latest stable release +> is available at PyPI. Caterpillar is a Python 3.12+ library to pack and unpack structurized binary data. It enhances the capabilities of [Python Struct](https://docs.python.org/3/library/struct.html) @@ -27,7 +20,7 @@ options will be added in the future. Documentation is [here >](https://matrixedi *Caterpillar* is able to: -* Pack and unpack data just from processing Python class definitions (including support for bitfields, c++-like templates and c-like unions!), +* Pack and unpack data just from processing Python class definitions (including support for powerful bitfields, c++-like templates and c-like unions!), * apply a wide range of data types (with endianess and architecture configuration), * dynamically adapt structs based on their inheritance layout, * reduce the used memory space using `__slots__`, @@ -36,6 +29,13 @@ options will be added in the future. Documentation is [here >](https://matrixedi * it helps you to create cleaner and more compact code. * You can even extend Caterpillar and write your parsing logic in C or C++!! +> [!NOTE] +> Python 3.14 breaks `with` statements in class definitions since `__annotations__` are added at the end +> of a class definition. Therefore, `Digest` and conditional statements **ARE NOT SUPPORTED** using the `with` syntax in Python 3.14+. +> As of version `2.4.5` the `Digest` class has a counterpart (`DigestField`), which can be used to manually specify a digest without +> the need of a `with` statement. + + ## Give me some code! ```python @@ -49,12 +49,14 @@ class Format: length: uint8 # String fields with computed lengths name: String(this.length) # -> you can also use Prefixed(uint8) - # wraps all following fields and creates a new attr - # only for Python <= 3.13 - with Md5(name="hash", verify=True): - # Sequences with prefixed, computed lengths - names: CString[uint8::] + # custom actions, e.g.
for hashes + _hash_begin: DigestField.begin("hash", Md5_Algo) + + # Sequences with prefixed, computed lengths + names: CString[uint8::] + # automatic hash creation and verification + default value + hash: Md5_Field("hash", verify=True) = None # Instantiation (keyword-only arguments, magic is auto-inferred): obj = Format(a=1, b=2, length=3, name="foo", names=["a", "b"]) diff --git a/docs/sphinx/source/conf.py b/docs/sphinx/source/conf.py index 1f3c7271..f56735d8 100644 --- a/docs/sphinx/source/conf.py +++ b/docs/sphinx/source/conf.py @@ -79,7 +79,6 @@ "logo": { "text": f"Caterpillar {version}", }, - "announcement": "https://raw.githubusercontent.com/MatrixEditor/caterpillar/master/docs/sphinx/source/_templates/announcement.html", } html_sidebars = { "installing/index": [], diff --git a/docs/sphinx/source/reference/datamodel/protocols/structlike.rst b/docs/sphinx/source/reference/datamodel/protocols/structlike.rst index 42669998..55413e0a 100644 --- a/docs/sphinx/source/reference/datamodel/protocols/structlike.rst +++ b/docs/sphinx/source/reference/datamodel/protocols/structlike.rst @@ -9,6 +9,8 @@ and :func:`unpack` allow so-called *partial* struct-like objects, there won't be within struct class definitions. It is always recommended to implement all methods conforming to the :code:`_StructLike` protocol. +.. _struct_type: + Special Methods for Struct-Like objects --------------------------------------- @@ -60,8 +62,6 @@ Special Methods for Struct-Like objects for the explicit definition of dynamic structs, the option to raise a :class:`DynamicSizeError` is provided. -.. _struct_type: - .. 
method:: object.__type__(self) The configuration of *Structs* incorporates type replacement before a dataclass is diff --git a/docs/sphinx/source/tutorial/advanced/bitfield.rst b/docs/sphinx/source/tutorial/advanced/bitfield.rst index 4de922fe..6bef26a4 100644 --- a/docs/sphinx/source/tutorial/advanced/bitfield.rst +++ b/docs/sphinx/source/tutorial/advanced/bitfield.rst @@ -3,25 +3,97 @@ Bitfields ========= -.. attention:: - This section is still under development. +*Bitfields* are a compact way to pack multiple fields into a fixed-size binary +representation while also allowing dynamic structs in-between. -**BitFields** are a specialized feature in *Caterpillar* that allow you to define -fields at the bit level within a struct. This is particularly useful when dealing -with compact binary formats, such as network protocols, file formats, or hardware -interfaces where each bit has a specific meaning. +.. versionchanged:: 2.5.0 -BitFields allow you to specify the number of bits allocated for each field and -provide the ability to fine-tune how data is stored and retrieved from a binary -stream. This feature is useful when you need to work with bit-level manipulations, -such as when defining flags, options, or small data values packed into a single -byte or multiple bytes. + Completely reworked the concept of a *Bit-field*. The reference explains the + new concept and its implementation rules in detail: :ref:`datamodel_standard_bitfield`. -Syntax ------- + In short, the new system supports: -In *Caterpillar*, BitFields are defined using the :code:`@bitfield` decorator, and the -individual fields are specified with their respective bit widths. + - Dynamic grouping: Each Bitfield can have multiple *dynamic* bitfield groups. + - Struct-like fields: Full struct classes can be embedded. + - **Custom alignment and type factories.** + - Flexible syntax with 5 powerful rules. + + +Core Concepts +------------- + +Each Bitfield instance contains one or more *bitfield groups*. 
A +group is simply a collection of *bitfield entries* that are packed together based on +alignment constraints or a single :class:`~caterpillar.fields.Field`. + +How it works: Groups and Fields +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When you define a Bitfield class, each field line describes either a simple bit-sized +entry or a more complex structure. The fields are automatically grouped based on pre-defined +syntax definitions and processing rules. + +Here is the general idea: + +- Bitfield groups collect fields until the group is finalized (e.g. because of an alignment boundary or a struct-like field). +- Within a group, fields may consume only part of a byte, or a whole number of bytes. +- Once a group is finalized, any new fields start a new group. + +This approach means you don't have to manually calculate where each field starts and ends. Instead, +the system infers layout while giving you the tools to align fields precisely when needed. Neat, isn't it? + + +The Syntax +---------- + +Each field is defined using one of five patterns: + +1. Basic fixed-size bits field: + + A standard field defines a name and the number of bits it occupies. Example:: + + @bitfield + class Format: + flag : 1 # means the field flag uses 1 bit. + +2. Basic fixed-size bits field with type from struct: + + You can specify how the bits should be interpreted using a type that will be + resolved from the given struct using the :class:`_SupportsType` protocol:: + + @bitfield + class Format: + valid : 1 - boolean # the 1-bit field should be interpreted as a bool + +3. Alignment: + + If you define a field with a size of ``0``, it forces the Bitfield to align the current group + to the next byte boundary. Afterwards, the current group is finalized and a new one is started:: + + @bitfield + class Format: + flag : 1 + _ : 0 # align to 8bits + valid : 1 - boolean # size will be 2x 8bits + +4. Struct-like Field: + + You can embed an entire struct-like class inside the Bitfield. 
(Any object conforming to the + :class:`_StructLike` protocol). + +5. Custom Factory with Options: + + For advanced cases, you can specify not only the number of bits and a factory (a type converter) + but also additional options to fine-tune grouping or alignment:: + + @bitfield + class Format: + # 7bits, converted to char and alignment set to 24bits + name : (7, CharFactory, SetAlignment(24)) + + +Practical Example +----------------- One practical example of using BitFields is implementing the chunk-naming convention for PNG files. Here's how you might define the options for a chunk using a bitfield structure: @@ -29,27 +101,24 @@ for PNG files. Here's how you might define the options for a chunk using a bitfi .. code-block:: python :caption: Implementing the `chunk-naming `_ convention - @bitfield(options={S_DISCARD_UNNAMED}) - class ChunkOptions: - _ : 2 # <-- first two bits are not used - ancillary : 1 # f0 - _1 : 0 - _2 : 2 - private : 1 # <-- the 5-th bit (from right to left) - _3 : 0 - _4 : 2 - reserved : 1 # f2 - _5 : 0 # <-- padding until the end of this byte - _6 : 2 - safe_to_copy : 1 # f3 + @bitfield(opions={S_DISCARD_UNNAMED}) + class ChunkOption: + _ : 2 # <-- first two bits are not used + value : 1 # automatically boolean + @struct + class ChunkOptions: + ancillary : ChunkOption # f0 + private : ChunkOption # f1 + reserved : ChunkOption # f2 + safe_to_copy : ChunkOption # f3 In the example above, each field within the :code:`ChunkOptions` class is assigned -a specific number of bits: +8bits according to the :code:`ChunkOption` bitfield. - The first two bits (:code:`_`) are unused (or "unnamed"). -- The :code:`ancillary` field uses 1 bit, representing a flag. -- Other fields like :code:`private`, :code:`reserved`, and :code:`safe_to_copy` are allocated 1 or 2 bits as needed. +- The :code:`value` field uses 1 bit, representing a flag (automatically boolean type). 
+- all other bits are ignored automatically Here's a breakdown of how the bits are laid out in memory: @@ -59,7 +128,7 @@ Here's a breakdown of how the bits are laid out in memory: bit : 76543210 76543210 76543210 76543210 ---------------------------------------------- breakdown: 00100000 00100000 00100000 00100000 - \/|\___/ \/|\___/ \/|\___/ \/|\___/ + ├┘│└─┬─┘ ├┘│└─┬─┘ ├┘│└─┬─┘ ├┘│└─┬─┘ u f0 a u f1 a u f2 a u f3 a Where: diff --git a/examples/bitfield_example.py b/examples/bitfield_example.py index ba8b35b5..0f209698 100644 --- a/examples/bitfield_example.py +++ b/examples/bitfield_example.py @@ -1,6 +1,5 @@ # type: ignore -from caterpillar.fields.common import uint8 -from caterpillar.py import bitfield, char, int8, unpack, pack +from caterpillar.py import bitfield, CharFactory, int8, unpack, pack try: from rich import print @@ -12,7 +11,7 @@ class Format: b1: 1 # inferred uint8 type with a width of one bit _: 0 # start new uint8 with 7 unused bits - b2: 2 - char # wraps parsed int to char (string) + b2: (2, CharFactory) # wraps parsed int to char (string) b3: 3 - int8 = 1 # default value is applied -> REVISIT: necessary?
_1: 3 # unnamed padding to the rest of the byte @@ -20,7 +19,7 @@ class Format: print(Format.__struct__.groups) obj = unpack(Format, b"\x80\x80") print(obj) -# prints: Format(b1=1, b2='2', b3=0, _1=0) +# prints: Format(b1=1, b2='\x02', b3=0, _1=0) # real_pos: 0123456701234567 # bit_pos: 7654321076543210 # ---------------- # right to left From e3f0500d13de18e349f2cb80a5cad7fa84ff0e98 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sun, 29 Jun 2025 11:48:27 +0200 Subject: [PATCH 37/41] Fix SetAlignment option and option parsing in bitfield --- src/caterpillar/model/_bitfield.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/caterpillar/model/_bitfield.py b/src/caterpillar/model/_bitfield.py index 8363bd51..b35815e2 100755 --- a/src/caterpillar/model/_bitfield.py +++ b/src/caterpillar/model/_bitfield.py @@ -12,6 +12,7 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
+from collections.abc import Iterable from caterpillar.fields.common import Int from caterpillar.shared import ( ATTR_ACTION_PACK, @@ -765,11 +766,13 @@ def _new_group(self, alignment): def _process_alignment_option(self, option): if isinstance(option, SetAlignment): # update current working alignment - self._current_alignment = option.alignment + self._current_alignment = option.alignment or DEFAULT_ALIGNMENT + self._current_group.align_to(self._current_alignment) return True elif isinstance(option, Flag): if option.name == "bitfield.new_alignment": self._current_alignment = option.value or DEFAULT_ALIGNMENT + self._current_group.align_to(self._current_alignment) return True return False @@ -813,9 +816,16 @@ def _process_field(self, name: str, annotation, default): else: factory = factory_or_option() else: - # treat as option - options = [factory_or_option] - options.extend(extra_options) + # treat as option or as a list of options + options = ( + [factory_or_option] + if not isinstance(factory_or_option, Iterable) + else list(factory_or_option) + ) + + # extra options may be a list or single element + for extra in extra_options: + options.extend(extra if isinstance(extra, Iterable) else [extra]) if isinstance(width, int): # rule no. 
5 From 08d58922dcd66f968fc9ecc08cb39a3ca4761e54 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sun, 29 Jun 2025 12:03:25 +0200 Subject: [PATCH 38/41] Update version badges in README --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4a22ef5e..a8ffe1c9 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # Caterpillar - 🐛 -[![python](https://img.shields.io/python/required-version-toml?tomlFilePath=https%3A%2F%2Fraw.githubusercontent.com%2FMatrixEditor%2Fcaterpillar%2Fmaster%2Fpyproject.toml&logo=python)](https://www.python.org/downloads/) +[![python](https://img.shields.io/badge/Python-3.12+-blue?logo=python&logoColor=yellow)](https://www.python.org/downloads/) +![![Latest Version](https://pypi.org/project/caterpillar-py/)](https://img.shields.io/github/v/release/MatrixEditor/caterpillar.svg?logo=github) [![Build and Deploy Docs](https://github.com/MatrixEditor/caterpillar/actions/workflows/python-sphinx.yml/badge.svg)](https://github.com/MatrixEditor/caterpillar/actions/workflows/python-sphinx.yml) [![Run Tests](https://github.com/MatrixEditor/caterpillar/actions/workflows/python-test.yml/badge.svg)](https://github.com/MatrixEditor/caterpillar/actions/workflows/python-test.yml) ![GitHub issues](https://img.shields.io/github/issues/MatrixEditor/caterpillar?logo=github) From 79e7ac0c02476516b73e3f2920ec1cc232e3aa55 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sun, 29 Jun 2025 12:07:11 +0200 Subject: [PATCH 39/41] Rename _Switch to _SwitchLike in stub files --- src/caterpillar/abc.pyi | 2 +- src/caterpillar/fields/_base.pyi | 8 ++++---- src/caterpillar/fields/_mixin.pyi | 4 ++-- src/caterpillar/model/_template.pyi | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/caterpillar/abc.pyi b/src/caterpillar/abc.pyi index b306c327..e1af0461 100755 --- a/src/caterpillar/abc.pyi +++ 
b/src/caterpillar/abc.pyi @@ -85,7 +85,7 @@ class _SupportsUnpack(Protocol[_OT_co]): def __unpack__(self, context: _ContextLike) -> _OT_co: ... _SwitchLambda = Callable[[Any, _ContextLike], _StructLike[_IT, _OT]] -_Switch = Union[ +_SwitchLike = Union[ dict[Any, Union[_ContainsStruct[_IT, _OT], _StructLike[_IT, _OT]]], _SwitchLambda[_IT, _OT], ] diff --git a/src/caterpillar/fields/_base.pyi b/src/caterpillar/fields/_base.pyi index ea78de10..55f37245 100755 --- a/src/caterpillar/fields/_base.pyi +++ b/src/caterpillar/fields/_base.pyi @@ -19,7 +19,7 @@ from caterpillar.abc import ( _GreedyType, _PrefixedType, _StructLike, - _Switch, + _SwitchLike, _IT, _OT, _LengthT, @@ -47,7 +47,7 @@ class Field(_StructLike[_IT, _OT]): offset: _ContextLambda | int flags: dict[int, Flag] amount: _LengthT - options: Optional[_Switch[_IT, _OT]] + options: Optional[_SwitchLike[_IT, _OT]] condition: _ContextLambda | bool arch: Arch default: _OT | None @@ -59,7 +59,7 @@ class Field(_StructLike[_IT, _OT]): offset: _ContextLambda | int = -1, flags: Optional[set[Flag]] = None, amount: _ContextLambda | int | _PrefixedType = 0, - options: _Switch | dict[Any, _StructLike] | None = None, + options: _SwitchLike | dict[Any, _StructLike] | None = None, condition: _ContextLambda | bool = True, arch: Optional[Arch] = None, default: _OT | None = ..., @@ -69,7 +69,7 @@ class Field(_StructLike[_IT, _OT]): def __xor__(self, flag: Flag) -> Self: ... def __matmul__(self, offset: _ContextLambda | int) -> Self: ... def __getitem__(self, dim: _LengthT) -> Self: ... - def __rshift__(self, switch: _Switch[_IT, _OT]) -> Self: ... + def __rshift__(self, switch: _SwitchLike[_IT, _OT]) -> Self: ... def __floordiv__(self, condition: _ContextLambda | bool) -> Self: ... def __rsub__(self, bits: _ContextLambda | int) -> Self: ... def __set_byteorder__(self, order: ByteOrder) -> Self: ... 
diff --git a/src/caterpillar/fields/_mixin.pyi b/src/caterpillar/fields/_mixin.pyi index f79342b5..a616c445 100755 --- a/src/caterpillar/fields/_mixin.pyi +++ b/src/caterpillar/fields/_mixin.pyi @@ -17,7 +17,7 @@ from caterpillar.abc import ( _ContextLambda, _ContextLike, _StructLike, - _Switch, + _SwitchLike, _LengthT, _IT, _OT, @@ -44,7 +44,7 @@ class FieldMixin(Generic[_IT, _OT]): def __xor__(self, flag: Flag) -> Field[_IT, _OT]: ... def __matmul__(self, offset: _ContextLambda | int) -> Field[_IT, _OT]: ... def __getitem__(self, dim: _LengthT) -> Field[Collection[_IT], Collection[_OT]]: ... - def __rshift__(self, switch: _Switch) -> Field[_IT, _OT]: ... + def __rshift__(self, switch: _SwitchLike) -> Field[_IT, _OT]: ... def __floordiv__(self, condition: _ContextLambda | bool) -> Field[_IT, _OT]: ... def __set_byteorder__(self, order: ByteOrder) -> Field[_IT, _OT]: ... def __rsub__(self, bits: _ContextLambda | int) -> Field[_IT, _OT]: ... diff --git a/src/caterpillar/model/_template.pyi b/src/caterpillar/model/_template.pyi index e5ef5de3..c26cdea4 100755 --- a/src/caterpillar/model/_template.pyi +++ b/src/caterpillar/model/_template.pyi @@ -4,7 +4,7 @@ from caterpillar.abc import ( _LengthT, _StructLike, _ContextLambda, - _Switch, + _SwitchLike, ) from caterpillar.byteorder import ByteOrder, Arch from caterpillar.fields._base import Field @@ -19,7 +19,7 @@ class TemplateTypeVar: field_kwds: dict[str, Any] def __init__(self, name: str, **field_kwds) -> None: ... def __getitem__(self, amount: _LengthT) -> TemplateTypeVar: ... - def __rshift__(self, switch: dict | _Switch) -> TemplateTypeVar: ... + def __rshift__(self, switch: dict | _SwitchLike) -> TemplateTypeVar: ... def __matmul__(self, offset: int | _ContextLambda) -> TemplateTypeVar: ... def __set_byteorder__(self, order: ByteOrder) -> TemplateTypeVar: ... def __rsub__(self, bits: int | _ContextLambda) -> TemplateTypeVar: ... 
From f13ddd5e480917fa16748a5755673afca6156426 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sun, 29 Jun 2025 12:23:12 +0200 Subject: [PATCH 40/41] Fix bitfield alignment on last group --- + Update README example --- README.md | 43 +++++++++++++++++------------- src/caterpillar/model/_bitfield.py | 1 + 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index a8ffe1c9..7627ce83 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # Caterpillar - 🐛 [![python](https://img.shields.io/badge/Python-3.12+-blue?logo=python&logoColor=yellow)](https://www.python.org/downloads/) -![![Latest Version](https://pypi.org/project/caterpillar-py/)](https://img.shields.io/github/v/release/MatrixEditor/caterpillar.svg?logo=github) +![![Latest Version](https://pypi.org/project/caterpillar-py/)](https://img.shields.io/github/v/release/MatrixEditor/caterpillar.svg?logo=github&label=Latest+Version) [![Build and Deploy Docs](https://github.com/MatrixEditor/caterpillar/actions/workflows/python-sphinx.yml/badge.svg)](https://github.com/MatrixEditor/caterpillar/actions/workflows/python-sphinx.yml) [![Run Tests](https://github.com/MatrixEditor/caterpillar/actions/workflows/python-test.yml/badge.svg)](https://github.com/MatrixEditor/caterpillar/actions/workflows/python-test.yml) ![GitHub issues](https://img.shields.io/github/issues/MatrixEditor/caterpillar?logo=github) @@ -40,35 +40,42 @@ options will be added in the future. Documentation is [here >](https://matrixedi ## Give me some code! 
```python -from caterpillar.py import * +@bitfield(order=LittleEndian) +class Header: + version : 4 # 4bit integer + valid : 1 # 1bit flag (boolean) + ident : (8, CharFactory) # 8bit char + # automatic alignment to 16bits @struct(order=LittleEndian) class Format: - magic: b"ITS MAGIC" # Supports string and byte constants directly - a: uint8 # Primitive data types - b: int32 - length: uint8 # String fields with computed lengths - name: String(this.length) # -> you can also use Prefixed(uint8) + magic : b"ITS MAGIC" # Supports string and byte constants directly + header : Header + a : uint8 # Primitive data types + b : int32 + length : uint8 # String fields with computed lengths + name : String(this.length) # -> you can also use Prefixed(uint8) - # custom actions, e.g. for hashes - _hash_begin: DigestField.begin("hash", Md5_Algo) - - # Sequences with prefixed, computed lengths - names: CString[uint8::] - - # automatic hash creation and verification + default value - hash: Md5_Field("hash", verify=True) = None + _hash_begin : DigestField.begin("hash", Md5_Algo) # custom actions, e.g. for hashes + names : CString[uint8::] # Sequences with prefixed, computed lengths + hash : Md5_Field("hash", verify=True) = None # automatic hash creation and verification + default value # Instantiation (keyword-only arguments, magic is auto-inferred): -obj = Format(a=1, b=2, length=3, name="foo", names=["a", "b"]) +obj = Format( + header=Header(version=2, valid=True, ident="F"), + a=1, + b=2, + length=3, + name="foo", + names=["a", "b"] +) # Packing the object, reads as 'PACK obj FROM Format' # objects of struct classes can be packed right away blob = pack(obj, Format) -# results in: b'ITS MAGIC\x01\x02\x00\x00\x00\x03foo\x02a\x00b\x00\xf55... +# results in: b'ITS MAGIC0*\x01\x02\x00\x00\x00\x03foo\x02a\x00b\x00)\x9a...' 
# Unpacking the binary data, reads as 'UNPACK Format FROM blob' obj2 = unpack(Format, blob) -assert obj2.hash == obj.hash ``` This library offers extensive functionality beyond basic struct handling. For further details diff --git a/src/caterpillar/model/_bitfield.py b/src/caterpillar/model/_bitfield.py index b35815e2..96248184 100755 --- a/src/caterpillar/model/_bitfield.py +++ b/src/caterpillar/model/_bitfield.py @@ -580,6 +580,7 @@ def __init__( self.options.update(GLOBAL_BITFIELD_FLAGS) self.groups = [group for group in self.groups if not group.is_empty()] + self.groups[-1].align_to(self._current_alignment) # REVISIT: should be enable modification after processing? del self._bit_pos del self._current_alignment From a10fff40b53531092e7d50e38536fe996e54f9b6 Mon Sep 17 00:00:00 2001 From: MatrixEditor <58256046+MatrixEditor@users.noreply.github.com> Date: Sun, 29 Jun 2025 12:29:49 +0200 Subject: [PATCH 41/41] 2.5.0 Minor Release --- ## Changes made The code of all modules was refactored to include as few type hints as possible. All typing-related information should be taken from the `.pyi` files.
##### `caterpillar.abc` - Removed `_Action` and split into two separate Protocols `_ActionLike` := `_SupportsActionUnpack` | `_SupportsActionPack` - Renamed `_Switch` to `_SwitchLike` - Removed `_EnumLike` - Added two new protocols: '_SupportsBits' and '_ContainsBits' - The following attributes and methods were moved into [caterpillar.shared](#caterpillarshared): `STRUCT_FIELD` -> `ATTR_STRUCT`, `hasstruct`, `getstruct` and `typeof` - Removed unused '_ContextPathStr' - Removed `__type__()` requirement from '_StructLike' - Added new protocol: `_SupportsType` ##### `caterpillar.byteorder` - Moved 'BYTEORDER_FIELD' to [caterpillar.shared](#caterpillarshared) as `ATTR_BYTEORDER` ##### `caterpillar.shortcuts` - Shortcuts now include `typeof`, `to_struct`, `hasstruct`, `getstruct` and `sizeof` ##### `caterpillar.shared` - New constants moved from other modules: `ATTR_BYTEORDER`, `ATTR_TYPE`, `ATTR_BITS`, `ATTR_SIGNED`, `ATTR_TEMPLATE` ##### `caterpillar.context` - New context attribute: '_root' can be set to point to the root context instance. Internally, instead of a for-loop that iterates through parent context instances, a simple self.get(...) call is made. (see #35) ##### `caterpillar.model._base` - Fixed an issue when parsing union objects with an unbound stream object - Fixed an issue where field options defined in Sequences were not populated when creating fields. ##### `caterpillar.model._struct` - `sizeof` now checks if the provided object implements the `_SupportsSize` protocol ##### `caterpillar.model._bitfield` - **Completely reworked the bitfield mechanism to make it even more powerful. For details refer to #34.** - Fixed an issue where field options defined in BitFields were not populated when creating fields. 
- Moved `BITS_ATTR` and `SIGNED_ATTR` into [caterpillar.shared](#caterpillarshared) - Removed unused `getformat` function ##### `caterpillar.fields.common` - `Transformer`: removed `__fmt__` method ##### `caterpillar.fields.compression` - Updated public compression methods to use lazy imports #### Documentation - Updated library documentation to reflect changes made to function signatures - Updated the reference to explicitly state the Protocols defined in `caterpillar.abc` - Created a change log to reflect changes made up to v2.5.0 ## Associated Issues and Pull Requests - #30: Stub files for Caterpillar allowing to use generics and more detailed return types - #27: Python3.14 does NOT break this project. However, some features (with-statements) can't be used - #34: Introducing a new Bitfield concept to make it even more flexible - #35: Optimized resolving the root context instance - #37: Various small fixes related to the documentation - #24: Bitfield docs - #33: General documentation updates --- pyproject.toml | 2 +- src/caterpillar/__init__.py | 2 +- src/ccaterpillar/pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index db17b11d..09defc94 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ cmake.source-dir = "." [project] name = "caterpillar" -version = "2.5.0-rc" +version = "2.5.0" description="Library to pack and unpack structurized binary data." authors = [{ name = "MatrixEditor" }] diff --git a/src/caterpillar/__init__.py b/src/caterpillar/__init__.py index a2ec8c48..4b3eddee 100644 --- a/src/caterpillar/__init__.py +++ b/src/caterpillar/__init__.py @@ -14,7 +14,7 @@ # along with this program. If not, see . 
import warnings -__version__ = "2.5.0-rc" +__version__ = "2.5.0" __release__ = None __author__ = "MatrixEditor" diff --git a/src/ccaterpillar/pyproject.toml b/src/ccaterpillar/pyproject.toml index aa5f5543..21f4a5dd 100644 --- a/src/ccaterpillar/pyproject.toml +++ b/src/ccaterpillar/pyproject.toml @@ -18,7 +18,7 @@ CP_ENABLE_NATIVE = "1" [project] name = "caterpillar" -version = "2.5.0-rc" +version = "2.5.0" description = "Library to pack and unpack structurized binary data." readme = "../../README.md"