diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index 0156440d..99e9c54f 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -9,7 +9,7 @@ jobs: fail-fast: true matrix: os: ["ubuntu-latest", "windows-latest", "macos-latest"] - python-version: ["3.12"] + python-version: ["3.12", "3.13"] steps: - name: Checkout source diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..2ba7c69c --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,56 @@ +# Changelog + +## [2.5.0] - Minor Release + +### Added +- **caterpillar.abc** + - Add `_SupportsBits` protocol. + - Add `_ContainsBits` protocol. + - Add `_SupportsType` protocol. + +- **caterpillar.shortcuts** + - Add new shortcuts: `typeof`, `to_struct`, `hasstruct`, `getstruct`, and `sizeof`. + +- **caterpillar.shared** + - Add new constants from other modules: `ATTR_BYTEORDER`, `ATTR_TYPE`, `ATTR_BITS`, `ATTR_SIGNED`, `ATTR_TEMPLATE`. + +- **caterpillar.context** + - Add context attribute `_root` to point to the root context instance. Internal parent iteration now uses `self.get(...)`. + +--- + +### Removed +- **caterpillar.abc** + - Remove `_Action` protocol; split into `_ActionLike = _SupportsActionUnpack | _SupportsActionPack`. + - Remove `__type__` requirement from `_StructLike`. + - **Breaking:** Remove `_EnumLike` and `_ContextPathStr`. + +- **caterpillar.model** + - Remove unused `getformat` function. + +- **caterpillar.fields.common** + - Remove unused `__fmt__` function in `Transformer`. + +--- + +### Changed +- **caterpillar.abc** + - Rename `_Switch` protocol to `_SwitchLike`. + - Move `STRUCT_FIELD` to `caterpillar.shared` as `ATTR_STRUCT`; move `hasstruct`, `getstruct`, and `typeof` to `caterpillar.shared`. + +- **caterpillar.byteorder** + - Move `BYTEORDER_FIELD` to `caterpillar.shared` as `ATTR_BYTEORDER`. + +- **caterpillar.model** + - Update `sizeof` to check if the object implements `_SupportsSize` protocol. 
+ - Renew `Bitfield` concept with enhanced syntax. + +- **Documentation** + - Update reference and library docs; improve section numbering. + +--- + +### Fixed +- **caterpillar.model** + - Fix parsing of union objects with an unbound stream. + - Fix field options in Sequences and Structs not being populated when creating fields. diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index a18fb49a..00000000 --- a/MANIFEST.in +++ /dev/null @@ -1,9 +0,0 @@ -include caterpillar/*.c -include caterpillar/*.h -include caterpillar/*.py -include caterpillar/*.pyi -include caterpillar/py.typed -include setup.py -include README.md -include LICENSE -include MANIFEST.in \ No newline at end of file diff --git a/README.md b/README.md index c7470933..7627ce83 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # Caterpillar - 🐛 -[![python](https://img.shields.io/python/required-version-toml?tomlFilePath=https%3A%2F%2Fraw.githubusercontent.com%2FMatrixEditor%2Fcaterpillar%2Fmaster%2Fpyproject.toml&logo=python)](https://www.python.org/downloads/) +[![python](https://img.shields.io/badge/Python-3.12+-blue?logo=python&logoColor=yellow)](https://www.python.org/downloads/) +[![Latest Version](https://img.shields.io/github/v/release/MatrixEditor/caterpillar.svg?logo=github&label=Latest+Version)](https://pypi.org/project/caterpillar-py/) [![Build and Deploy Docs](https://github.com/MatrixEditor/caterpillar/actions/workflows/python-sphinx.yml/badge.svg)](https://github.com/MatrixEditor/caterpillar/actions/workflows/python-sphinx.yml) [![Run Tests](https://github.com/MatrixEditor/caterpillar/actions/workflows/python-test.yml/badge.svg)](https://github.com/MatrixEditor/caterpillar/actions/workflows/python-test.yml) ![GitHub issues](https://img.shields.io/github/issues/MatrixEditor/caterpillar?logo=github) @@ -10,15 +11,8 @@ > [!WARNING] > This project is still in beta/testing phase. Expect bugs, naming changes and errors while using this -> library. 
C API Reference is WIP, C extensions are supported since v2.1.0. - -> [!NOTE] -> Python 3.14 breaks `with` statements in class definitions since `__annotations__` are added at the end -> of a class definition. Therefore, `Digest` and conditional statements **ARE NOT SUPPORTED** in using the `with` syntax Python 3.14+. -> As of version `2.4.5` the `Digest` class has a counterpart (`DigestField`), which can be used to manually specify a digest without -> the need of a `แบith` statement. - - +> library. C API Reference is WIP, C extensions are supported since v2.1.0. The latest stable release +> is available at PyPI. Caterpillar is a Python 3.12+ library to pack and unpack structurized binary data. It enhances the capabilities of [Python Struct](https://docs.python.org/3/library/struct.html) @@ -27,7 +21,7 @@ options will be added in the future. Documentation is [here >](https://matrixedi *Caterpillar* is able to: -* Pack and unpack data just from processing Python class definitions (including support for bitfields, c++-like templates and c-like unions!), +* Pack and unpack data just from processing Python class definitions (including support for powerful bitfields, c++-like templates and c-like unions!), * apply a wide range of data types (with endianess and architecture configuration), * dynamically adapt structs based on their inheritance layout, * reduce the used memory space using `__slots__`, @@ -36,36 +30,52 @@ options will be added in the future. Documentation is [here >](https://matrixedi * it helps you to create cleaner and more compact code. * You can even extend Caterpillar and write your parsing logic in C or C++!! +> [!NOTE] +> Python 3.14 breaks `with` statements in class definitions since `__annotations__` are added at the end +> of a class definition. Therefore, `Digest` and conditional statements **ARE NOT SUPPORTED** using the `with` syntax in Python 3.14+. 
+> As of version `2.4.5` the `Digest` class has a counterpart (`DigestField`), which can be used to manually specify a digest without +> the need of a `with` statement. + + ## Give me some code! ```python -from caterpillar.py import * +@bitfield(order=LittleEndian) +class Header: + version : 4 # 4bit integer + valid : 1 # 1bit flag (boolean) + ident : (8, CharFactory) # 8bit char + # automatic alignment to 16bits @struct(order=LittleEndian) class Format: - magic: b"ITS MAGIC" # Supports string and byte constants directly - a: uint8 # Primitive data types - b: int32 - length: uint8 # String fields with computed lengths - name: String(this.length) # -> you can also use Prefixed(uint8) - - # wraps all following fields and creates a new attr - # only for Python <= 3.13 - with Md5(name="hash", verify=True): - # Sequences with prefixed, computed lengths - names: CString[uint8::] + magic : b"ITS MAGIC" # Supports string and byte constants directly + header : Header + a : uint8 # Primitive data types + b : int32 + length : uint8 # String fields with computed lengths + name : String(this.length) # -> you can also use Prefixed(uint8) + _hash_begin : DigestField.begin("hash", Md5_Algo) # custom actions, e.g. for hashes + names : CString[uint8::] # Sequences with prefixed, computed lengths + hash : Md5_Field("hash", verify=True) = None # automatic hash creation and verification + default value # Instantiation (keyword-only arguments, magic is auto-inferred): -obj = Format(a=1, b=2, length=3, name="foo", names=["a", "b"]) +obj = Format( + header=Header(version=2, valid=True, ident="F"), + a=1, + b=2, + length=3, + name="foo", + names=["a", "b"] +) # Packing the object, reads as 'PACK obj FROM Format' # objects of struct classes can be packed right away blob = pack(obj, Format) -# results in: b'ITS MAGIC\x01\x02\x00\x00\x00\x03foo\x02a\x00b\x00\xf55... +# results in: b'ITS MAGIC0*\x01\x02\x00\x00\x00\x03foo\x02a\x00b\x00)\x9a...' 
# Unpacking the binary data, reads as 'UNPACK Format FROM blob' obj2 = unpack(Format, blob) -assert obj2.hash == obj.hash ``` This library offers extensive functionality beyond basic struct handling. For further details diff --git a/docs/sphinx/source/_static/css/custom.css b/docs/sphinx/source/_static/css/custom.css new file mode 100644 index 00000000..3225f29e --- /dev/null +++ b/docs/sphinx/source/_static/css/custom.css @@ -0,0 +1,51 @@ +table.t-stbl { + padding: 0em 1em 0.5em 1em; + flex-direction: column; + display: flex; +} + +.t-stbl td.t-hr { + border-top: 1px solid var(--pst-color-surface); + padding: 0 +} + +.t-stbl span.t-decl-id { + color: var(--pst-color-on-surface); + font-style: italic; +} + +.t-stbl span.t-decl-opt { + color: var(--pst-color-primary); + font-style: italic; + padding-left: 0; + font-size: small; +} + +.t-stbl td.t-no { + padding-left: 2em; +} + +table.t-par-begin { + flex-direction: column; + display: flex; +} + +tr.t-bar { + vertical-align: top; +} + +tr.t-par>td { + vertical-align: top; + padding: 0 1em 0 0; +} + +tr.t-par>td:nth-child(1) { + white-space: nowrap; + text-align: right; + font-weight: bold; + font-family: var(--pst-font-family-monospace-system); +} + +tr.t-par>td:nth-child(3) { + width: 100% +} \ No newline at end of file diff --git a/docs/sphinx/source/conf.py b/docs/sphinx/source/conf.py index a529bee2..f56735d8 100644 --- a/docs/sphinx/source/conf.py +++ b/docs/sphinx/source/conf.py @@ -29,7 +29,7 @@ "sphinx_design", "breathe", "c_annotations", - "sphinx_copybutton" + "sphinx_copybutton", ] templates_path = ["_templates"] @@ -63,13 +63,14 @@ # If true, `todo` and `todoList` produce output, else they produce nothing. 
todo_include_todos = False -refcount_file = '../../../src/capi.dat' -autodoc_member_order = 'bysource' +refcount_file = "../../../src/capi.dat" +autodoc_member_order = "bysource" # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output html_theme = "pydata_sphinx_theme" html_static_path = ["_static"] +html_css_files = ["css/custom.css"] html_theme_options = { "show_nav_level": 4, "navbar_end": ["navbar-icon-links", "theme-switcher"], @@ -78,10 +79,9 @@ "logo": { "text": f"Caterpillar {version}", }, - "announcement": "https://raw.githubusercontent.com/MatrixEditor/caterpillar/master/docs/sphinx/source/_templates/announcement.html", } html_sidebars = { - "installing/index": [], + "installing/index": [], } # -- Options for C++ Docs ----------------------------------------------------- diff --git a/docs/sphinx/source/development/changelog.rst b/docs/sphinx/source/development/changelog.rst index e15e69a5..afab34fa 100644 --- a/docs/sphinx/source/development/changelog.rst +++ b/docs/sphinx/source/development/changelog.rst @@ -4,4 +4,109 @@ Changelog ********* -*Entries will be added in the future.* \ No newline at end of file +*More entries will be added in the future.* + +.. _changelog_2.5.0: + +[2.5.0] - Minor Release +======================= + +This version introduces massive changes due to the addition of stub files. Most of the type hints in the Python +file are ported into several stub files. Additionally, the bitfield concept was completely renewed to be more +flexible and dynamic. 
+ +Added +----- + +*caterpillar.abc* +^^^^^^^^^^^^^^^^^ + +- :class:`_SupportsBits` protocol +- :class:`_ContainsBits` protocol +- :class:`_SupportsType` protocol + +*caterpillar.shortcuts* +^^^^^^^^^^^^^^^^^^^^^^^ + +- New shortcuts: :func:`typeof`, :func:`to_struct`, :func:`hasstruct`, :func:`getstruct` and :func:`sizeof` + +*caterpillar.shared* +^^^^^^^^^^^^^^^^^^^^ + +- New constants from other modules: :attr:`ATTR_BYTEORDER`, :attr:`ATTR_TYPE`, :attr:`ATTR_BITS`, :attr:`ATTR_SIGNED`, :attr:`ATTR_TEMPLATE` + +*caterpillar.context* +^^^^^^^^^^^^^^^^^^^^^ + +- New context attribute: `_root` can be set to point to the root context instance. Internally, instead of a for-loop that iterates through parent context instances, a simple :code:`self.get(...)` call is made. + +.. raw:: html + +
+ +Removed +------- + +*caterpillar.abc* +^^^^^^^^^^^^^^^^^ + +- ``_Action`` protocol and create two separate Protocols that form:: + + _ActionLike = _SupportsActionUnpack | _SupportsActionPack + +- ``__type__`` requirement from :class:`_StructLike` +- **Breaking:** ``_EnumLike``, ``_ContextPathStr`` + +*caterpillar.model* +^^^^^^^^^^^^^^^^^^^ + +- Unused ``getformat`` function + +*caterpillar.fields.common* +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- Unused ``__fmt__`` function in :class:`Transformer` + +.. raw:: html + +
+ +Changed +------- + +*caterpillar.abc* +^^^^^^^^^^^^^^^^^ + +- Rename ``_Switch`` protocol to :attr:`_SwitchLike` +- Move the following attributes and methods into *caterpillar.shared*: rename ``STRUCT_FIELD`` to :attr:`ATTR_STRUCT`, :func:`hasstruct`, :func:`getstruct` and :func:`typeof` + +*caterpillar.byteorder* +^^^^^^^^^^^^^^^^^^^^^^^ + +- Move ``BYTEORDER_FIELD`` to *caterpillar.shared* as :attr:`ATTR_BYTEORDER` + + +*caterpillar.model* +^^^^^^^^^^^^^^^^^^^ + +- :func:`sizeof` now checks if the provided object implements the :class:`_SupportsSize` protocol +- New :class:`Bitfield` concept with enhanced syntax + + +*Documentation* +^^^^^^^^^^^^^^^ + +- Update reference and library docs as well as section numbering + +.. raw:: html + +
+ +Fixed +----- + +*caterpillar.model* +^^^^^^^^^^^^^^^^^^^ + +- Parsing of union objects with an unbound stream object +- Field options defined in Sequences and Structs not being populated when creating fields diff --git a/docs/sphinx/source/development/index.rst b/docs/sphinx/source/development/index.rst index 8a5a340f..134bba07 100644 --- a/docs/sphinx/source/development/index.rst +++ b/docs/sphinx/source/development/index.rst @@ -7,7 +7,6 @@ Development *TODO* .. toctree:: - :numbered: :maxdepth: 2 roadmap.rst diff --git a/docs/sphinx/source/index.rst b/docs/sphinx/source/index.rst index 4fbbf083..2546b4df 100644 --- a/docs/sphinx/source/index.rst +++ b/docs/sphinx/source/index.rst @@ -125,14 +125,21 @@ what configuration options can be used. Alternatively you can follow the :ref:`t .. toctree:: :maxdepth: 3 :hidden: + :numbered: 4 :caption: Contents: installing/index.rst tutorial/index.rst reference/index.rst library/index.rst + +.. toctree:: + :maxdepth: 3 + :hidden: + development/index.rst + .. seealso:: * `Github Source `_ * `Github Issues `_ diff --git a/docs/sphinx/source/library/abc.rst b/docs/sphinx/source/library/abc.rst deleted file mode 100644 index 96a08f13..00000000 --- a/docs/sphinx/source/library/abc.rst +++ /dev/null @@ -1,55 +0,0 @@ -.. _abc: - -********************* -Abstract base classes -********************* - -Struct ------- - -.. autoclass:: caterpillar.abc._SupportsPack - :members: - -.. autoclass:: caterpillar.abc._SupportsUnpack - :members: - -.. autoclass:: caterpillar.abc._SupportsSize - :members: - -.. autoclass:: caterpillar.abc._StructLike - :members: - -.. autoclass:: caterpillar.abc._ContainsStruct - :members: - -Context -------- - -.. autoclass:: caterpillar.abc._ContextLike - :members: - -.. autoclass:: caterpillar.abc._ContextLambda - :members: - - -Other base classes ------------------- - -.. autoclass:: caterpillar.abc._EnumLike - :members: - -.. 
autoclass:: caterpillar.abc._Switch - :members: - - -Standard interface ------------------- - -.. autoattribute:: caterpillar.abc.STRUCT_FIELD - -.. autofunction:: caterpillar.abc.hasstruct - -.. autofunction:: caterpillar.abc.getstruct - -.. autofunction:: caterpillar.abc.typeof - diff --git a/docs/sphinx/source/library/byteorder.rst b/docs/sphinx/source/library/byteorder.rst index 6f9aaee1..7ca757d1 100644 --- a/docs/sphinx/source/library/byteorder.rst +++ b/docs/sphinx/source/library/byteorder.rst @@ -4,13 +4,18 @@ Byteorder and Architecture ************************** +.. py:currentmodule:: caterpillar.byteorder + Byteorder --------- -.. autoclass:: caterpillar.byteorder.ByteOrder +.. autoclass:: ByteOrder :members: -.. autoattribute:: caterpillar.byteorder.BYTEORDER_FIELD +.. autofunction:: byteorder(obj, default: Optional[ByteOrder] = None) -> ByteOrder + +Standard Byteorder Instances +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoattribute:: caterpillar.byteorder.Native @@ -22,13 +27,46 @@ Byteorder .. autoattribute:: caterpillar.byteorder.SysNative -.. autofunction:: caterpillar.byteorder.byteorder + Architecture ------------ -.. autoclass:: caterpillar.byteorder.Arch +.. autoclass:: Arch :members: .. autoattribute:: caterpillar.byteorder.system_arch + +Standard Architectures +~~~~~~~~~~~~~~~~~~~~~~ + +.. autoattribute:: caterpillar.byteorder.x86 + +.. autoattribute:: caterpillar.byteorder.x86_64 + +.. autoattribute:: caterpillar.byteorder.ARM + +.. autoattribute:: caterpillar.byteorder.ARM64 + +.. autoattribute:: caterpillar.byteorder.AARCH64 + +.. autoattribute:: caterpillar.byteorder.PowerPC + +.. autoattribute:: caterpillar.byteorder.PowerPC64 + +.. autoattribute:: caterpillar.byteorder.MIPS + +.. autoattribute:: caterpillar.byteorder.MIPS64 + +.. autoattribute:: caterpillar.byteorder.SPARC + +.. autoattribute:: caterpillar.byteorder.SPARC64 + +.. autoattribute:: caterpillar.byteorder.RISC_V64 + +.. autoattribute:: caterpillar.byteorder.RISC_V + +.. 
autoattribute:: caterpillar.byteorder.AMD + +.. autoattribute:: caterpillar.byteorder.AMD64 diff --git a/docs/sphinx/source/library/context.rst b/docs/sphinx/source/library/context.rst index e38bdb85..47308974 100644 --- a/docs/sphinx/source/library/context.rst +++ b/docs/sphinx/source/library/context.rst @@ -10,7 +10,7 @@ Context classes --------------- .. autoclass:: caterpillar.context.Context - :members: + :members: __context_getattr__, __context_setattr__, _root .. autoclass:: caterpillar.context.ContextPath :members: @@ -53,6 +53,8 @@ Special Attributes .. autoattribute:: caterpillar.context.CTX_SEQ +.. autoattribute:: caterpillar.context.CTX_ROOT + Expressions ----------- diff --git a/docs/sphinx/source/library/fields/common.rst b/docs/sphinx/source/library/fields/common.rst index 5b0a302d..8ebb977d 100644 --- a/docs/sphinx/source/library/fields/common.rst +++ b/docs/sphinx/source/library/fields/common.rst @@ -7,121 +7,124 @@ Common Structs Numeric Structs --------------- -.. autoclass:: caterpillar.py.PyStructFormattedField +.. autoclass:: caterpillar.fields.PyStructFormattedField :members: .. versionchanged:: 2.4.0 :code:`FormatField` renamed to :code:`PyStructFormattedField` -.. autoattribute:: caterpillar.py.uint8 +.. autoattribute:: caterpillar.fields.uint8 -.. autoattribute:: caterpillar.py.int8 +.. autoattribute:: caterpillar.fields.int8 -.. autoattribute:: caterpillar.py.uint16 +.. autoattribute:: caterpillar.fields.uint16 -.. autoattribute:: caterpillar.py.int16 +.. autoattribute:: caterpillar.fields.int16 -.. autoattribute:: caterpillar.py.uint32 +.. autoattribute:: caterpillar.fields.uint32 -.. autoattribute:: caterpillar.py.int32 +.. autoattribute:: caterpillar.fields.int32 -.. autoattribute:: caterpillar.py.uint64 +.. autoattribute:: caterpillar.fields.uint64 -.. autoattribute:: caterpillar.py.int64 +.. autoattribute:: caterpillar.fields.int64 -.. autoattribute:: caterpillar.py.size_t +.. autoattribute:: caterpillar.fields.size_t -.. 
autoattribute:: caterpillar.py.ssize_t +.. autoattribute:: caterpillar.fields.ssize_t -.. autoattribute:: caterpillar.py.float16 +.. autoattribute:: caterpillar.fields.float16 -.. autoattribute:: caterpillar.py.float32 +.. autoattribute:: caterpillar.fields.float32 -.. autoattribute:: caterpillar.py.float64 +.. autoattribute:: caterpillar.fields.float64 -.. autoattribute:: caterpillar.py.void_ptr +.. autoattribute:: caterpillar.fields.void_ptr -.. autoattribute:: caterpillar.py.char +.. autoattribute:: caterpillar.fields.char -.. autoattribute:: caterpillar.py.boolean +.. autoattribute:: caterpillar.fields.boolean -.. autoattribute:: caterpillar.py.padding +.. autoattribute:: caterpillar.fields.padding -.. autoclass:: caterpillar.py.Int +.. autoclass:: caterpillar.fields.Int :members: -.. autoclass:: caterpillar.py.UInt +.. autoclass:: caterpillar.fields.UInt :members: -.. autoattribute:: caterpillar.py.vint +.. autoattribute:: caterpillar.fields.vint -.. autoclass:: caterpillar.py.VarInt +.. autoclass:: caterpillar.fields.VarInt :members: Bytes, Strings -------------- -.. autoclass:: caterpillar.py.Memory +.. autoclass:: caterpillar.fields.Memory :members: .. versionchanged:: 2.4.0 Removed :code:`encoding` argument -.. autoclass:: caterpillar.py.Bytes +.. autoclass:: caterpillar.fields.Bytes :members: -.. autoclass:: caterpillar.py.String +.. autoclass:: caterpillar.fields.String :members: -.. autoclass:: caterpillar.py.Prefixed +.. autoclass:: caterpillar.fields.Prefixed :members: .. versionadded:: 2.4.0 Added support for arbitrary structs. **Warning: the second parameter is now a struct instead of the encoding string.** -.. autoclass:: caterpillar.py.CString +.. autoclass:: caterpillar.fields.CString :members: -.. autoclass:: caterpillar.py.ConstString +.. autoclass:: caterpillar.fields.ConstString :members: -.. autoclass:: caterpillar.py.ConstBytes +.. autoclass:: caterpillar.fields.ConstBytes :members: Special Structs --------------- -.. 
autoattribute:: caterpillar.py.Pass +.. autoattribute:: caterpillar.fields.Pass See source code for details -.. autoclass:: caterpillar.py.Aligned +.. autoclass:: caterpillar.fields.Aligned :members: .. versionadded:: 2.4.0 -.. autofunction:: caterpillar.py.align +.. autofunction:: caterpillar.fields.align .. versionadded:: 2.4.0 -.. autoclass:: caterpillar.py.Computed +.. autoclass:: caterpillar.fields.Computed :members: -.. autoclass:: caterpillar.py.Transformer +.. autoclass:: caterpillar.fields.Transformer :members: -.. autoclass:: caterpillar.py.Enum + .. versionchanged:: 2.5.0 + Removed ``__fmt__`` method + +.. autoclass:: caterpillar.fields.Enum :members: -.. autoclass:: caterpillar.py.Const +.. autoclass:: caterpillar.fields.Const :members: -.. autoclass:: caterpillar.py.Lazy +.. autoclass:: caterpillar.fields.Lazy :members: -.. autoclass:: caterpillar.py.Uuid +.. autoclass:: caterpillar.fields.Uuid :members: .. versionchanged:: 2.4.0 diff --git a/docs/sphinx/source/library/fields/compression.rst b/docs/sphinx/source/library/fields/compression.rst index cf7296ad..6fbbaf43 100644 --- a/docs/sphinx/source/library/fields/compression.rst +++ b/docs/sphinx/source/library/fields/compression.rst @@ -7,16 +7,16 @@ Compression Structs Basic structs ------------- -.. autoclass:: caterpillar.py.Compressed +.. autoclass:: caterpillar.fields.Compressed :members: Supported compression types --------------------------- -.. autofunction:: caterpillar.py.ZLibCompressed +.. autofunction:: caterpillar.fields.ZLibCompressed -.. autofunction:: caterpillar.py.Bz2Compressed +.. autofunction:: caterpillar.fields.Bz2Compressed -.. autofunction:: caterpillar.py.LZMACompressed +.. autofunction:: caterpillar.fields.LZMACompressed -.. autofunction:: caterpillar.py.LZOCompressed +.. 
autofunction:: caterpillar.fields.LZOCompressed diff --git a/docs/sphinx/source/library/fields/crypto.rst b/docs/sphinx/source/library/fields/crypto.rst index 92ad1865..e00c4b7f 100644 --- a/docs/sphinx/source/library/fields/crypto.rst +++ b/docs/sphinx/source/library/fields/crypto.rst @@ -8,14 +8,14 @@ Cryptographic Structs Hashes ------ -.. autoclass:: caterpillar.py.Algorithm +.. autoclass:: caterpillar.fields.Algorithm -.. autoclass:: caterpillar.py.Digest +.. autoclass:: caterpillar.fields.Digest -.. autoclass:: caterpillar.py.DigestField +.. autoclass:: caterpillar.fields.DigestField -.. autoclass:: caterpillar.py.DigestFieldAction +.. autoclass:: caterpillar.fields.DigestFieldAction Ciphers diff --git a/docs/sphinx/source/library/fields/field_model.rst b/docs/sphinx/source/library/fields/field_model.rst index e737cc09..c146e533 100644 --- a/docs/sphinx/source/library/fields/field_model.rst +++ b/docs/sphinx/source/library/fields/field_model.rst @@ -4,24 +4,24 @@ Field Model *********** -.. autoclass:: caterpillar.py.Field() +.. autoclass:: caterpillar.fields.Field :members: :private-members: :no-undoc-members: -.. autoclass:: caterpillar.py.FieldMixin +.. autoclass:: caterpillar.fields.FieldMixin :special-members: -.. autoclass:: caterpillar.py.FieldStruct +.. autoclass:: caterpillar.fields.FieldStruct :members: :special-members: -.. autoclass:: caterpillar.py.Chain +.. autoclass:: caterpillar.fields.Chain :members: :special-members: -.. autoclass:: caterpillar.py.If +.. autoclass:: caterpillar.fields.If :members: -.. autoclass:: caterpillar.py.ElseIf +.. autoclass:: caterpillar.fields.ElseIf :members: \ No newline at end of file diff --git a/docs/sphinx/source/library/index.rst b/docs/sphinx/source/library/index.rst index c7241f3e..24fede34 100644 --- a/docs/sphinx/source/library/index.rst +++ b/docs/sphinx/source/library/index.rst @@ -7,19 +7,25 @@ Library *descriptions are WIP* .. 
toctree:: - :maxdepth: 2 + :maxdepth: 1 :caption: Python API byteorder.rst options.rst - abc.rst context.rst exceptions.rst model.rst registry + shared fields/index.rst +.. _library-index-capi: + +*********** +C Extension +*********** + .. toctree:: :maxdepth: 2 :caption: C API Python Types diff --git a/docs/sphinx/source/library/model.rst b/docs/sphinx/source/library/model.rst index b7e81ce6..cb09b03d 100644 --- a/docs/sphinx/source/library/model.rst +++ b/docs/sphinx/source/library/model.rst @@ -6,61 +6,10 @@ Struct Model *TODO* +.. toctree:: + :maxdepth: 2 -Base classes ------------- - -.. autoclass:: caterpillar.py.Action - - .. versionadded:: 2.4.0 - -.. autoclass:: caterpillar.model.Sequence() - :members: - -.. autoclass:: caterpillar.model.Struct() - :members: - -.. autoclass:: caterpillar.model.BitFieldGroup - :members: - -.. autoclass:: caterpillar.model.BitField - :members: - -.. autoclass:: caterpillar.model.UnionHook - :members: - :special-members: __model_init__, __model_setattr__ - - -Standard functions ------------------- - -.. autofunction:: caterpillar.model.struct - -.. autofunction:: caterpillar.model.union - -.. autofunction:: caterpillar.model.pack - -.. autofunction:: caterpillar.model.pack_into - -.. autofunction:: caterpillar.model.pack_file - -.. autofunction:: caterpillar.model.unpack - -.. autofunction:: caterpillar.model.unpack_file - -.. autofunction:: caterpillar.model.bitfield - - -Templates ---------- - -.. autoclass:: caterpillar.model.TemplateTypeVar - :members: - -.. autofunction:: caterpillar.model.istemplate - - -.. autofunction:: caterpillar.model.template - - -.. 
autofunction:: caterpillar.model.derive \ No newline at end of file + model/sequence + model/struct + model/bitfield + model/template diff --git a/docs/sphinx/source/library/model/bitfield.rst b/docs/sphinx/source/library/model/bitfield.rst new file mode 100644 index 00000000..f013e40b --- /dev/null +++ b/docs/sphinx/source/library/model/bitfield.rst @@ -0,0 +1,77 @@ +.. _library_model_bitfield: + +======== +Bitfield +======== + +.. versionchanged:: 2.5.0 + New revised concept since v2.5.0. + +.. py:currentmodule:: caterpillar.model + +Main Interface +-------------- + +.. autoclass:: Bitfield + :members: + + .. versionchanged:: 2.5.0 + Updated concept. See the _reference_ for more information. + +.. autoclass:: BitfieldGroup + :members: + + .. versionchanged:: 2.5.0 + Renamed from ``BitFieldGroup`` to ``BitfieldGroup`` + +.. autoclass:: BitfieldEntry + :members: + + .. versionadded:: 2.5.0 + + +.. autofunction:: getbits + +.. autofunction:: issigned + +.. autofunction:: bitfield + + .. versionchanged:: 2.5.0 + Added the ``alignment`` parameter. + +Default Factory Classes +----------------------- + +.. autoclass:: BitfieldValueFactory + :members: + + .. versionadded:: 2.5.0 + +.. autoclass:: CharFactory + :members: + + .. versionadded:: 2.5.0 + +.. autoclass:: EnumFactory + :members: + + .. versionadded:: 2.5.0 + + +Default Options +--------------- + +.. py:attribute:: EndGroup + + .. versionadded:: 2.5.0 + + Alias for the :attr:`B_GROUP_END` flag, used to indicate that the current bitfield group should be finalized. + + +.. py:attribute:: NewGroup + + Alias for the :attr:`B_GROUP_NEW` flag, used to indicate that a new bitfield group should be started. + + +.. autoclass:: SetAlignment + :members: diff --git a/docs/sphinx/source/library/model/sequence.rst b/docs/sphinx/source/library/model/sequence.rst new file mode 100644 index 00000000..3e1812dc --- /dev/null +++ b/docs/sphinx/source/library/model/sequence.rst @@ -0,0 +1,10 @@ +.. 
_library_model_base: + +======== +Sequence +======== + +.. py:currentmodule:: caterpillar.model + +.. autoclass:: Sequence + :members: \ No newline at end of file diff --git a/docs/sphinx/source/library/model/struct.rst b/docs/sphinx/source/library/model/struct.rst new file mode 100644 index 00000000..b9af5a03 --- /dev/null +++ b/docs/sphinx/source/library/model/struct.rst @@ -0,0 +1,46 @@ +.. _library_model_struct: + +====== +Struct +====== + +.. py:currentmodule:: caterpillar.model + + +The *Struct* class +------------------ + +.. autoclass:: caterpillar.model.Struct + :members: + + +Unions +------ + +.. autofunction:: caterpillar.model.union + +.. autoclass:: caterpillar.model.UnionHook + :members: + :special-members: __model_init__, __model_setattr__ + + +Standard Interface +------------------ + +.. autofunction:: caterpillar.model.struct + +.. autofunction:: caterpillar.model.pack + +.. autofunction:: caterpillar.model.pack_into + +.. autofunction:: caterpillar.model.pack_file + +.. autofunction:: caterpillar.model.unpack + +.. autofunction:: caterpillar.model.unpack_file + +.. autofunction:: caterpillar.model.sizeof + + .. versionchanged:: 2.5.0 + + Now checks if the provided object implements the :class:`_SupportsSize` protocol \ No newline at end of file diff --git a/docs/sphinx/source/library/model/template.rst b/docs/sphinx/source/library/model/template.rst new file mode 100644 index 00000000..5d5f3704 --- /dev/null +++ b/docs/sphinx/source/library/model/template.rst @@ -0,0 +1,18 @@ +.. _library_model_template: + +========= +Templates +========= + +.. py:currentmodule:: caterpillar.model + +.. autoclass:: caterpillar.model.TemplateTypeVar + :members: + +.. autofunction:: caterpillar.model.istemplate + + +.. autofunction:: caterpillar.model.template + + +.. 
autofunction:: caterpillar.model.derive diff --git a/docs/sphinx/source/library/options.rst b/docs/sphinx/source/library/options.rst index 16cc057e..499c56d3 100644 --- a/docs/sphinx/source/library/options.rst +++ b/docs/sphinx/source/library/options.rst @@ -1,217 +1,313 @@ .. _options: -******* -Options -******* +**************** +Options / Flags +**************** -*TODO* +This library provides a flexible system of options and flags to control various aspects +of data structures, serialization, and deserialization behaviors. Options can be set +globally or applied to specific types to fine-tune performance, memory usage, and +structure representation. -Options by type +Options by Type --------------- -Global options -^^^^^^^^^^^^^^ - -.. data:: caterpillar.options.O_ARRAY_FACTORY - - To control the returned array type, a factory class or method can be set - in this option using its attached value. For instance, we can incorporate - the :code:`numpy.ndarray` into our unpacked objects: - - .. code-block:: python - - from caterpillar.options import O_ARRAY_FACTORY - from numpy import array - - # just set the option's value - O_ARRAY_FACTORY.value = array +Options are grouped by the type of object they affect. +Global Options +^^^^^^^^^^^^^^ - With the new configuration applied, your unpacked objects will occupy less - memory space. The following table shows the size of unpacked objects in bytes: +.. attribute:: caterpillar.options.O_ARRAY_FACTORY + + Specifies a factory function or class for creating array instances. This allows you + to replace the default array type with a more efficient or specialized implementation. + For example, you can use :code:`numpy.ndarray` for large numeric arrays to reduce + memory usage: + + .. 
code-block:: python + + from caterpillar.options import O_ARRAY_FACTORY + from numpy import array + + # Set the array factory globally + O_ARRAY_FACTORY.value = array + + With this configuration, unpacked arrays will use the specified factory, potentially + leading to significant memory savings. The following table demonstrates the memory + footprint of unpacked objects under different configurations: + + .. list-table:: Object sizes for different configuration options + :header-rows: 1 + :stub-columns: 1 + :widths: 10, 15, 15 + + * - Configuration + - :code:`formats/nibarchive` + - :code:`formats/caf` [*]_ + * - Default configuration + - 26520 + - 10608 + * - :code:`__slots__` classes + - 14240 + - 3848 + * - Default configuration with :code:`numpy.ndarray` + - 7520 + - 1232 + * - :code:`__slots__` classes with :code:`numpy.ndarray` + - 6152 + - 384 + * - Original file size + - **1157** + - **5433** + +.. [*] A CAF audio file may include a chunk that contains only zeroed data. By ignoring + this chunk during unpacking, the in-memory size can be smaller than the original file. + +Sequence Options +^^^^^^^^^^^^^^^^ - .. list-table:: Object sizes between different configuration options - :header-rows: 1 - :stub-columns: 1 - :widths: 10, 15, 15 +.. note:: - * - Configuration - - :code:`formats/nibarchive` - - :code:`formats/caf` [*]_ - * - Default configuration - - 26520 - - 10608 - * - :code:`__slots__` classes - - 14240 - - 3848 - * - Default configuration and :code:`numpy.ndarray` - - 7520 - - 1232 - * - :code:`__slots__` classes and :code:`numpy.ndarray` - - 6152 - - 384 - * - Original filesize - - **1157** - - **5433** + All sequence-related options also apply to structs. -.. [*] A CAF audio comes with a special chunk type that stores only zeros. By ingoring - the data in this chunk, we can achieve less bytes in memory than the file originally used. +.. 
attribute:: caterpillar.options.S_DISCARD_UNNAMED -Sequence options -^^^^^^^^^^^^^^^^ + When enabled, this option discards all *unnamed* fields from the final unpacked result. + An *unnamed* field must follow the convention: -.. note:: + .. code-block:: bnf - All sequence-related configuration options are applied to structs as well. + := '_' [0-9]* + This allows you to include padding or other non-essential fields without polluting + the output. For example: -.. data:: caterpillar.options.S_DISCARD_UNNAMED + .. code-block:: python + :caption: Sequence with an unnamed field - Using this option, all *unnamed* fields will be discarded and won't be - visible in the final result object. An *unnamed* field must follow the - following naming convention: + >>> schema = Seq({ + ... "a": uint8, + ... "_": padding[10] + ... }, options={opt.S_DISCARD_UNNAMED}) + >>> data = b"\xFF" + bytes(10) + >>> unpack(schema, data) + {'a': 255} + >>> pack(_, schema) + b'\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' - .. code-block:: bnf +.. data:: caterpillar.options.S_DISCARD_CONST - := '_' [0-9]* + Discards all constant fields from the final result. This is useful for fields + that serve only validation or padding purposes. - Therefore, it is possible to include more than one unnamed field, for - example: +Struct Options +^^^^^^^^^^^^^^ - .. code-block:: python - :caption: Simple sequence with an unnamed field +.. data:: caterpillar.options.S_SLOTS - >>> schema = Seq({ - ... "a": uint8, - ... "_": padding[10] - ... }, options={opt.S_DISCARD_UNNAMED}) - >>> data = b"\xFF" + bytes(10) - >>> unpack(schema, data) - {'a': 255} - >>> pack(_, schema) - b'\xff\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' + When enabled, this option generates a struct class with a :code:`__slots__` declaration. + Using :code:`__slots__` significantly reduces the per-object memory overhead by preventing + the creation of an instance :code:`__dict__`. For example: + .. code-block:: python -.. 
data:: caterpillar.options.S_DISCARD_CONST + @struct + class DictClass: + a: uint8 + b: uint8 + c: uint8 + d: uint8 + e: uint8 + f: uint8 + g: uint8 + + @struct(options={opt.S_SLOTS}) + class SlotsClass: + a: uint8 + b: uint8 + c: uint8 + d: uint8 + e: uint8 + f: uint8 + g: uint8 - This option will only discard constant fields. + Comparing memory usage using `pympler `_: + .. code-block:: python + :caption: Memory usage difference with :code:`__slots__` + >>> o1 = DictClass(*[0xFF]*7) + >>> asizeof.asizeof(o1) + 712 + >>> o2 = SlotsClass(*[0xFF]*7) + >>> asizeof.asizeof(o2) + 120 -Struct options -^^^^^^^^^^^^^^ + The class definition itself also occupies less memory: -.. data:: caterpillar.options.S_SLOTS + .. code-block:: python - Feature option that will create a new class with the :code:`__slots__` attribute - to lower required space. Take the following two structs into consideration: - - .. code-block:: python - - @struct - class DictClass: - a: uint8 - b: uint8 - c: uint8 - d: uint8 - e: uint8 - f: uint8 - g: uint8 - - @struct(options={opt.S_SLOTS}) - class SlotsClass: - a: uint8 - b: uint8 - c: uint8 - d: uint8 - e: uint8 - f: uint8 - g: uint8 - - Each struct stores seven fields in total, whereby :code:`Format2` uses :code:`__slots__` - to store each member. We used `pympler `_ to - retrieve the amount of occupied bytes per object: - - .. code-block:: python - :caption: Size difference between a :code:`__slots__` class and a normal Python class - - >>> o1 = DictClass(*[0xFF]*7) - >>> asizeof.asizeof(o1) - 712 - >>> o2 = SlotsClass(*[0xFF]*7) - >>> asizeof.asizeof(o2) - 120 - - In addition, the overall used memory will be reduced, because the defined type will - also occupy less memory: - - .. code-block:: python - - >>> from sys import getsizeof - >>> getsizeof(DictClass) - 1704 - >>> getsizeof(SlotsClass) - 936 + >>> from sys import getsizeof + >>> getsizeof(DictClass) + 1704 + >>> getsizeof(SlotsClass) + 936 .. 
data:: caterpillar.options.S_REPLACE_TYPES - This option was designed for documentation purposes only and should be - used in that context only. It will alter the class' annotations and remove - all :class:`caterpillar.fields.Field` instances. + Replaces field types in the class annotations with their native Python equivalents. + This option is primarily intended for documentation purposes. It removes all + :class:`caterpillar.fields.Field` instances from annotations. For example: - Consider the following struct: - - .. code-block:: python + .. code-block:: python @struct class Format: a: uint8 b: String(10) - c: uuid + c: Uuid - # use the following line to enable type replacement globally + # Enable type replacement globally opt.set_struct_flags(opt.S_REPLACE_TYPES) - # otherwise, just add options={opt.S_REPLACE_TYPES} to the - # @struct call. + # Or apply it directly: + @struct(options={opt.S_REPLACE_TYPES}) - You will notice the difference in the following output on disabled - type replacement (1) and enabled replacement (2): + Comparison of annotations: - .. code-block:: python + .. code-block:: python - >>> Format.__annotations__ # (1) - {'a': , 'b': , 'c': } - >>> Format.__annotations__ # (2) + >>> Format.__annotations__ # Without replacement + {'a': , 'b': , 'c': } + >>> Format.__annotations__ # With replacement {'a': , 'b': , 'c': } - .. data:: caterpillar.options.S_EVAL_ANNOTATIONS - If you decide to use :code:`from __future__ import annotations`, you have to set this - option for each struct in the scope of this import, because it will stringify all - placed annotations. Therefore, they need to be executed before analyzed. + Ensures that annotations are evaluated at runtime if you use + :code:`from __future__ import annotations`. When enabled, stringified annotations + are evaluated before analysis. - .. caution:: + .. caution:: - Use this option with caution! It may result in execution of untrusted code, be aware! + Use with care! 
Evaluating annotations can lead to the execution of untrusted code. .. data:: caterpillar.options.S_UNION - Internal option to add union behaviour to the :code:`caterpillar.model.Struct` class. - - + Internal option that enables union behavior for the :class:`caterpillar.model.Struct` class. -Field options +Field Options ^^^^^^^^^^^^^ .. attribute:: caterpillar.options.F_KEEP_POSITION + When enabled, this option retains the position information of each field in the + serialized data. This is the default behavior. + .. attribute:: caterpillar.options.F_DYNAMIC + Marks the field as dynamic, indicating that its size or format is determined at + runtime rather than being statically defined. This is used internally to support + advanced features like variable-length fields. + .. attribute:: caterpillar.options.F_SEQUENTIAL + Indicates that this field should be processed sequentially relative to other + fields, ensuring that order-dependent parsing or packing logic is respected. + This is primarily for internal use in complex layouts. + .. attribute:: caterpillar.options.F_OFFSET_OVERRIDE + Allows you to override the calculated offset for this field within its parent + structure. This means that the offset used by the :meth:`struct.__matmul__` + operation will be used from here on. + +Bit-field Options +^^^^^^^^^^^^^^^^^ + +Bit-field options provide fine-grained control over the alignment, grouping, +and interpretation of individual fields and entire bit-field classes. + + +.. attribute:: caterpillar.options.B_OVERWRITE_ALIGNMENT + + Replaces the current alignment with the alignment explicitly defined by the field. + This option is applicable only to fields within a bit-field class. + + For example, the following structure overrides the default alignment of 8 bits + with a 32-bit alignment for the specified field: + + .. code-block:: python + + @bitfield + class Format: + # Override alignment from 8 bits to 32 bits + a: 4 - uint32 | B_OVERWRITE_ALIGNMENT + + + ..
versionadded:: 2.5.0 + +.. attribute:: caterpillar.options.B_GROUP_END + + Adds the annotated field to the current bit-field group and immediately aligns + the group's total size according to the active alignment constraints. + This option is applicable only to fields. + + For example, to avoid automatic alignment to the next multiple of 16 bits: + + .. code-block:: python + + @bitfield + class Format: + a1: 4 + a2: 3 + # Prevent automatic alignment to 16 bits: + a3: (1, B_GROUP_END) + b1: 1 + ... + + .. versionadded:: 2.5.0 + +.. attribute:: caterpillar.options.B_GROUP_NEW + + Finalizes the current bit-field group by aligning it, then starts a new group + and adds the annotated field to this new group. + This option is applicable only to fields. + + Example usage where a new group is started with its own alignment: + + .. code-block:: python + + @bitfield + class Format: + a1: 4 + a2: 4 + # Finalize current group and start new one with 8-bit alignment: + b1: (1, B_GROUP_NEW) + + .. versionadded:: 2.5.0 + +.. attribute:: caterpillar.options.B_GROUP_KEEP + + *Applicable only to classes.* + + When applied at the class level, this option instructs the bit-field structure + to preserve existing group alignments throughout parsing and packing. + It affects how alignment statements are interpreted within the class body. + + .. versionadded:: 2.5.0 + +.. attribute:: caterpillar.options.B_NO_AUTO_BOOL + + *Applicable only to classes.* + + Prevents the automatic assignment of a boolean type factory for fields + that are exactly one bit in size. By default, one-bit fields are treated + as boolean values; enabling this option disables that behavior, preserving + the raw integer representation instead. + + .. versionadded:: 2.5.0 + + Interface diff --git a/docs/sphinx/source/library/shared.rst b/docs/sphinx/source/library/shared.rst new file mode 100644 index 00000000..bcc57b52 --- /dev/null +++ b/docs/sphinx/source/library/shared.rst @@ -0,0 +1,82 @@ +.. 
_lib_shared: + +Shared Concepts +=============== + +.. automodule:: caterpillar.shared + +.. py:attribute:: ATTR_STRUCT + :value: "__struct__" + + All models annotated with either ``@struct`` or ``@bitfield`` are struct + containers. Thus, they store the additional class attribute :code:`__struct__`. + + Internally, any types utilizing this attribute can be employed within a + struct, bitfield, or sequence definition. The type of the stored value + must conform to the :class:`_StructLike` protocol. + + .. versionchanged:: 2.5.0 + This attribute is now used when calling :func:`getstruct` or :func:`hasstruct`. + + +.. py:attribute:: ATTR_BYTEORDER + :value: "__byteorder__" + + .. versionadded:: 2.5.0 + Moved from *caterpillar.byteorder*. + + +.. py:attribute:: ATTR_TYPE + :value: "__type__" + + .. versionadded:: 2.5.0 + + +.. py:attribute:: ATTR_BITS + :value: "__bits__" + + .. versionadded:: 2.5.0 + Moved from *caterpillar.model._bitfield*. + + +.. py:attribute:: ATTR_SIGNED + :value: "__signed__" + + .. versionadded:: 2.5.0 + Moved from *caterpillar.model._bitfield*. + + +.. py:attribute:: ATTR_TEMPLATE + :value: "__template__" + + .. versionadded:: 2.5.0 + Moved from *caterpillar.model._template*. + + +.. py:attribute:: ATTR_ACTION_PACK + :value: "__action_pack__" + + .. versionadded:: 2.4.0 + + +.. py:attribute:: ATTR_ACTION_UNPACK + :value: "__action_unpack__" + + .. versionadded:: 2.4.0 + + +.. py:attribute:: MODE_PACK + :value: 0 + + +.. py:attribute:: MODE_UNPACK + :value: 1 + + +.. autofunction:: getstruct + +.. autofunction:: hasstruct + +.. autofunction:: typeof + +.. autoclass:: Action diff --git a/docs/sphinx/source/reference/capi/context.rst b/docs/sphinx/source/reference/capi/context.rst index dcdfc029..2f97e731 100644 --- a/docs/sphinx/source/reference/capi/context.rst +++ b/docs/sphinx/source/reference/capi/context.rst @@ -6,19 +6,4 @@ Context Protocol **************** -Caterpillar provides a special protocol for working with contexts.
It is -designed to enable access to context variables while packing or unpacking -data. This procotol is implemented by *all* context-related classes in -this package. - - -.. py:method:: object.__context_getattr__(self, path) -> object - :noindex: - - This function is used to retrieve a value from the context. It is implementation - dependent whether nested paths are supported. By default, multiple path elements - are sperated by a single dot. - - For example, consider the following path: :code:`"foo.bar"`. The context - implementation should first resolve the value of :code:`"foo"` and then - retrieve the value of :code:`"bar"` from the result. +*Moved to* :ref:`ref_datamodel_protocol_contextlike` \ No newline at end of file diff --git a/docs/sphinx/source/reference/datamodel.rst b/docs/sphinx/source/reference/datamodel.rst index 7a737d30..8f8c880f 100644 --- a/docs/sphinx/source/reference/datamodel.rst +++ b/docs/sphinx/source/reference/datamodel.rst @@ -6,9 +6,6 @@ Data Model .. _objects: -Structs, Sequences and Fields -============================= - :dfn:`Structs` serve as the foundation of this library. All data within the framework undergoes the process of packing and unpacking using structs or :class:`~caterpillar.abc._StructLike` objects. There are three possible types of structs: @@ -31,665 +28,25 @@ objects. There are three possible types of structs: provide a modular approach for extending the library. Consideration of partial structs is essential when aiming to extend the capabilities of this framework. -Standard Types -============== - -Below is a list of types provided by *Caterpillar*. These types are designed to maintain -compatibility with older versions of the library, making them particularly important. - -Sequence --------- - -As previously explained, a sequence functions independently of fields. The library introduces -the :class:`~caterpillar.model.Sequence` as a named finite collection of :class:`~caterpillar.fields.Field` objects. 
A *Sequence* -operates on a model, which is a string-to-field mapping by default. Later, we will discuss -the distinctions between a *Sequence* and a *Struct* regarding the model representation. - -A sequence definition entails the specification of a :class:`~caterpillar.model.Sequence` object by directly -indicating the model to use. Inheritance poses a challenge with sequences, as they are not -designed to operate on a type hierarchy. The default instantiation with all default options -involves passing the dictionary with all fields directly: - ->>> Format = Sequence({"a": uint8, "b": uint32}) - -.. admonition:: Programmers Note: - - All sequence types introduced by this library can also store so-called *unnamed* fields. - These fields are not visible in the unpacked result and are automatically packed, removing - concerns about them when the option ``S_DISCARD_UNNAMED`` is active. Their names usually - begin with an underscore and must solely contain numbers (e.g., :code:`_123`). - -The sequence follows the :class:`~caterpillar.fields.Field` configuration model, allowing sequence and -field-related options to be set. As mentioned earlier, the ``S_DISCARD_UNNAMED`` option can -be used for example to exclude all unnamed fields from the final representation. A complete -list of all configuration options and their impact can be found in :ref:`options`. - -All sequences store a configurable :class:`ByteOrder` and :class:`Arch` as architecture, -which are passed to **all** fields in the current model. For more information on why these -classes are not specified as an enum class, please refer to :ref:`byteorder`. - -Inheritance in sequences is intricate, as a :class:`~caterpillar.model.Sequence` is constructed from a dictionary -of elements. We can attempt to simulate a chain of extended *base sequences* using the -concatenation of two sequences. The :meth:`~sequence.__add__` method will *import* all fields -from the other specified sequence. 
The only disadvantage is the placement required by the -operator. For instance: - -.. code-block:: python - - >>> BaseFormat = Sequence({"magic": b"MAGIC", "a": uint8}) - >>> Format = Sequence({"b": uint32, "c": uint16}) + BaseFormat - -will result in the following field order: - -.. code-block:: python - - >>> list(Format.get_members()) - ['b', 'c', 'magic', 'a'] - -which is not the intended order. The correct order should be :code:`['magic', 'a', 'b', 'c']`. -This can be achieved by using the :code:`BaseFormat` instance as the first operand. - -.. warning:: - This will alter the *BaseFormat* sequence, making it unusable elsewhere as the *base* for - all sub-sequences. Therefore, it is not recommended to use inheritance within sequences. - The :class:`~caterpillar.model.Struct` class resolves this issue with ease. - -Nesting sequences is allowed by default and can be achieved by incorporating another -:class:`~caterpillar.model.Sequence` into the model. It is important to note that *nesting* is distinct from -*inheritance*, adding an additional layer of packing and unpacking. - ->>> Format = Sequence({"other": BaseFormat, "b": uint32}) - - -Struct -^^^^^^ - -A *struct* describes a finite collection of named fields. In contrast to a *sequence*, a *struct* -utilizes Python classes as its model. The annotation feature in Python enables the definition of -custom types as annotations, enabling this special struct class to create a model solely based on -class annotations. Additionally, it generates a ``dataclass`` of the provided model, offering a -standardized string representation. - -Several differences exist between a :class:`~caterpillar.model.Sequence` and a -:class:`~caterpillar.model.Struct`, with the most significant ones highlighted below: - - -.. 
list-table:: Behaviour of structs and sequences - :header-rows: 1 - :widths: 10, 15, 15 - :stub-columns: 1 - - * - - - Sequence - - Struct - * - Model Type - - dict - - type - * - Inheritance - - No - - Yes - * - Attribute Access - - :code:`x["name"]` - - :code:`getattr(x, "name", None)` - * - Unpacked Type (also needed to pack) - - dict [*]_ - - instance of model - * - Documentation - - No - - Yes - - -.. [*] The unpacked values are stored inside a :class:`~caterpillar.context.Context` instance, a direct subclass of a dictionary. - -As evident from the comparison, the :class:`~caterpillar.model.Struct` class introduces new features such as -inheritance and documentation support. It's crucial to note that inheritance uses -struct types exclusively. - -The :class:`~caterpillar.model.Sequence` class implements a specific process for creating an internal representation -of the given model. The :class:`~caterpillar.model.Struct` class enhances this process by handling default values, replacing -types for documentation purposes, or removing annotation fields directly from the model. Additionally, -this class adds :attr:`~class.__struct__` to the model afterward. - -.. admonition:: Implementation Note - - If you decide to use the ``annotation`` feature from the ``__future__`` module, it is necessary to - enable :attr:`~options.S_EVAL_ANNOTATIONS` since it "`Stringizes`_" all annotations. ``inspect`` then - evaluates all strings, introducing a potential security risk. Exercise with caution when evaluating code! - -Specifying structs is as simple as defining `Python Classes`_: - -.. code-block:: python - - >>> @struct - ... class BaseFormat: - ... magic: b"MAGIC" - ... a: uint8 - ... - -Internally, a representation with all required fields and their corresponding names is -created. As :code:`b"MAGIC"` or :code:`uint8` are instances of types, the type replacement -for documentation purposes should be enabled, as shown in :ref:`struct_type`. 
- -As described above, this class introduces an easy-to-use inheritance system using the method -resolution order of Python: - -.. code-block:: python - - >>> @struct - ... class Format(BaseFormat): - ... b: uint32 - ... c: uint16 - ... - >>> list(Format.__struct__.get_members()) - ['magic', 'a', 'b', 'c'] - -.. admonition:: Programmers Note - - As the :class:`~caterpillar.model.Struct` class is a direct subclass of :class:`~caterpillar.model.Sequence`, nesting is supported - by default. That means, so-called *anonymous inner* structs can be defined within a class - definition. - - .. code-block:: python - - >>> @struct - ... class Format: - ... a: uint32 - ... b: {"c": uint8} - ... - - It is not recommended to use this technique as the inner structs can't be used anywhere else. - Anonymous inner union definitions are tricky and are not officially supported yet. There are - workarounds to that problem, which are discussed in the API documentation of :class:`~caterpillar.model.Sequence`. - - -.. _union-reference: - -Union -^^^^^ - -Internally constructing unions in the library poses challenges. The current implementation uses -the predefined behavior of the :class:`~caterpillar.model.Sequence` class for union types. It selects the field with -the greatest length as its representational size. *Unions*, much like *BitFields*, must store a static -size. - -**In essence, they behave similarly to C unions.** A traditional function hook will be installed on -the model to capture field assignments. What that means will be illustrated by the following example: - -.. code-block:: python - - >>> @union - ... class Format: - ... foo: uint16 - ... bar: uint32 - ... baz: boolean - ... - >>> obj = Format() # union does not need any values - -Right now, all attributes store the default value (:code:`None`). If we assign a new value to one field, it -will be applied to all others. Hence, - ->>> obj.bar = 0xFF00FF00 - -will result in - -.. 
code-block:: python - - >>> obj - Format(foo=65280, bar=4278255360, baz=False) - - -.. admonition:: Implementation Detail - - The constructor is the only place where there is no synchronization between fields. Additionally, the current - implementation may produce some overhead, because every *refresh* will first pack the new value and then - executes *unpack* on all other fields. - -BitField -^^^^^^^^ - -A *BitField*, despite its name suggesting a field of bits, is a powerful structure designed for -detailed byte inspection. Similar to other structures, it is a finite collection of named fields. This -section will introduce potential challenges associated with the implementation of a :class:`~caterpillar.model.BitField` -and explains its behavior. - -.. caution:: - This class is still experimental, and caution is advised. For a list of known disadvantages or - problems, refer to the information provided below. - -As mentioned earlier, a *BitField* allows the inspection of individual bits within parsed bytes. Its -internal model relies on a special function or attribute, namely :meth:`~object.__bits__`. Consequently, -a bitfield has a predefined length and will always possess a length that can be represented in bytes. - -The :class:`~caterpillar.model.BitField` class not only stores the existing model representation with a name-to-field -mapping and a collection of all fields but also introduces a special organizational class: -:class:`~caterpillar.model.BitFieldGroup`. Each group defines its bit size, the absolute bit position in the bitfield, -and a mapping of fields to their relative bit position in the current group, along with the field's -width. In the following example, three groups are created: - ->>> @bitfield -... class Format: -... a : uint8 # Group 1, pos=0, size=8 -... _ : 0 # Group 2, pos=8, size=8 -... b : 15 - uint16 # \ -... c : 1 # \ Group 3, pos=16, size=16 -... 
- -- ``a``: The first field creates a group with a size of eight bits at position zero. -- ``_``: Next, a zero-sized field indicates that padding until the end of the current byte should be - added. As we start from bit position ``0``, one byte will be filled with zeros. -- ``b``: The third field only uses 15 bits of a 16-bit wide field (2 bytes inferred using :code:`uint16`) -- ``c``: The last field uses the final bit of our current group. - -*TODO: describe process of collecting fields, packing and unpacking* - - -Field ------ - -The next core element of this library is the *Field*. It serves as a context storage to store configuration data -about a struct. Even sequences and structs can be used as fields. The process is straightforward: each custom operator -creates an instance of a :class:`~caterpillar.fields.Field` with the applied configuration value. Most of the time, this value can be -static or a :ref:`context_lambda`. A field implements basic behavior that should not be duplicated, such as -conditional execution, exception handling with default values, and support for a built-in switch-case structure. - -As mentioned earlier, some primitive structs depend on being linked to a :class:`~caterpillar.fields.Field`. This is because all -configuration elements are stored in a :class:`~caterpillar.fields.Field` instance rather than in the target struct instance. More -information about each supported configuration can be found in :ref:`operators`. - -.. _greedy: - -Greedy ------- - -This library provides direct support for *greedy* parsing. Leveraging Python's syntactic features, this special form -of parsing is enabled using the `Ellipsis`_ (:code:`...`). All previously introduced structs implement greedy parsing -when enabled. - ->>> field = uint8[...] - -This special type can be used in places where a length has to be specified. 
Therefore, it can be applied to all array -:code:`[]` declarations and constructors that take the length as an input argument, such as :class:`CString`, for -example. - -.. code-block:: python - - >>> field = Field(CString(...)) - >>> unpack(field, b"abcd\x00") - 'abcd' - -.. _prefixed: - -Prefixed --------- - -In addition to greedy parsing, this library supports prefixed packing and unpacking as well. With *prefixed*, we refer -to the length of an array of elements that should be parsed. In this library, the :code:`slice` class is to achieve a -prefix option. - ->>> field = CString[uint32::] - - -.. _context-reference: - -Context -------- - -The context is another core element of this framework, utilized to store all relevant variables needed during the -process of packing or unpacking objects. The top-level :meth:`~caterpillar.model.unpack` and :meth:`~caterpillar.model.pack` methods are designed to -create the context themselves with some pre-defined (internal) fields. - -.. admonition:: Implementation Note - - :class:`Context` objects are essentially :code:`dict` objects with enhanced capabilities. Therefore, all - operations supported on dictionaries are applicable. - -The context enables special attribute-like access using :code:`getattr` if the attribute wasn't defined in the -instance directly. All custom attributes are stored in the dictionary representation of the instance. - -.. attribute:: CTX_PARENT - :value: "_parent" - - All :class:`Context` instances *SHOULD* contain a reference to the parent context. If the returned reference is - :code:`None`, it can be assumed that the current context is the root context. If this attribute is set, it - *MUST* point to a :class:`Context` instance. - -.. attribute:: CTX_OBJECT - :value: "_obj" - - When packing or unpacking objects, the current object attributes are stored within the *object* context. This - is a special context that allows access to previously parsed fields or attributes of the input object. 
To - minimize the number of calls using this attribute, a shortcut named :code:`this` was defined, which - automatically inserts a path to the object context. - - -.. attribute:: CTX_STREAM - :value: "_io" - - The input or output stream *MUST* be set in each context instance to prevent access errors on missing stream - objects. - - .. seealso:: - Discussion on `Github `_ why this attribute has - to be set in every context instance. - -.. attribute:: CTX_PATH - :value: "_path" - - Although it is optional to provide the current parsing or building path, it is *recommended*. All nesting - structures implement a behavior that automatically adds a sub-path while packing or unpacking. Special - names are :code:`""` for the starting path and :code:`""` for greedy sequence elements. - -.. attribute:: CTX_FIELD - :value: "_field" - - In case a struct is linked to a field, the :class:`~caterpillar.fields.Field` instance will always set this context variable - to be accessible from within the underlying struct. - - -.. attribute:: CTX_INDEX - :value: "_index" - - When packing or unpacking collections of elements, the current working index is given under this context - variable. This variable is set only in this specific situation. - - -.. attribute:: CTX_VALUE - :value: "_value" - - In case a switch-case statement is activated in a field, the context will receive the parsed value in this - context variable temporarily. - -.. attribute:: CTX_POS - :value: "_pos" - - Currently undefined. - -.. attribute:: CTX_OFFSETS - :value: "_offsets" - - **Internal use only:** This special member is only set in the root context and stores all packed objects that - should be placed at an offset position. - -.. _context_lambda: - -Context lambda -^^^^^^^^^^^^^^ - -Dynamic sized structs are supported by this library using the power of so-called *context lambdas*. 
This library -introduces a special callable :class:`_ContextLambda`, that takes a :class:`Context` instance and returns the -desired result. To mimic a *context lambda*, the :meth:`__call__` method has to be implemented. - -Dynamic-sized structs are supported by this library using the power of so-called *context lambdas*. This library -introduces a special callable :class:`_ContextLambda` that takes a :class:`Context` instance and returns the # -desired result. To mimic a *context lambda*, the :meth:`__call__` method has to be implemented. - -.. function:: object.__call__(self, context) - - This library does not distinguish between callable objects and *context lambdas*. They are treated as the - same class (this aspect is under subject to changes). - - -Context path -^^^^^^^^^^^^ - -The path of a context is a specialized form of a :ref:`context_lambda` and supports lazy evaluation of most -operators (conditional ones excluded). Once called, they try to retrieve the requested value from within -the given :class:`Context` instance. Below is a list of default paths designed to provide a relatively easy -way to access the context variables. - -.. attribute:: ctx - :value: "" - - This special path acts as a wrapper to access all variables within the top-level :class:`Context` object. - -.. attribute:: this - :value: "_obj" - - As described before, a special *object context* is created when packing or unpacking structs that store - more than one field. - -.. attribute:: parent - :value: "_parent._obj" - - A shortcut to access the object context of the parent context. - - -.. _ref-templates: - -Templates ---------- - -A specialized form of structs are *templates*, which are basically generic Python classes. Think of them -as blueprints for your final classes/structs that contain placeholders for actual types. As in C++, a -template needs type arguments, in this case we will name them :class:`~caterpillar.model.TemplateTypeVar`. 
- -Actually, there are two different types of type variables: - -* Required: - These variables are **required** when creating a new struct based on the template and they - can be used as positional arguments within the type derivation. - -* Positional: - These arguments are usable only as keyword arguments and are may be optional if a default value - is supplied. - -These template type variables can be created using simple variable definitions: - ->>> A = TemplateTypeVar("A") - -.. important:: - A template class is **not** a struct definition. It specifies a blueprint for the final class. - -A template class is defined like a struct, union or bitfield class, but without being a -dataclass nor storing a struct instance. - -.. code-block:: python - - >>> @template(A, "B") - ... class FormatTemplate: - ... foo: A - ... bar: B - ... baz: uint32 - ... - -The defined class then can be used to create new classes based on the provided class -structure. For instance, - -.. code-block:: python - - >>> Format = derive(FormatTemplate, A=uint32, B=uint8) - >>> Format - - -will return an anonymous class (in this case). Normally, *caterpillar* tries to infer the -variable name from the current module (if :code:`name=...`). In summary, every time -:meth:`~caterpillar.model.derive` is called, a new class will be created if not already -defined. - -The current implementation will place template information about the current class using -a special class attribute: :attr:`~class.__template__`. - -To support sub-classes of templates, we can declare a derived class as partial: - -.. code-block:: python - - >>> Format32 = derive(FormatTemplate, A=uint32, partial=True) - -Again, the resulting class is **not** a struct, but another template class. - -.. admonition:: Developer's note - - By now, a template won't copy existing field documentation comments. Therefore, you - can't display inherited members using sphinx. 
- -Special method names -==================== - -A class can either extend :class:`_StructLike` or implement the special methods needed -to act as a struct. The subsequent sections provide an overview of all special methods -and attributes introduced by this library. Further insights into extending structs with -custom operators can be found in :ref:`operators`. - -Emulating Struct Types ----------------------- - -.. method:: object.__pack__(self, obj, context) - - Invoked to serialize the given object into an output stream, :meth:`~object.__pack__` - is designed to implement the behavior necessary for packing a collection of elements - or a single element. Accordingly, the input obj may be an :code:`Iterable` or a - singular element. - - The absence of a standardized implementation for deserializing a collection of elements - is deliberate. For example, all instances of the :class:`FormatField` utilize the Python - library `struct`_ internally to pack and unpack data. To optimize execution times, a - collection of elements is packed and unpacked in a single call, rather than handling each - element individually. - - The context must incorporate specific members, mentioned in :ref:`context`. Any data - input verification is implemented by the corresponding class. - - :meth:`~__pack__` is invoked by the :code:`pack()` method defined within this library. - Its purpose is to dictate how input objects are written to the stream. It is crucial - to note that the outcome of this function is ignored. - - .. versionchanged:: beta - The *stream* parameter has been removed and was instead moved into the context. - -.. method:: object.__unpack__(self, context) - - Called to desersialize objects from an input stream (the stream is stored in the given context). - The result of :meth:`~object.__unpack__` is not going to be ignored. - - Every implementation is tasked with the decision of whether to support the deserialization - of multiple elements concurrently. 
By default, the :class:`~caterpillar.fields.Field` class stores all essential - attributes required to determine the length of elements set for unpacking. The :meth:`~__unpack__` - method is activated through the :code:`unpack()` operation, integrated with the default - struct classes โ€” namely, :class:`~caterpillar.model.Sequence`, :class:`~caterpillar.model.Struct`, and :class:`~caterpillar.fields.Field`. - - .. versionchanged:: beta - The *stream* parameter has been removed and was instead moved into the context. - -.. method:: object.__size__(self, context) - - This method serves the purpose of determining the space occupied by this struct, - expressed in bytes. The availability of a context enables the execution of a - :class:`_ContextLambda`, offering support for dynamically sized structs. Furthermore, - for the explicit definition of dynamic structs, the option to raise a :class:`DynamicSizeError` - is provided. - - -.. _struct_type: - -Customizing the struct's type -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. method:: object.__type__(self) - - The configuration of *Structs* incorporates type replacement before a dataclass is - created. This feature was specifically introduced for documentation purposes. - The optional :meth:`~object.__type__` method allows for the specification of a - type, with the default being :code:`Any` if not explicitly defined. - - .. note:: - The implementation of the :meth:`~object.__type__` method is optional and, - therefore, not mandatory as per the library's specifications. - - The following example demonstrates the use of the `sphinx-autodoc`_ extension to document - struct classes with the :code:`S_REPLACE_TYPE` option enabled. Only documented members - are displayed. - - .. code-block:: rst - - .. autoclass:: examples.formats.nibarchive.NIBHeader() - :members: - - Will be displayed as: - - .. 
autoclass:: examples.formats.nibarchive.NIBHeader() - :members: - :no-undoc-members: - - In this illustration, the extra parentheses at the end are included to prevent the - automatic creation of constructors. - - -Struct containers -^^^^^^^^^^^^^^^^^ - -.. attribute:: class.__struct__ - - All models annotated with either :code:`@struct` or :code:`@bitfield` fall into the - category of *struct containers*. These containers store the additional class attribute - :func:`~class.__struct__`. - - Internally, any types utilizing this attribute can be employed within a struct, bitfield, - or sequence definition. The type of the stored value must be a subclass of :class:`_StructLike`. - - -Template Containers -^^^^^^^^^^^^^^^^^^^ - -.. attribute:: class.__template__ - - All template classes store information about the used template type variables. Whether they - are required or just positional. In addition, default inferred types are stored as well. - -BitField specific methods -------------------------- - -The introduced :class:`~caterpillar.model.BitField` class is special in many different ways. One key -attribute is its fixed size. To determine the size of a struct, it leverages a special -member, which can be either a function or an attribute. - -.. method:: object.__bits__(self) - - Called to measure the bit count of the current object. :meth:`~object.__bits__` - serves as the sole requirement for the defined fields in the current implementation - of the :class:`~caterpillar.model.BitField` class. - - .. note:: - This class member can also be expressed as an attribute. The library automatically - adapts to the appropriate representation based on the context. - - -Customizing the object's byteorder ----------------------------------- - -.. attribute:: object.__byteorder__ - - The byteorder of a struct can be temporarily configured using the corresponding - operator. It is important to note that this attribute is utilized internally and - should not be used elsewhere. 
- - .. code-block:: python - - >>> struct = BigEndian | struct # Automatically sets __byteorder__ +.. toctree:: + :caption: Standard Data Model -.. method:: object.__set_byteorder__(self, byteorder) + datamodel/standard - In contrast to the attribute :attr:`~object.__byteorder__`, the :meth:`~object.__set_byteorder__` - method is invoked to apply the current byteorder to a struct. The default behavior, - as described in :class:`FieldMixin`, is to return a new :class:`~caterpillar.fields.Field` instance with - the byteorder applied. Note the use of another operator here. - >>> field = BigEndian + struct +.. toctree:: + :caption: Caterpillar-specific Protocols + datamodel/protocols.rst -Modifying fields ----------------- +.. toctree:: + :hidden: -.. attribute:: field.__name__ + datamodel/processing_classes.rst - The name of a regular field is not explicitly specified in a typical attribute but is - instead set using a dedicated one. This naming convention is automatically applied by - all default :class:`~caterpillar.model.Sequence` implementations. The name can be retrieved through the - use of :code:`field.__name__`. .. _struct: https://docs.python.org/3/library/struct.html .. _sphinx-autodoc: https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html -.. _Stringizes: https://docs.python.org/3/howto/annotations.html#manually-un-stringizing-stringized-annotations -.. _Python Classes: https://docs.python.org/3/reference/compound_stmts.html#class .. _Ellipsis: https://docs.python.org/3/library/constants.html#Ellipsis \ No newline at end of file diff --git a/docs/sphinx/source/reference/datamodel/bitfield.rst b/docs/sphinx/source/reference/datamodel/bitfield.rst new file mode 100644 index 00000000..3fbeda2c --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/bitfield.rst @@ -0,0 +1,286 @@ +.. 
_datamodel_standard_bitfield: + +Bit-field +========= + + +A *Bit-field*, despite its name suggesting a field of bits, is a powerful structure designed for +detailed byte inspection. Similar to other structures, it is a finite collection of named fields. This +section will introduce potential challenges associated with the implementation of a :class:`~caterpillar.model.Bitfield` +and explains its behavior. + +.. versionchanged:: 2.5.0 + Completely reworked the internal :class:`Bitfield` behaviour, model and processing. + +Concept +------- + +Each Bitfield instance maintains a sequence of bitfield groups, where each group +contains a collection of sized fields. A bitfield group may consist of either multiple +entries (i.e., any types that can be converted to an integral type) or a single +:class:`_StructLike` object. For example, consider the following bitfield definition: + +.. code-block:: python + + @bitfield + class Format: + a1: 2 + a2: 1 + _ : 0 + b1: char + c1: uint32 + +This Bitfield definition will generate three distinct bitfield groups (labeled here as +groups a, b, and c). By default, bitfields use 8-bit alignment, leading to the following +layout: + +.. code-block:: + + Group Pos Bits + a 0x00 8 + b 0x01 8 + c 0x02 32 + +Internally, only the first group requires special bit-level parsing. The remaining groups +(b and c) are treated as standard structures since they span full bytes or words without +sub-byte alignment. This dynamic grouping mechanism allows leveraging full struct-like +class definitions within bitfields. + +Syntax +------ + +This new approach enables more complex and expressive bitfield definitions. The annotation +syntax is therefore extended as follows: + +.. raw:: html + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ name + : + bits + - + field + (optional) + (1) + +
+
+ name + : + 0 + (2) + +
+
+ name + : + field-or-action + (3) + +
+
+ name + : + ( + field + , + factory + ) + (4) + +
+
+ name + : + ( + bits + , + factory + (optional) + , + options + (optional) + ) + (5) + +
+
+ +Each identifier named above is bound to certain constraints: + +.. raw:: html + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
name + - + Any valid Python attribute name. +
bits + - + Any valid non-negative integer (0 or greater). +
field + - + + Any valid field definition for a struct resulting in a + _StructLike + that implements the subtract-operation resulting in a + Field + instance. +
field-or-action + - + + Any valid field definition for a struct, or an object implementing the + _ActionLike + protocol. +
factory + - + + Either a target Python type to use as factory or an instance of + BitfieldValueFactory. +
options + - + + One or more options represented either as flags or instances of + SetAlignment. +
+
 +
 + +What that syntax allows can be seen in the following example: + +.. code-block:: python + :caption: Bit-field definition using all syntax rules + + @bitfield + class Format: # -. + # Any annotation that can be converted by a TypeConverter | 1. Group + # into a _StructLike object is allowed. | (3 Bytes) + magic: b"foo" # bytes -' + + # Syntax according to (1) and (2) -. + version : 4 # int | 2. Group + state : 3 # int | (1 Byte / 8 Bits) + _ : 0 # ignored/removed -' + + # Extended Syntax (5) + flag1 : (1, SetAlignment(16)) # bool, new alignment -. + flag2 : 1 # bool | 3. Group + name : (12, CharFactory) # str | (2 Bytes / 16 Bits) + type : (2, SimpleEnum, EndGroup) # SimpleEnum -' + + +Processing Rules +---------------- + +Based on the previously defined syntax, some processing constraints were derived: + +- Rule 1.: + - Default alignment is 1 byte (8 bits). + - Zero (``0``) bits are prohibited. + - If followed by a (2) declaration, the remaining bits in the current byte are padded. + - If a ``<field>`` is provided: + - typeof(``<field>``) is used to infer the factory. + - :func:`~caterpillar.model.getbits` and :func:`~caterpillar.model.sizeof` determine the field's alignment. + - If a custom alignment is configured in the Bitfield constructor, inferred alignment is ignored unless the field includes the :attr:`~caterpillar.options.B_OVERWRITE_ALIGNMENT` option. + - If the :attr:`~caterpillar.options.B_GROUP_END` option is set, the current group is finalized and a new one is started. + +- Rule 2.: + - This rule forces alignment to the next byte boundary. + - The field is ignored during final class generation (name is discarded). + - The current group is finalized unless the bitfield is configured with :attr:`~caterpillar.options.B_GROUP_KEEP`. + +- Rule 3.: + - Equivalent to struct-like class field definitions. + - Automatically implies a rule 2 alignment. + - Always finalizes the current group regardless of :attr:`~caterpillar.options.B_GROUP_KEEP`.
+ +- Rule 4.: + - Extension of (1). + - Explicitly defines a conversion factory for the field. + - The factory must be: + - A built-in type (e.g., int, bool) supporting ``__int__``, or + - A type or instance of :class:`BitfieldValueFactory`. + +- Rule 5.: + - Builds upon (4) with support for options. + - Options can be passed as a list or single element. + - Supported Options: + - :attr:`~caterpillar.model.NewGroup`: Aligns the current group, starts a new one, and adds the entry to it. + - :attr:`~caterpillar.model.EndGroup`: Adds the entry to the current group, then aligns it. + - :class:`SetAlignment`: Changes the current working alignment. + - Note: Option order affects behavior and must be considered carefully. diff --git a/docs/sphinx/source/reference/datamodel/processing_classes.rst b/docs/sphinx/source/reference/datamodel/processing_classes.rst new file mode 100644 index 00000000..c3967de6 --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/processing_classes.rst @@ -0,0 +1,170 @@ +.. _datamodel_processing: + +Processing-related Types +======================== + + +Field +----- + +The next core element of this library is the *Field*. It serves as a context storage to store configuration data +about a struct. Even sequences and structs can be used as fields. The process is straightforward: each custom operator +creates an instance of a :class:`~caterpillar.fields.Field` with the applied configuration value. Most of the time, this value can be +static or a :ref:`context_lambda`. A field implements basic behavior that should not be duplicated, such as +conditional execution, exception handling with default values, and support for a built-in switch-case structure. + +As mentioned earlier, some primitive structs depend on being linked to a :class:`~caterpillar.fields.Field`. This is because all +configuration elements are stored in a :class:`~caterpillar.fields.Field` instance rather than in the target struct instance. 
More +information about each supported configuration can be found in :ref:`operators`. + +.. _greedy: + +Greedy +------ + +This library provides direct support for *greedy* parsing. Leveraging Python's syntactic features, this special form +of parsing is enabled using the `Ellipsis`_ (:code:`...`). All previously introduced structs implement greedy parsing +when enabled. + +>>> field = uint8[...] + +This special type can be used in places where a length has to be specified. Therefore, it can be applied to all array +:code:`[]` declarations and constructors that take the length as an input argument, such as :class:`CString`, for +example. + +.. code-block:: python + + >>> field = Field(CString(...)) + >>> unpack(field, b"abcd\x00") + 'abcd' + +.. _prefixed: + +Prefixed +-------- + +In addition to greedy parsing, this library supports prefixed packing and unpacking as well. With *prefixed*, we refer +to the length of an array of elements that should be parsed. In this library, the :code:`slice` class is used to achieve a +prefix option. + +>>> field = CString[uint32::] + + +.. _context-reference: + +Context +------- + +The context is another core element of this framework, utilized to store all relevant variables needed during the +process of packing or unpacking objects. The top-level :meth:`~caterpillar.model.unpack` and :meth:`~caterpillar.model.pack` methods are designed to +create the context themselves with some pre-defined (internal) fields. + +.. admonition:: Implementation Note + + :class:`Context` objects are essentially :code:`dict` objects with enhanced capabilities. Therefore, all + operations supported on dictionaries are applicable. + +The context enables special attribute-like access using :code:`getattr` if the attribute wasn't defined in the +instance directly. All custom attributes are stored in the dictionary representation of the instance. + +.. 
attribute:: CTX_PARENT + :value: "_parent" + + All :class:`Context` instances *SHOULD* contain a reference to the parent context. If the returned reference is + :code:`None`, it can be assumed that the current context is the root context. If this attribute is set, it + *MUST* point to a :class:`Context` instance. + +.. attribute:: CTX_OBJECT + :value: "_obj" + + When packing or unpacking objects, the current object attributes are stored within the *object* context. This + is a special context that allows access to previously parsed fields or attributes of the input object. To + minimize the number of calls using this attribute, a shortcut named :code:`this` was defined, which + automatically inserts a path to the object context. + + +.. attribute:: CTX_STREAM + :value: "_io" + + The input or output stream *MUST* be set in each context instance to prevent access errors on missing stream + objects. + + .. seealso:: + Discussion on `Github `_ why this attribute has + to be set in every context instance. + +.. attribute:: CTX_PATH + :value: "_path" + + Although it is optional to provide the current parsing or building path, it is *recommended*. All nesting + structures implement a behavior that automatically adds a sub-path while packing or unpacking. Special + names are :code:`""` for the starting path and :code:`""` for greedy sequence elements. + +.. attribute:: CTX_FIELD + :value: "_field" + + In case a struct is linked to a field, the :class:`~caterpillar.fields.Field` instance will always set this context variable + to be accessible from within the underlying struct. + + +.. attribute:: CTX_INDEX + :value: "_index" + + When packing or unpacking collections of elements, the current working index is given under this context + variable. This variable is set only in this specific situation. + + +.. 
attribute:: CTX_VALUE + :value: "_value" + + In case a switch-case statement is activated in a field, the context will receive the parsed value in this + context variable temporarily. + +.. attribute:: CTX_POS + :value: "_pos" + + Currently undefined. + +.. attribute:: CTX_OFFSETS + :value: "_offsets" + + **Internal use only:** This special member is only set in the root context and stores all packed objects that + should be placed at an offset position. + +.. attribute:: CTX_ROOT + :value: "_root" + + .. versionadded:: 2.5.0 + + Special attribute set to specify the root context. If this attribute is not present, the current ``Context`` instance + will be returned. + + +Context path +------------ + +The path of a context is a specialized form of a :ref:`context_lambda` and supports lazy evaluation of most +operators (conditional ones excluded). Once called, they try to retrieve the requested value from within +the given :class:`Context` instance. Below is a list of default paths designed to provide a relatively easy +way to access the context variables. + +.. attribute:: ctx + :value: "" + + This special path acts as a wrapper to access all variables within the top-level :class:`Context` object. + +.. attribute:: this + :value: "_obj" + + As described before, a special *object context* is created when packing or unpacking structs that store + more than one field. + +.. attribute:: parent + :value: "_parent._obj" + + A shortcut to access the object context of the parent context. + + + + +.. _Ellipsis: https://docs.python.org/3/library/constants.html#Ellipsis \ No newline at end of file diff --git a/docs/sphinx/source/reference/datamodel/protocols.rst b/docs/sphinx/source/reference/datamodel/protocols.rst new file mode 100644 index 00000000..830e1816 --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/protocols.rst @@ -0,0 +1,16 @@ +.. _reference_datamodel_protocols: + +Caterpillar's Protocols +======================= + +.. 
toctree:: + :caption: Protocols + :maxdepth: 1 + + protocols/actionlike + protocols/contextlike + protocols/contextlambda + protocols/structlike + protocols/bitfield_extensions + protocols/byteorder_extensions + diff --git a/docs/sphinx/source/reference/datamodel/protocols/actionlike.rst b/docs/sphinx/source/reference/datamodel/protocols/actionlike.rst new file mode 100644 index 00000000..1507edba --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/protocols/actionlike.rst @@ -0,0 +1,45 @@ +.. _ref_datamodel_protocol_actionlike: + +Action-like Objects +=================== + +Action-like objects provide a flexible mechanism for performing custom operations +during data processing. Rather than directly reading, writing, or storing a value, +actions are defined to modify or interact with the data at various stages +of serialization or deserialization. + +There are generally two kinds of actions that can be implemented: + +.. method:: object.__action_pack__(self, context) + + Invoked when data is serialized. This method can be used for tasks + such as calculating checksums or logging. + +.. method:: object.__action_unpack__(self, context) + + Invoked when data is deserialized. This method is typically used + for validation, verification, or any other operation that should run during + the unpacking process. + +.. note:: + + To implement an action-like object, only one of these methods needs to be defined; + defining both is optional. + + +Derived Protocols +----------------- + +.. py:class:: _ActionLike + + .. py:function:: __action_pack__(self, context: _ContextLike) -> None + __action_unpack__(self, context: _ContextLike) -> None + +.. py:class:: _SupportsActionUnpack + + .. py:function:: __action_unpack__(self, context: _ContextLike) -> None + + +.. py:class:: _SupportsActionPack + + .. 
py:function:: __action_pack__(self, context: _ContextLike) -> None diff --git a/docs/sphinx/source/reference/datamodel/protocols/bitfield_extensions.rst b/docs/sphinx/source/reference/datamodel/protocols/bitfield_extensions.rst new file mode 100644 index 00000000..e750d487 --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/protocols/bitfield_extensions.rst @@ -0,0 +1,36 @@ +.. _ref_datamodel_protocol_bitfield_ext: + +Bit-field Extensions +==================== + +The introduced :class:`~caterpillar.model.Bitfield` class is special in many different ways. One key +attribute is its fixed size. To determine the size of a struct, it leverages a special +member, which can be either a function or an attribute. + +.. method:: object.__bits__(self) + + Called to measure the bit count of the current object. :meth:`~object.__bits__` + serves as the sole requirement for the defined fields in the current implementation + of the :class:`~caterpillar.model.Bitfield` class. + + .. note:: + This class member can also be expressed as an attribute. The library automatically + adapts to the appropriate representation based on the context. + +.. method:: object.__signed__(self) + + *Proposed for future use - currently unused* + +Derived Protocols +----------------- + +.. py:class:: _SupportsBits + + .. py:function:: __bits__(self) -> int + + +.. py:class:: _ContainsBits + + .. py:attribute:: __bits__ + :type: int + diff --git a/docs/sphinx/source/reference/datamodel/protocols/byteorder_extensions.rst b/docs/sphinx/source/reference/datamodel/protocols/byteorder_extensions.rst new file mode 100644 index 00000000..9d14c695 --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/protocols/byteorder_extensions.rst @@ -0,0 +1,24 @@ +.. _ref_datamodel_protocol_byteorder_ext: + +Byteorder Extensions +==================== + +.. attribute:: object.__byteorder__ + + The byteorder of a struct can be temporarily configured using the corresponding + operator. 
It is important to note that this attribute is utilized internally and + should not be used elsewhere. + + .. code-block:: python + + >>> struct = BigEndian | struct # Automatically sets __byteorder__ + + +.. method:: object.__set_byteorder__(self, byteorder) + + In contrast to the attribute :attr:`~object.__byteorder__`, the :meth:`~object.__set_byteorder__` + method is invoked to apply the current byteorder to a struct. The default behavior, + as described in :class:`FieldMixin`, is to return a new :class:`~caterpillar.fields.Field` instance with + the byteorder applied. Note the use of another operator here. + + >>> field = BigEndian + struct diff --git a/docs/sphinx/source/reference/datamodel/protocols/contextlambda.rst b/docs/sphinx/source/reference/datamodel/protocols/contextlambda.rst new file mode 100644 index 00000000..b118047f --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/protocols/contextlambda.rst @@ -0,0 +1,23 @@ +.. _context_lambda: + +Context Lambdas +=============== + +Dynamic-sized structs are supported by this library through the use of so-called +*context lambdas*. The library introduces a special callable, :class:`_ContextLambda`, +which accepts a :class:`_ContextLike` instance and returns the desired result. + +To implement a *context lambda*, a class must define the :meth:`__call__` method. + +.. method:: object.__call__(self, context) + + The library does not distinguish between general callable objects and *context lambdas*; + both are treated as instances of the same class. + + +Derived Protocols +----------------- + +.. py:class:: _ContextLambda[_RT] + + .. py:function:: __call__(self, context: _ContextLike) -> _RT diff --git a/docs/sphinx/source/reference/datamodel/protocols/contextlike.rst b/docs/sphinx/source/reference/datamodel/protocols/contextlike.rst new file mode 100644 index 00000000..75303f19 --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/protocols/contextlike.rst @@ -0,0 +1,47 @@ +.. 
_ref_datamodel_protocol_contextlike: + +Context-like Objects +==================== + +Caterpillar provides a dedicated protocol for working with context-like objects. +This protocol enables seamless access to context variables during data packing +and unpacking operations. It is consistently implemented by all context-related +classes within this package. + +.. method:: object.__context_getattr__(self, path) + + Retrieves a value from the context based on the given path. It is + implementation-dependent whether nested paths are supported; by default, + multiple path elements are separated by a single dot. + + For example, given the path :code:`"foo.bar"`, the context implementation + should first resolve the value associated with :code:`"foo"`, then retrieve + the :code:`"bar"` attribute from that result. + +.. method:: object.__context_setattr__(self, path, value) + + Sets the value of a context variable specified by the given path. + Similar to :meth:`__context_getattr__`, multiple path elements are + separated by a dot by default. Implementations should resolve the + intermediate path components and update the target variable with the + provided value. + +.. attribute:: object._root + + References the root context object. This attribute provides access to the + top-level context, which can be useful for resolving global context + variables or for operations that require awareness of the entire + context hierarchy. If no root context has been set, the current instance + will be returned. + + +Derived Protocols +----------------- + +.. py:class:: _ContextLike + + .. py:attribute:: _root: _ContextLike | None + .. py:function:: __context_getattr__(self, path: str) -> Any: ... + __context_setattr__(self, path: str, value: Any) -> None: ... + __getitem__(self, key, /) -> Any: ... + __setitem__(self, key, value: Any, /) -> None: ... 
\ No newline at end of file diff --git a/docs/sphinx/source/reference/datamodel/protocols/structlike.rst b/docs/sphinx/source/reference/datamodel/protocols/structlike.rst new file mode 100644 index 00000000..55413e0a --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/protocols/structlike.rst @@ -0,0 +1,157 @@ +.. _ref_datamodel_protocol_structlike: + +Struct-Like Objects +=================== + + +The :class:`_StructLike` protocol can be used to emulate struct types. Even though, :func:`pack` +and :func:`unpack` allow so-called *partial* struct-like objects, there won't be a conversion +within struct class definitions. It is always recommended to implement all methods conforming +to the :code:`_StructLike` protocol. + +.. _struct_type: + +Special Methods for Struct-Like objects +--------------------------------------- + +.. method:: object.__pack__(self, obj, context) -> None + + Invoked to serialize the given object into an output stream, :meth:`~object.__pack__` + is designed to implement the behavior necessary for packing a collection of elements + or a single element. Accordingly, the input obj may be an :code:`Iterable` or a + singular element. + + The absence of a standardized implementation for deserializing a collection of elements + is deliberate. For example, all instances of the :class:`PyStructFormattedField` utilize the Python + library `struct`_ internally to pack and unpack data. To optimize execution times, a + collection of elements is packed and unpacked in a single call, rather than handling each + element individually. + + The context must incorporate specific members, mentioned in :ref:`context`. Any data + input verification is implemented by the corresponding class. + + :meth:`~__pack__` is invoked by the :code:`pack()` method defined within this library. + Its purpose is to dictate how input objects are written to the stream. It is crucial + to note that the outcome of this function is ignored. + + .. 
versionchanged:: beta + The *stream* parameter has been removed and was instead moved into the context. + + +.. method:: object.__unpack__(self, context) + + Called to deserialize objects from an input stream (the stream is stored in the given context). + The result of :meth:`~object.__unpack__` is not going to be ignored. + + Every implementation is tasked with the decision of whether to support the deserialization + of multiple elements concurrently. By default, the :class:`~caterpillar.fields.Field` class stores all essential + attributes required to determine the length of elements set for unpacking. The :meth:`~__unpack__` + method is activated through the :code:`unpack()` operation, integrated with the default + struct classes — namely, :class:`~caterpillar.model.Sequence`, :class:`~caterpillar.model.Struct`, and + :class:`~caterpillar.fields.Field`. + + .. versionchanged:: beta + The *stream* parameter has been removed and was instead moved into the context. + + +.. method:: object.__size__(self, context) + + This method serves the purpose of determining the space occupied by this struct, + expressed in bytes. The availability of a context enables the execution of a + :class:`_ContextLambda`, offering support for dynamically sized structs. Furthermore, + for the explicit definition of dynamic structs, the option to raise a :class:`DynamicSizeError` + is provided. + +.. method:: object.__type__(self) + + The configuration of *Structs* incorporates type replacement before a dataclass is + created. This feature was specifically introduced for documentation purposes. + The optional :meth:`~object.__type__` method allows for the specification of a + type, with the default being :code:`Any` if not explicitly defined. + + .. note:: + The implementation of the :meth:`~object.__type__` method is optional and, + therefore, not mandatory as per the library's specifications.
+ + The following example demonstrates the use of the `sphinx-autodoc`_ extension to document + struct classes with the :code:`S_REPLACE_TYPE` option enabled. Only documented members + are displayed. + + .. code-block:: rst + + .. autoclass:: examples.formats.nibarchive.NIBHeader() + :members: + + Will be displayed as: + + .. autoclass:: examples.formats.nibarchive.NIBHeader() + :members: + :no-undoc-members: + + In this illustration, the extra parentheses at the end are included to prevent the + automatic creation of constructors. + + +Struct containers +^^^^^^^^^^^^^^^^^ + +.. attribute:: class.__struct__ + + All models annotated with either :code:`@struct` or :code:`@bitfield` fall into the + category of *struct containers*. These containers store the additional class attribute + :func:`~class.__struct__`. + + Internally, any types utilizing this attribute can be employed within a struct, bitfield, + or sequence definition. The type of the stored value must be conforming to the :class:`_StructLike` protocol. + + +Template Containers +^^^^^^^^^^^^^^^^^^^ + +.. attribute:: class.__template__ + + All template classes store information about the used template type variables. Whether they + are required or just positional. In addition, default inferred types are stored as well. + + +Protocols for Struct-like objects +--------------------------------- + +To represent a :code:`_StructLike` object, all previously described methods must be implemented: + +.. py:class:: _StructLike[_IT, _OT] + + .. py:function:: __pack__(self, obj: _IT, context: _ContextLike) -> None + __unpack__(self, context: _ContextLike) -> _OT + __size__(self, context: _ContextLike) -> int + + +.. py:class:: _ContainsStruct[_IT, _OT] + + .. py:attribute:: __struct__ + :type: _StructLike[_IT, _OT] + + +.. py:class:: _SupportsPack[_IT] + + .. py:function:: __pack__(self, obj: _IT, context: _ContextLike) -> None + + +.. py:class:: _SupportsUnpack[_OT] + + .. 
py:function:: __unpack__(self, context: _ContextLike) -> _OT + + +.. py:class:: _SupportsSize + + .. py:function:: __size__(self, context: _ContextLike) -> int + + +.. py:class:: _SupportsType + + .. py:function:: __type__(self) -> Optional[type | str] + + + +.. _struct: https://docs.python.org/3/library/struct.html +.. _sphinx-autodoc: https://www.sphinx-doc.org/en/master/usage/extensions/autodoc.html \ No newline at end of file diff --git a/docs/sphinx/source/reference/datamodel/sequence.rst b/docs/sphinx/source/reference/datamodel/sequence.rst new file mode 100644 index 00000000..94c9f2dd --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/sequence.rst @@ -0,0 +1,64 @@ +.. _datamodel_standard_sequence: + +Sequence +======== + +A sequence functions independently of fields. The library introduces the :class:`~caterpillar.model.Sequence` +as a named finite collection of :class:`~caterpillar.fields.Field` objects. A *Sequence* +operates on a model, which is a string-to-field mapping by default. Later, we will discuss +the distinctions between a *Sequence* and a *Struct* regarding the model representation. + +A sequence definition entails the specification of a :class:`~caterpillar.model.Sequence` object by directly +indicating the model to use. Inheritance poses a challenge with sequences, as they are not +designed to operate on a type hierarchy. The default instantiation with all default options +involves passing the dictionary with all fields directly: + +>>> Format = Sequence({"a": uint8, "b": uint32}) + +.. admonition:: Programmers Note: + + All sequence types introduced by this library can also store so-called *unnamed* fields. + These fields are not visible in the unpacked result and are automatically packed, removing + concerns about them when the option ``S_DISCARD_UNNAMED`` is active. Their names usually + begin with an underscore and must solely contain numbers (e.g., :code:`_123`). 
+ +The sequence follows the :class:`~caterpillar.fields.Field` configuration model, allowing sequence and +field-related options to be set. As mentioned earlier, the ``S_DISCARD_UNNAMED`` option can +be used for example to exclude all unnamed fields from the final representation. A complete +list of all configuration options and their impact can be found in :ref:`options`. + +All sequences store a configurable :class:`ByteOrder` and :class:`Arch` as architecture, +which are passed to **all** fields in the current model. For more information on why these +classes are not specified as an enum class, please refer to :ref:`byteorder`. + +Inheritance in sequences is intricate, as a :class:`~caterpillar.model.Sequence` is constructed from a dictionary +of elements. We can attempt to simulate a chain of extended *base sequences* using the +concatenation of two sequences. The :meth:`~sequence.__add__` method will *import* all fields +from the other specified sequence. The only disadvantage is the placement required by the +operator. For instance: + +.. code-block:: python + + >>> BaseFormat = Sequence({"magic": b"MAGIC", "a": uint8}) + >>> Format = Sequence({"b": uint32, "c": uint16}) + BaseFormat + +will result in the following field order: + +.. code-block:: python + + >>> list(Format.get_members()) + ['b', 'c', 'magic', 'a'] + +which is not the intended order. The correct order should be :code:`['magic', 'a', 'b', 'c']`. +This can be achieved by using the :code:`BaseFormat` instance as the first operand. + +.. warning:: + This will alter the *BaseFormat* sequence, making it unusable elsewhere as the *base* for + all sub-sequences. Therefore, it is not recommended to use inheritance within sequences. + The :class:`~caterpillar.model.Struct` class resolves this issue with ease. + +Nesting sequences is allowed by default and can be achieved by incorporating another +:class:`~caterpillar.model.Sequence` into the model. 
It is important to note that *nesting* is distinct from +*inheritance*, adding an additional layer of packing and unpacking. + +>>> Format = Sequence({"other": BaseFormat, "b": uint32}) diff --git a/docs/sphinx/source/reference/datamodel/standard.rst b/docs/sphinx/source/reference/datamodel/standard.rst new file mode 100644 index 00000000..9a883112 --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/standard.rst @@ -0,0 +1,17 @@ +.. _datamodel_standards: + +Standard Types +============== + +Below is a list of types provided by *Caterpillar*. These types are designed to maintain +compatibility with older versions of the library, making them particularly important. + +.. toctree:: + :caption: Standard Types + + sequence.rst + struct + union + bitfield + templates + diff --git a/docs/sphinx/source/reference/datamodel/struct.rst b/docs/sphinx/source/reference/datamodel/struct.rst new file mode 100644 index 00000000..ba1baed7 --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/struct.rst @@ -0,0 +1,104 @@ +.. _datamodel_standard_struct: + +Struct +====== + +A *struct* describes a finite collection of named fields. In contrast to a *sequence*, a *struct* +utilizes Python classes as its model. The annotation feature in Python enables the definition of +custom types as annotations, enabling this special struct class to create a model solely based on +class annotations. Additionally, it generates a ``dataclass`` of the provided model, offering a +standardized string representation. + +Several differences exist between a :class:`~caterpillar.model.Sequence` and a +:class:`~caterpillar.model.Struct`, with the most significant ones highlighted below: + + +.. 
list-table:: Behaviour of structs and sequences + :header-rows: 1 + :widths: 10, 15, 15 + :stub-columns: 1 + + * - + - Sequence + - Struct + * - Model Type + - dict + - type + * - Inheritance + - No + - Yes + * - Attribute Access + - :code:`x["name"]` + - :code:`getattr(x, "name", None)` + * - Unpacked Type (also needed to pack) + - dict [*]_ + - instance of model + * - Documentation + - No + - Yes + + +.. [*] The unpacked values are stored inside a :class:`~caterpillar.context.Context` instance, a direct subclass of a dictionary. + +As evident from the comparison, the :class:`~caterpillar.model.Struct` class introduces new features such as +inheritance and documentation support. It's crucial to note that inheritance uses +struct types exclusively. + +The :class:`~caterpillar.model.Sequence` class implements a specific process for creating an internal representation +of the given model. The :class:`~caterpillar.model.Struct` class enhances this process by handling default values, replacing +types for documentation purposes, or removing annotation fields directly from the model. Additionally, +this class adds :attr:`~class.__struct__` to the model afterward. + +.. admonition:: Implementation Note + + If you decide to use the ``annotation`` feature from the ``__future__`` module, it is necessary to + enable :attr:`~options.S_EVAL_ANNOTATIONS` since it "`Stringizes`_" all annotations. ``inspect`` then + evaluates all strings, introducing a potential security risk. Exercise with caution when evaluating code! + +Specifying structs is as simple as defining `Python Classes`_: + +.. code-block:: python + + >>> @struct + ... class BaseFormat: + ... magic: b"MAGIC" + ... a: uint8 + ... + +Internally, a representation with all required fields and their corresponding names is +created. As :code:`b"MAGIC"` or :code:`uint8` are instances of types, the type replacement +for documentation purposes should be enabled, as shown in :ref:`struct_type`. 
+ +As described above, this class introduces an easy-to-use inheritance system using the method +resolution order of Python: + +.. code-block:: python + + >>> @struct + ... class Format(BaseFormat): + ... b: uint32 + ... c: uint16 + ... + >>> list(Format.__struct__.get_members()) + ['magic', 'a', 'b', 'c'] + +.. admonition:: Programmers Note + + As the :class:`~caterpillar.model.Struct` class is a direct subclass of :class:`~caterpillar.model.Sequence`, nesting is supported + by default. That means, so-called *anonymous inner* structs can be defined within a class + definition. + + .. code-block:: python + + >>> @struct + ... class Format: + ... a: uint32 + ... b: {"c": uint8} + ... + + It is not recommended to use this technique as the inner structs can't be used anywhere else. + Anonymous inner union definitions are tricky and are not officially supported yet. There are + workarounds to that problem, which are discussed in the API documentation of :class:`~caterpillar.model.Sequence`. + +.. _Stringizes: https://docs.python.org/3/howto/annotations.html#manually-un-stringizing-stringized-annotations +.. _Python Classes: https://docs.python.org/3/reference/compound_stmts.html#class \ No newline at end of file diff --git a/docs/sphinx/source/reference/datamodel/templates.rst b/docs/sphinx/source/reference/datamodel/templates.rst new file mode 100644 index 00000000..efae8015 --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/templates.rst @@ -0,0 +1,67 @@ +.. _ref-templates: + +Templates +========= + +A specialized form of structs are *templates*, which are basically generic Python classes. Think of them +as blueprints for your final classes/structs that contain placeholders for actual types. As in C++, a +template needs type arguments, in this case we will name them :class:`~caterpillar.model.TemplateTypeVar`. 
+ +Actually, there are two different types of type variables: + +* Required: + These variables are **required** when creating a new struct based on the template and they + can be used as positional arguments within the type derivation. + +* Positional: + These arguments are usable only as keyword arguments and may be optional if a default value + is supplied. + +These template type variables can be created using simple variable definitions: + +>>> A = TemplateTypeVar("A") + +.. important:: + A template class is **not** a struct definition. It specifies a blueprint for the final class. + +A template class is defined like a struct, union or bitfield class, but without being a +dataclass nor storing a struct instance. + +.. code-block:: python + + >>> @template(A, "B") + ... class FormatTemplate: + ... foo: A + ... bar: B + ... baz: uint32 + ... + +The defined class then can be used to create new classes based on the provided class +structure. For instance, + +.. code-block:: python + + >>> Format = derive(FormatTemplate, A=uint32, B=uint8) + >>> Format + + +will return an anonymous class (in this case). Normally, *caterpillar* tries to infer the +variable name from the current module (if :code:`name=...`). In summary, every time +:meth:`~caterpillar.model.derive` is called, a new class will be created if not already +defined. + +The current implementation will place template information about the current class using +a special class attribute: :attr:`~class.__template__`. + +To support sub-classes of templates, we can declare a derived class as partial: + +.. code-block:: python + + >>> Format32 = derive(FormatTemplate, A=uint32, partial=True) + +Again, the resulting class is **not** a struct, but another template class. + +.. admonition:: Developer's note + + By now, a template won't copy existing field documentation comments. Therefore, you + can't display inherited members using sphinx. 
\ No newline at end of file diff --git a/docs/sphinx/source/reference/datamodel/union.rst b/docs/sphinx/source/reference/datamodel/union.rst new file mode 100644 index 00000000..118d2bbc --- /dev/null +++ b/docs/sphinx/source/reference/datamodel/union.rst @@ -0,0 +1,41 @@ +.. _union-reference: + +Union +===== + +Internally constructing unions in the library poses challenges. The current implementation uses +the predefined behavior of the :class:`~caterpillar.model.Sequence` class for union types. It selects the field with +the greatest length as its representational size. *Unions*, much like *BitFields*, must store a static +size. + +**In essence, they behave similarly to C unions.** A traditional function hook will be installed on +the model to capture field assignments. What that means will be illustrated by the following example: + +.. code-block:: python + + >>> @union + ... class Format: + ... foo: uint16 + ... bar: uint32 + ... baz: boolean + ... + >>> obj = Format() # union does not need any values + +Right now, all attributes store the default value (:code:`None`). If we assign a new value to one field, it +will be applied to all others. Hence, + +>>> obj.bar = 0xFF00FF00 + +will result in + +.. code-block:: python + + >>> obj + Format(foo=65280, bar=4278255360, baz=False) + + +.. admonition:: Implementation Detail + + The constructor is the only place where there is no synchronization between fields. Additionally, the current + implementation may produce some overhead, because every *refresh* will first pack the new value and then + executes *unpack* on all other fields. \ No newline at end of file diff --git a/docs/sphinx/source/reference/index.rst b/docs/sphinx/source/reference/index.rst index 5fdf64aa..eb0e6f64 100644 --- a/docs/sphinx/source/reference/index.rst +++ b/docs/sphinx/source/reference/index.rst @@ -24,7 +24,6 @@ If you want to switch to the C API instead, please refer to the sections describ .. 
toctree:: :maxdepth: 2 - :numbered: :caption: C API Reference capi/extension diff --git a/docs/sphinx/source/tutorial/advanced/bitfield.rst b/docs/sphinx/source/tutorial/advanced/bitfield.rst index 4de922fe..6bef26a4 100644 --- a/docs/sphinx/source/tutorial/advanced/bitfield.rst +++ b/docs/sphinx/source/tutorial/advanced/bitfield.rst @@ -3,25 +3,97 @@ Bitfields ========= -.. attention:: - This section is still under development. +*Bitfields* are a compact way to pack multiple fields into a fixed-size binary +representation while also allowing dynamic structs in-between. -**BitFields** are a specialized feature in *Caterpillar* that allow you to define -fields at the bit level within a struct. This is particularly useful when dealing -with compact binary formats, such as network protocols, file formats, or hardware -interfaces where each bit has a specific meaning. +.. versionchanged:: 2.5.0 -BitFields allow you to specify the number of bits allocated for each field and -provide the ability to fine-tune how data is stored and retrieved from a binary -stream. This feature is useful when you need to work with bit-level manipulations, -such as when defining flags, options, or small data values packed into a single -byte or multiple bytes. + Completely reworked the concept of a *Bit-field*. The reference explains the + new concept and its implementation rules in detail: :ref:`datamodel_standard_bitfield`. -Syntax ------- + In short, the new system supports: -In *Caterpillar*, BitFields are defined using the :code:`@bitfield` decorator, and the -individual fields are specified with their respective bit widths. + - Dynamic grouping: Each Bitfield can have multiple *dynamic* bitfield groups. + - Struct-like fields: Full struct classes can be embedded. + - **Custom alignment and type factories.** + - Flexible syntax with 5 powerful rules. + + +Core Concepts +------------- + +Each Bitfield instance contains one or more *bitfield groups*. 
A +group is simply a collection of *bitfield entries* that are packed together based on +alignment constraints or a single :class:`~caterpillar.fields.Field`. + +How it works: Groups and Fields +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +When you define a Bitfield class, each field line describes either a simple bit-sized +entry or a more complex structure. The fields are automatically grouped based on pre-defined +syntax definitions and processing rules. + +Here is the general idea: + +- Bitfield groups collect fields until the group is finalized (e.g. because of an alignment boundary or a struct-like field). +- Within a group, fields may consume only part of a byte, or a whole number of bytes. +- Once a group is finalized, any new fields start a new group. + +This approach means you don't have to manually calculate where each field starts and ends. Instead, +the system infers layout while giving you the tools to align fields precisely when needed. Neat, isn't it? + + +The Syntax +---------- + +Each field is defined using one of five patterns: + +1. Basic fixed-size bits field: + + A standard field defines a name and the number of bits it occupies. Example:: + + @bitfield + class Format: + flag : 1 # means the field flag uses 1 bit. + +2. Basic fixed-size bits field with type from struct: + + You can specify how the bits should be interpreted using a type that will be + resolved from the given struct using the :class:`_SupportsType` protocol:: + + @bitfield + class Format: + valid : 1 - boolean # the 1-bit field should be interpreted as a bool + +3. Alignment: + + If you define a field with a size of ``0``, it forces the Bitfield to align the current group + to the next byte boundary. Afterwards, the current group is finalized and a new one is started:: + + @bitfield + class Format: + flag : 1 + _ : 0 # align to 8bits + valid : 1 - boolean # size will be 2x 8bits + +4. Struct-like Field: + + You can embed an entire struct-like class inside the Bitfield. 
(Any object conforming to the + :class:`_StructLike` protocol). + +5. Custom Factory with Options: + + For advanced cases, you can specify not only the number of bits and a factory (a type converter) + but also additional options to fine-tune grouping or alignment:: + + @bitfield + class Format: + # 7bits, converted to char and alignment set to 24bits + name : (7, CharFactory, SetAlignment(24)) + + +Practical Example +----------------- One practical example of using BitFields is implementing the chunk-naming convention for PNG files. Here's how you might define the options for a chunk using a bitfield structure: @@ -29,27 +101,24 @@ for PNG files. Here's how you might define the options for a chunk using a bitfi .. code-block:: python :caption: Implementing the `chunk-naming `_ convention - @bitfield(options={S_DISCARD_UNNAMED}) - class ChunkOptions: - _ : 2 # <-- first two bits are not used - ancillary : 1 # f0 - _1 : 0 - _2 : 2 - private : 1 # <-- the 5-th bit (from right to left) - _3 : 0 - _4 : 2 - reserved : 1 # f2 - _5 : 0 # <-- padding until the end of this byte - _6 : 2 - safe_to_copy : 1 # f3 + @bitfield(options={S_DISCARD_UNNAMED}) + class ChunkOption: + _ : 2 # <-- first two bits are not used + value : 1 # automatically boolean + @struct + class ChunkOptions: + ancillary : ChunkOption # f0 + private : ChunkOption # f1 + reserved : ChunkOption # f2 + safe_to_copy : ChunkOption # f3 In the example above, each field within the :code:`ChunkOptions` class is assigned -a specific number of bits: +8bits according to the :code:`ChunkOption` bitfield. - The first two bits (:code:`_`) are unused (or "unnamed"). -- The :code:`ancillary` field uses 1 bit, representing a flag. -- Other fields like :code:`private`, :code:`reserved`, and :code:`safe_to_copy` are allocated 1 or 2 bits as needed. +- The :code:`value` field uses 1 bit, representing a flag (automatically boolean type). 
+- all other bits are ignored automatically Here's a breakdown of how the bits are laid out in memory: @@ -59,7 +128,7 @@ Here's a breakdown of how the bits are laid out in memory: bit : 76543210 76543210 76543210 76543210 ---------------------------------------------- breakdown: 00100000 00100000 00100000 00100000 - \/|\___/ \/|\___/ \/|\___/ \/|\___/ + ├┘│└─┬─┘ ├┘│└─┬─┘ ├┘│└──┬┘ ├┘│└─┬─┘ u f0 a u f1 a u f2 a u f3 a Where: diff --git a/docs/sphinx/source/tutorial/index.rst b/docs/sphinx/source/tutorial/index.rst index 5487508e..81d6a97a 100644 --- a/docs/sphinx/source/tutorial/index.rst +++ b/docs/sphinx/source/tutorial/index.rst @@ -23,7 +23,6 @@ is an important resource to start from. .. toctree:: :caption: Python - :numbered: :maxdepth: 4 first_steps/index.rst diff --git a/examples/bitfield_example.py b/examples/bitfield_example.py index 039feb0d..0f209698 100644 --- a/examples/bitfield_example.py +++ b/examples/bitfield_example.py @@ -1,5 +1,5 @@ # type: ignore -from caterpillar.py import bitfield, char, int8, unpack, pack +from caterpillar.py import bitfield, CharFactory, int8, unpack, pack try: from rich import print @@ -11,15 +11,15 @@ class Format: b1: 1 # inferred uint8 type with a width of one bit _: 0 # start new uint8 with 7 unused bits - b2: 2 - char # wraps parsed int to char (string) + b2: (2, CharFactory) # wraps parsed int to char (string) b3: 3 - int8 = 1 # default value is applied -> REVISIT: necessary? 
_1: 3 # unnamed padding to the rest of the byte -print(Format.__struct__) +print(Format.__struct__.groups) obj = unpack(Format, b"\x80\x80") print(obj) -# prints: Format(b1=True, b2='2', b3=0, _1=0) +# prints: Format(b1=1, b2='\x02', b3=0, _1=0) # real_pos: 0123456701234567 # bit_pos: 7654321076543210 # ---------------- # right to left diff --git a/examples/formats/caf.py b/examples/formats/caf.py index d914482c..a1636080 100644 --- a/examples/formats/caf.py +++ b/examples/formats/caf.py @@ -114,7 +114,7 @@ class CAFPacketTable: remainder_frames: int32 # The VarInt configuration doesn't have to be changed, as this file format uses # the default implementation. - table_data: VarInt[this.num_packets] + table_data: vint[this.num_packets] @struct(order=BigEndian) diff --git a/pyproject.toml b/pyproject.toml index 63c6a974..09defc94 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,16 +10,18 @@ cmake.source-dir = "." [project] name = "caterpillar" -version = "2.4.5" +version = "2.5.0" description="Library to pack and unpack structurized binary data." 
-authors = [ - { name="MatrixEditor", email="not@supported.com" }, -] +authors = [{ name = "MatrixEditor" }] +maintainers = [{ name = "MatrixEditor" }] readme = "README.md" classifiers = [ - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: MIT License', + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", 'Programming Language :: Python :: 3.12', 'Programming Language :: Python :: 3.13', diff --git a/src/caterpillar/_C.pyi b/src/caterpillar/_C.pyi index 813ee8c8..4edd2803 100644 --- a/src/caterpillar/_C.pyi +++ b/src/caterpillar/_C.pyi @@ -1,11 +1,32 @@ -from __future__ import annotations - -from typing import Any, Optional, Collection, Union, Callable, IO, TypeVar -from enum import EnumType - -_Length = Union[int, ContextLambda, slice, Ellipsis] -ContextLambda = Callable[[Context], Any] -_ConstType = Union[Any, ContextLambda] +from _typeshed import Incomplete +from types import NotImplementedType +from typing import ( + IO, + Any, + Callable, + Collection, + Dict, + Generic, + Optional, + Protocol, + Type, + TypeVar, + Union, +) + +from caterpillar.abc import ( + _IT, + _OT, + _ContextLike, + _LengthT, + _ContextLambdaReturnT_co, + _ContextLambda, +) + +class _CContextLambda(Protocol[_ContextLambdaReturnT_co]): + def __call__(self, layer: layer) -> _ContextLambdaReturnT_co: ... + +_CTypeDesc = Union[str, Type[_OT], NotImplementedType] BIG_ENDIAN: Endian DefaultOption: DefaultOptionType @@ -23,6 +44,7 @@ S_EVAL_ANNOTATIONS: Option S_REPLACE_TYPES: Option S_SLOTS: Option S_UNION: Option +TYPE_MAP: dict boolean: Bool char: Char f16: Float @@ -44,34 +66,35 @@ u64: Int u8: Int varint: VarInt -class Context(dict): - def __init__(self, /, **kwargs) -> None: ... - def __context_getattr__(self, *args, **kwargs): ... - class Arch: name: str ptr_size: int def __init__(self, name: str, ptr_size: int) -> None: ... 
+ def __eq__(self, other: object) -> bool: ... + def __ge__(self, other: object) -> bool: ... + def __gt__(self, other: object) -> bool: ... def __hash__(self) -> int: ... + def __le__(self, other: object) -> bool: ... + def __lt__(self, other: object) -> bool: ... + def __ne__(self, other: object) -> bool: ... -class binaryexpr: - expr: int - lhs: Union[ContextLambda, Any] - rhs: Union[ContextLambda, Any] - def __init__( - self, - expr: int, - left: Union[ContextLambda, Any], - right: Union[ContextLambda, Any], - ) -> None: ... - def __call__(self, *args, **kwargs) -> Any: ... +class Bool(builtinatom[bool, bool]): + def __init__(self) -> None: ... + +class Char(builtinatom[str, str]): + def __init__(self) -> None: ... -class ContextPath: +class Context(dict, _ContextLike): + def __init__(self, *args, **kwargs) -> None: ... + def __context_getattr__(self, *args, **kwargs): ... + def __getattribute__(self, name: str, /) -> Any: ... + +class ContextPath(_ContextLambda): path: str def __init__(self, path: str) -> None: ... def __add__(self, other) -> binaryexpr: ... def __and__(self, other) -> binaryexpr: ... - def __call__(self, context: Context) -> Any: ... + def __call__(self, context: _ContextLike): ... def __eq__(self, other: object) -> binaryexpr: ... def __floordiv__(self, other) -> binaryexpr: ... def __ge__(self, other: object) -> binaryexpr: ... @@ -81,23 +104,43 @@ class ContextPath: def __le__(self, other: object) -> binaryexpr: ... def __lshift__(self, other) -> binaryexpr: ... def __lt__(self, other: object) -> binaryexpr: ... + def __matmul__(self, *args, **kwargs) -> binaryexpr: ... def __mod__(self, other) -> binaryexpr: ... def __mul__(self, other) -> binaryexpr: ... def __ne__(self, other: object) -> binaryexpr: ... def __neg__(self) -> unaryexpr: ... def __or__(self, other) -> binaryexpr: ... - def __sub__(self, other) -> unaryexpr: ... + def __pos__(self) -> unaryexpr: ... + def __pow__(self, other) -> binaryexpr: ... 
+ def __radd__(self, other) -> binaryexpr: ... + def __rand__(self, other) -> binaryexpr: ... + def __rfloordiv__(self, other) -> binaryexpr: ... + def __rlshift__(self, other) -> binaryexpr: ... + def __rmatmul__(self, *args, **kwargs) -> binaryexpr: ... + def __rmod__(self, other) -> binaryexpr: ... + def __rmul__(self, other) -> binaryexpr: ... + def __ror__(self, other) -> binaryexpr: ... + def __rpow__(self, other) -> binaryexpr: ... + def __rrshift__(self, other) -> binaryexpr: ... + def __rshift__(self, other) -> binaryexpr: ... + def __rsub__(self, other) -> binaryexpr: ... + def __rtruediv__(self, other) -> binaryexpr: ... + def __rxor__(self, other) -> binaryexpr: ... + def __size__(self, layer: layer) -> int: ... + def __sub__(self, other) -> binaryexpr: ... def __truediv__(self, other) -> binaryexpr: ... - def __type__(self) -> type: ... - def __xor__(self, other) -> binaryexpr: ... + def __type__(self, layer: layer) -> _CTypeDesc: ... + def __xor__(self, other): ... class DefaultOptionType: - def __init__(self) -> None: ... + @classmethod + def __init__(cls, *args, **kwargs) -> None: ... class Endian: ch: str name: str def __init__(self, ch: str, name: str) -> None: ... + def __add__(self, other): ... def __eq__(self, other: object) -> bool: ... def __ge__(self, other: object) -> bool: ... def __gt__(self, other: object) -> bool: ... @@ -105,69 +148,35 @@ class Endian: def __le__(self, other: object) -> bool: ... def __lt__(self, other: object) -> bool: ... def __ne__(self, other: object) -> bool: ... + def __radd__(self, other): ... -class atom: - def __init__(self) -> None: ... - def __pack__(self, obj: Any, context: layer) -> None: ... - def __size__(self, context: layer) -> Any: ... - def __type__(self) -> Any: ... - def __unpack__(self, context: layer) -> Any: ... - -class catom(atom): - def __init__(self) -> None: ... - def __pack__(self, obj: Any, context: layer) -> None: ... 
- def __pack_many__(self, obj: Collection[Any], layer: layer) -> None: ... - def __size__(self, context: layer) -> Any: ... - def __type__(self) -> Any: ... - def __unpack__(self, context: layer) -> Any: ... - def __unpack_many__(self, context: layer, lengthinfo) -> Collection[Any]: ... - -class builtinatom(catom): - def __init__(self, *args, **kwargs) -> None: ... - def __floordiv__(self, other) -> condition: ... - def __getitem__(self, index) -> repeated: ... - def __matmul__(self, *args, **kwargs) -> atoffset: ... - def __rfloordiv__(self, other) -> condition: ... - def __rmatmul__(self, *args, **kwargs) -> atoffset: ... - def __rrshift__(self, other) -> switch: ... - def __rshift__(self, other) -> switch: ... - -class Char(builtinatom): - def __init__(self, *args, **kwargs) -> None: ... - -class condition(builtinatom): - atom: Any - condition: Union[bool, ContextLambda] - def __init__(self, condition: Union[bool, ContextLambda], atom: Any) -> None: ... - def is_enabled(self, context: layer) -> bool: ... - def __set_byteorder__(self, byteorder: Endian) -> condition: ... - -class const(builtinatom): - def __init__(self, *args, **kwargs) -> None: ... +class Float(builtinatom[float, float]): + little_endian: bool + nbits: int + nbytes: int + def __init__(self, nbits: int, little_endian: bool = ...) -> None: ... + def __set_byteorder__(self, endian: Endian) -> Float: ... -class builtinatom(catom): - def __init__(self) -> None: ... - def __add__(self, endian: Endian) -> Field: ... - def __floordiv__(self, condition: Union[bool, ContextLambda]) -> Field: ... - def __getitem__(self, length: _Length) -> Field: ... - def __matmul__(self, offset: Union[ContextLambda, int]) -> Field: ... - def __or__(self, option: Option) -> Field: ... - def __radd__(self, endian: Endian) -> Field: ... - def __rfloordiv__(self, condition: Union[bool, ContextLambda]) -> Field: ... - def __rmatmul__(self, offset: Union[ContextLambda, int]) -> Field: ... 
- def __ror__(self, option: Option) -> Field: ... - def __rrshift__(self, switch: Union[dict, ContextLambda]) -> Field: ... - def __rshift__(self, switch: Union[dict, ContextLambda]) -> Field: ... - def __rxor__(self, option: Option) -> Field: ... - def __xor__(self, option: Option) -> Field: ... +class Int(builtinatom[int, int]): + little_endian: bool + nbits: int + nbytes: int + signed: bool + def __init__( + self, nbits: int, signed: bool = ..., little_endian: bool = ... + ) -> None: ... + def __set_byteorder__(self, endian: Endian) -> Int: ... class InvalidDefaultType: - def __init__(self) -> None: ... + @classmethod + def __init__(cls, *args, **kwargs) -> None: ... + +_OptionValueT = TypeVar("_OptionValueT", default=Any) -class Option: +class Option(Generic[_OptionValueT]): name: str - value: Optional[Any] - def __init__(self, name: str, value: Optional[Any] = ...) -> None: ... + value: Optional[_OptionValueT] + def __init__(self, name: str, value: Optional[_OptionValueT] = ...) -> None: ... def __eq__(self, other: object) -> bool: ... def __ge__(self, other: object) -> bool: ... def __gt__(self, other: object) -> bool: ... @@ -176,182 +185,199 @@ class Option: def __lt__(self, other: object) -> bool: ... def __ne__(self, other: object) -> bool: ... +class Padding(builtinatom[None, None]): + def __init__(self, pad: str) -> None: ... + class State: globals: Context io: IO offset_table: dict - def __init__(self, io: IO, **globals) -> None: ... + def __init__(self, io: Optional[IO] = ..., **globals) -> None: ... def read(self, __n: int) -> bytes: ... def seek(self, __offset: int, __whence: int = ...) -> None: ... def tell(self) -> int: ... def write(self, __data: bytes) -> None: ... -class fieldinfo: - excluded: bool - default: Any - field: atom - name: str - def __init__(self, field: atom, excluded: bool = ...) -> None: ... - -class lengthinfo: - length: int - greedy: bool - def __init__(self, length: int = ..., greedy: bool = ...) -> None: ... 
+_ModelT = TypeVar("_ModelT") -class Struct(builtinatom): +class Struct(Generic[_ModelT], builtinatom[_ModelT, _ModelT]): members: dict[str, fieldinfo] - model: type + model: Type[_ModelT] options: set[Option] def __init__( self, - model: type, + model: Type[_ModelT], options: set[Option] = ..., endian: Endian = ..., field_options: set[Option] = ..., alter_model: bool = ..., ) -> None: ... -class unaryexpr: - expr: int - value: Union[ContextLambda, Any] - def __init__(self, expr: int, value: Union[ContextLambda, Any]) -> Any: ... - def __call__(self, *args, **kwargs): ... - def __hash__(self) -> int: ... - -class layer: - field: Field - greedy: bool - index: int - length: int - obj: Context - parent: layer - path: str - sequence: Collection - sequential: bool - state: State - value: Any - def __init__( - self, - state: State, - field: Field = ..., - obj: Context = ..., - value: Any = ..., - path: str = ..., - sequence: Collection = ..., - parent: layer = ..., - ) -> None: ... - def __context_getattr__(self, path: str) -> Any: ... - -def typeof(obj: atom): ... -def pack_into(__obj: Any, __struct: atom, __io: IO, **globals) -> None: ... -def pack(__obj: Any, __struct: atom, **globals) -> bytes: ... -def sizeof(obj: atom, globals: Optional[dict | Context] = ...): ... -def unpack(__io: Any, __struct: atom, **globals) -> Any: ... - -class Int(builtinatom): - little_endian: bool - nbits: int - nbytes: int - signed: bool - def __init__( - self, nbits: int, signed: bool = ..., little_endian: bool = ... - ) -> None: ... - -class Float(builtinatom): - little_endian: bool - nbits: int - nbytes: int - def __init__(self, nbits: int, little_endian: bool = ...) -> None: ... - -class Padding(builtinatom): - def __init__(self, pad: int) -> None: ... - -class string(builtinatom): - encoding: str - errors: str - length: _Length - def __init__(self, length: _Length, encoding: str, errors: str = ...) -> None: ... 
+class VarInt(builtinatom[int, int]): + little_endian: Incomplete + lsb: Incomplete + def __init__(self, little_endian: bool = ..., lsb: bool = ...) -> None: ... + def __set_byteorder__(self, endian: Endian) -> VarInt: ... -class atoffset(builtinatom): - offset: Union[int, ContextLambda] +class atoffset(builtinatom[_IT, _OT]): + offset: Union[int, _CContextLambda[int]] whence: int def __init__( - self, offset: Union[int, ContextLambda], atom: atom, whence: int = ... + self, + offset: Union[int, _CContextLambda[int]], + atom: atom[_IT, _OT], + whence: int = ..., ) -> None: ... def get_offset(self, layer: layer) -> int: ... def __set_byteorder__(self, byteorder: Endian) -> atoffset: ... +class atom(Generic[_IT, _OT]): + def __init__(self) -> None: ... + def __pack__(self, obj: _IT, context: layer) -> None: ... + def __size__(self, context: layer) -> int: ... + def __type__(self) -> _CTypeDesc[_OT]: ... + def __unpack__(self, context: layer) -> _OT: ... -class repeated(builtinatom): - atom: Any - length: _Length - def __init__(self, atom: atom, length: _Length) -> None: ... - def __set_byteorder__(self, byteorder: Endian) -> repeated: ... +class binaryexpr(_ContextLambda): + expr: int + lhs: Union[_ContextLambda, Any] + rhs: Union[_ContextLambda, Any] + def __init__(self, expr: int, lhs, rhs) -> None: ... + def __add__(self, other): ... + def __and__(self, other): ... + def __call__(self, *args, **kwargs): ... + def __floordiv__(self, other): ... + def __invert__(self): ... + def __lshift__(self, other): ... + def __matmul__(self, *args, **kwargs): ... + def __mod__(self, other): ... + def __mul__(self, other): ... + def __neg__(self): ... + def __or__(self, other): ... + def __pos__(self): ... + def __pow__(self, other): ... + def __radd__(self, other): ... + def __rand__(self, other): ... + def __rfloordiv__(self, other): ... + def __rlshift__(self, other): ... + def __rmatmul__(self, *args, **kwargs): ... + def __rmod__(self, other): ... 
+ def __rmul__(self, other): ... + def __ror__(self, other): ... + def __rpow__(self, other): ... + def __rrshift__(self, other): ... + def __rshift__(self, other): ... + def __rsub__(self, other): ... + def __rtruediv__(self, other): ... + def __rxor__(self, other): ... + def __sub__(self, other): ... + def __truediv__(self, other): ... + def __xor__(self, other): ... + +class builtinatom(catom[_IT, _OT]): + def __init__(self) -> None: ... + def __floordiv__( + self, condition: Union[bool, _ContextLambda[bool]] + ) -> condition[_IT, _OT]: ... + def __getitem__(self, index: _LengthT) -> repeated[_IT, _OT]: ... + def __matmul__( + self, offset: Union[int, _CContextLambda[int]] + ) -> atoffset[_IT, _OT]: ... + def __rfloordiv__( + self, other: Union[bool, _CContextLambda[bool]] + ) -> condition[_IT, _OT]: ... + def __rmatmul__( + self, offset: Union[int, _CContextLambda[int]] + ) -> atoffset[_IT, _OT]: ... + def __rrshift__(self, other) -> switch: ... + def __rshift__(self, other) -> switch: ... -class seqlayer(layer): - index: int - length: int - sequence: Collection +class catom(atom[_IT, _OT]): def __init__(self, *args, **kwargs) -> None: ... - -class switch(builtinatom): - atom: Any - cases: Union[dict[Any, Any], ContextLambda] + def __pack__(self, obj: _IT, context: layer) -> None: ... + def __pack_many__(self, obj: Collection[_IT], layer: layer) -> None: ... + def __size__(self, context: layer) -> int: ... + def __type__(self) -> Union[Type[_OT], str, NotImplementedType]: ... + def __unpack__(self, context: layer) -> _OT: ... + def __unpack_many__( + self, context: layer, lengthinfo: lengthinfo + ) -> Collection[_OT]: ... + +class computed(builtinatom[_IT, _IT]): + value: _IT + def __init__(self, value: _IT) -> None: ... 
+ +class condition(builtinatom[Optional[_IT], Optional[_OT]]): + atom: atom[_IT, _OT] + condition: Union[bool, _ContextLambda[bool]] def __init__( - self, atom: atom, cases: Union[dict[Any, Any], ContextLambda] + self, condition: Union[bool, _ContextLambda[bool]], atom: atom[_IT, _OT] ) -> None: ... - def get_next(self, obj: Any, context: layer) -> Any: ... - def __set_byteorder__(self, byteorder: Endian) -> switch: ... + def is_enabled(self, context: layer) -> bool: ... + def __set_byteorder__(self, byteorder: Endian) -> condition[_IT, _OT]: ... + +class const(Generic[_IT], builtinatom[_IT, _IT]): + atom: atom[_IT, _IT] + value: _IT + def __init__(self, atom: atom[_IT, _IT], value: _IT) -> None: ... + def __set_byteorder__(self, endian: Endian) -> const[_IT, _IT]: ... -class cstring(builtinatom): - length: Union[_Length, atom] +class cstring(builtinatom[str, str]): + length: Union[_LengthT, atom[int, int]] encoding: str errors: str terminator: str keep_terminator: bool - def __init__( self, - length: Union[_Length, atom], + length: Union[_LengthT, atom], encoding: str = ..., errors: str = ..., sep: str = ..., keep_terminator: bool = ..., ) -> None: ... -class octetstring(builtinatom): - length: _Length - def __init__(self, length: _Length) -> None: ... +class enumeration(builtinatom[_IT, _IT]): + atom: atom[_IT, _IT] + default: Optional[_IT] + enum_type: Type[_IT] + members: Dict[str, _IT] + def __init__( + self, + atom: atom[_IT, _IT], + enum_type: Type[_IT], + default: Optional[_IT] = ..., + ) -> None: ... + def __set_byteorder__(self, endian: Endian) -> enumeration[_IT]: ... -class enumeration(builtinatom): - atom: Any - enum_type: EnumType - members: dict[Any, Any] +class fieldinfo: + excluded: bool default: Any + field: atom + name: str + def __init__(self, field: atom, excluded: bool = ...) -> None: ... 
+ +class layer(_ContextLike): + parent: Optional[layer] + path: str + state: State def __init__( self, - atom: atom, - enum_type: EnumType, - default: Any = ..., + state: State, + path: str = ..., + parent: layer = ..., ) -> None: ... - def __set_byteorder__(self, byteorder: Endian) -> enumeration: ... - -class pstring(builtinatom): - atom: Any - encoding: str - errors: str - def __init__(self, atom: atom, encoding: str = ..., errors: str = ...) -> None: ... - def __set_byteorder__(self, byteorder: Endian) -> pstring: ... + def __context_getattr__(self, path: str) -> Any: ... -class VarInt(builtinatom): - little_endian: bool - lsb: bool - def __init__(self, little_endian: bool = ..., lsb: bool = ...) -> None: ... - def __set_byteorder__(self, byteorder: Endian) -> VarInt: ... +class lazy(builtinatom[_IT, _OT]): + always_lazy: bool + fn: Callable[[], atom[_IT, _OT]] + def __init__( + self, fn: Callable[[], atom[_IT, _OT]], always_lazy: bool = ... + ) -> None: ... + def __set_byteorder__(self, endian: Endian) -> lazy[_IT, _OT]: ... -class computed(builtinatom): - value: _ConstType - def __init__(self, value: _ConstType) -> None: ... +class lengthinfo: ... class objlayer(layer): obj: Context @@ -360,13 +386,11 @@ class objlayer(layer): ) -> None: ... def __context_getattr__(self, path: str) -> Any: ... -class lazy(builtinatom): - always_lazy: bool - fn: Callable[[], atom] - def __init__(self, fn: Callable[[], atom], always_lazy: bool = ...) -> None: ... - def __set_byteorder__(self, byteorder: Endian) -> lazy: ... +class octetstring(builtinatom[bytes, bytes]): + length: _LengthT + def __init__(self, length: _LengthT) -> None: ... -class patom(atom): +class patom(atom[_IT, _OT]): def __init__(self) -> None: ... def __floordiv__(self, other): ... def __getitem__(self, index): ... @@ -375,3 +399,162 @@ class patom(atom): def __rmatmul__(self, *args, **kwargs): ... def __rrshift__(self, other): ... def __rshift__(self, other): ... 
+ +class pstring(builtinatom[str, str]): + atom: atom[str, str] + encoding: str + errors: str + def __init__( + self, atom: atom[str, str], encoding: str = ..., errors: str = ... + ) -> None: ... + def __set_byteorder__(self, byteorder: Endian) -> pstring: ... + +class repeated(builtinatom[_IT, _OT]): + atom: atom[_IT, _OT] + length: _LengthT + def __init__(self, atom: atom[_IT, _OT], length: _LengthT) -> None: ... + def __set_byteorder__(self, byteorder: Endian) -> repeated[_IT, _OT]: ... + +class seqlayer(layer): + index: int + length: int + sequence: Collection + def __init__(self, *args, **kwargs) -> None: ... + +class string(builtinatom[str, str]): + encoding: str + errors: str + length: _LengthT + def __init__(self, length: _LengthT, encoding: str, errors: str = ...) -> None: ... + +class switch(builtinatom): + atom: atom + cases: Dict[Any, atom] + def __init__(self, *args, **kwargs) -> None: ... + def get_next(self, *args, **kwargs): ... + def __set_byteorder__(self, endian: Endian) -> None: ... + +class unaryexpr: + expr: int + value: Any | _CContextLambda + def __init__(self, *args, **kwargs) -> None: ... + def __add__(self, other): ... + def __and__(self, other): ... + def __call__(self, *args, **kwargs): ... + def __floordiv__(self, other): ... + def __hash__(self) -> int: ... + def __invert__(self): ... + def __lshift__(self, other): ... + def __matmul__(self, *args, **kwargs): ... + def __mod__(self, other): ... + def __mul__(self, other): ... + def __neg__(self): ... + def __or__(self, other): ... + def __pos__(self): ... + def __pow__(self, other): ... + def __radd__(self, other): ... + def __rand__(self, other): ... + def __rfloordiv__(self, other): ... + def __rlshift__(self, other): ... + def __rmatmul__(self, *args, **kwargs): ... + def __rmod__(self, other): ... + def __rmul__(self, other): ... + def __ror__(self, other): ... + def __rpow__(self, other): ... + def __rrshift__(self, other): ... + def __rshift__(self, other): ... 
+ def __rsub__(self, other): ... + def __rtruediv__(self, other): ... + def __rxor__(self, other): ... + def __sub__(self, other): ... + def __truediv__(self, other): ... + def __xor__(self, other): ... + +def typeof(obj: atom): ... +def pack_into(__obj: _IT, __struct: atom[_IT, _OT], __io: IO, **globals) -> None: ... +def pack(__obj: _IT, __struct: atom[_IT, _OT], **globals) -> bytes: ... +def sizeof(obj: atom[_IT, _OT], globals: Optional[dict | Context] = ...) -> int: ... +def unpack(__io: IO | bytes, __struct: atom[_IT, _OT], **globals) -> _OT: ... + +__all__ = [ + "Arch", + "Bool", + "Char", + "Context", + "ContextPath", + "DefaultOptionType", + "Endian", + "Float", + "Int", + "InvalidDefaultType", + "Option", + "Padding", + "State", + "Struct", + "VarInt", + "atoffset", + "atom", + "binaryexpr", + "builtinatom", + "catom", + "computed", + "condition", + "const", + "cstring", + "enumeration", + "fieldinfo", + "layer", + "lazy", + "lengthinfo", + "objlayer", + "octetstring", + "patom", + "pstring", + "repeated", + "seqlayer", + "string", + "switch", + "unaryexpr", + "typeof", + "pack_into", + "pack", + "sizeof", + "unpack", + "BIG_ENDIAN", + "DefaultOption", + "FIELD_OPTIONS", + "F_DYNAMIC", + "F_SEQUENTIAL", + "HOST_ARCH", + "InvalidDefault", + "LITTLE_ENDIAN", + "NATIVE_ENDIAN", + "STRUCT_OPTIONS", + "S_DISCARD_CONST", + "S_DISCARD_UNNAMED", + "S_EVAL_ANNOTATIONS", + "S_REPLACE_TYPES", + "S_SLOTS", + "S_UNION", + "TYPE_MAP", + "boolean", + "char", + "f16", + "f32", + "f64", + "i128", + "i16", + "i24", + "i32", + "i64", + "i8", + "lsbvarint", + "padding", + "u128", + "u16", + "u24", + "u32", + "u64", + "u8", + "varint", +] diff --git a/src/caterpillar/__init__.py b/src/caterpillar/__init__.py index 546e6d8b..4b3eddee 100644 --- a/src/caterpillar/__init__.py +++ b/src/caterpillar/__init__.py @@ -14,12 +14,12 @@ # along with this program. If not, see . 
import warnings -__version__ = "2.4.5" +__version__ = "2.5.0" __release__ = None __author__ = "MatrixEditor" -def native_support(): +def native_support() -> bool: """Return True if native support is available.""" try: # pylint: disable-next=import-outside-toplevel @@ -31,4 +31,6 @@ def native_support(): # Explicitly report deprecation warnings -warnings.filterwarnings("default", module="caterpillar") \ No newline at end of file +warnings.filterwarnings("default", module="caterpillar") + +__all__ = ["__version__", "__author__", "native_support"] diff --git a/src/caterpillar/_common.py b/src/caterpillar/_common.py old mode 100644 new mode 100755 index dafb1ae4..d8ff8a16 --- a/src/caterpillar/_common.py +++ b/src/caterpillar/_common.py @@ -14,9 +14,9 @@ # along with this program. If not, see . import itertools -from typing import Collection, List, Any +from typing import Collection -from caterpillar.abc import _ContextLike, _StreamType, _PrefixedType +from caterpillar.abc import _PrefixedType from caterpillar.context import ( Context, CTX_PATH, @@ -31,7 +31,7 @@ class WithoutContextVar: - def __init__(self, context: _ContextLike, name, value) -> None: + def __init__(self, context, name, value) -> None: self.context = context self.old_value = context[name] self.value = value @@ -48,7 +48,7 @@ def __exit__(self, exc_type, exc_value, traceback) -> None: self.context[CTX_FIELD] = self.field -def unpack_seq(context: _ContextLike, unpack_one) -> List[Any]: +def unpack_seq(context, unpack_one) -> Collection: """Generic function to unpack sequenced elements. :param stream: the input stream @@ -68,6 +68,7 @@ def unpack_seq(context: _ContextLike, unpack_one) -> List[Any]: # the new context. The '_pos' attribute will be adjusted automatically. 
values = [] # always list (maybe add factory) seq_context = Context( + _root=context._root, _parent=context, _io=stream, _length=length, @@ -113,7 +114,7 @@ def unpack_seq(context: _ContextLike, unpack_one) -> List[Any]: return values -def pack_seq(seq: Collection[Any], context: _ContextLike, pack_one) -> None: +def pack_seq(seq, context, pack_one) -> None: """Generic function to pack sequenced elements. :param seq: the iterable of elements @@ -142,6 +143,7 @@ def pack_seq(seq: Collection[Any], context: _ContextLike, pack_one) -> None: # Special elements '_index' and '_length' can be referenced within # the new context. The '_pos' attribute will be adjusted automatically. seq_context = Context( + _root=context._root, _parent=context, _io=stream, _length=count, @@ -164,7 +166,7 @@ def pack_seq(seq: Collection[Any], context: _ContextLike, pack_one) -> None: raise StructException(str(exc), seq_context) from exc -def iseof(stream: _StreamType) -> bool: +def iseof(stream) -> bool: """ Check if the stream is at the end of the file. diff --git a/src/caterpillar/_common.pyi b/src/caterpillar/_common.pyi new file mode 100755 index 00000000..2d6e5df1 --- /dev/null +++ b/src/caterpillar/_common.pyi @@ -0,0 +1,44 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+import types + +from typing import Any, Callable, Collection +from caterpillar.abc import _ContextLike, _OT, _IT, _StreamType +from caterpillar.fields._base import Field + +class WithoutContextVar: + context: _ContextLike + old_value: Any + value: Any + name: str + field: Field + def __init__(self, context: _ContextLike, name: str, value: Any) -> None: ... + def __enter__(self) -> None: ... + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: types.TracebackType | None, + ) -> None: ... + +def unpack_seq( + context: _ContextLike, unpack_one: Callable[[_ContextLike], _OT] +) -> Collection[_OT]: ... +def pack_seq( + seq: Collection[_IT], + context: _ContextLike, + pack_one: Callable[[_IT, _ContextLike], None], +) -> None: ... +def iseof(stream: _StreamType) -> bool: ... diff --git a/src/caterpillar/abc.py b/src/caterpillar/abc.py old mode 100644 new mode 100755 index 00bedbbb..9fbc0b90 --- a/src/caterpillar/abc.py +++ b/src/caterpillar/abc.py @@ -15,7 +15,7 @@ from abc import abstractmethod from io import IOBase from typing import Protocol, runtime_checkable -from typing import Dict, Any, Optional, Union, Callable +from typing import Any, Optional, Union, Callable #: Type alias for IOBase to indicate a stream type _StreamType = IOBase @@ -23,7 +23,6 @@ _GreedyType = type(...) _PrefixedType = slice -_ContextPathStr = str @runtime_checkable @@ -34,25 +33,20 @@ class _ContextLike(Protocol): It allows direct attribute access and modification. 
""" - @abstractmethod def __context_getattr__(self, path: str) -> Any: pass # TODO - @abstractmethod def __context_setattr__(self, path: str, value: Any) -> None: pass @property - @abstractmethod def _root(self) -> Optional["_ContextLike"]: pass - @abstractmethod def __getitem__(self, key: str) -> Any: pass - @abstractmethod def __setitem__(self, key: str, value: Any) -> None: pass @@ -60,21 +54,22 @@ def __setitem__(self, key: str, value: Any) -> None: @runtime_checkable class _ContextLambda(Protocol): """ - An abstract base class for a lambda function that takes a _ContextLike object as an argument. + An abstract base class for a lambda function that takes a _ContextLike + object as an argument. """ - @abstractmethod def __call__(self, context: _ContextLike) -> Any: pass @runtime_checkable -class _Action(Protocol): - @abstractmethod +class _SupportsActionUnpack(Protocol): def __action_pack__(self, context: _ContextLike) -> None: pass - @abstractmethod + +@runtime_checkable +class _SupportsActionPack(Protocol): def __action_unpack__(self, context: _ContextLike) -> None: pass @@ -85,7 +80,6 @@ class _SupportsPack(Protocol): An abstract base class for objects that support packing data into a binary stream. """ - @abstractmethod def __pack__(self, obj: Any, context: _ContextLike) -> None: pass @@ -96,7 +90,6 @@ class _SupportsUnpack(Protocol): An abstract base class for objects that support unpacking data from a binary stream. """ - @abstractmethod def __unpack__(self, context: _ContextLike) -> Any: pass @@ -107,9 +100,8 @@ class _SupportsSize(Protocol): An abstract base class for objects that support determining the size of packed data. """ - @abstractmethod def __size__(self, context: _ContextLike) -> int: - pass + return 0 @runtime_checkable @@ -118,18 +110,18 @@ class _StructLike(Protocol): An abstract base class for struct-like objects that can be packed, unpacked, and have a size. 
""" - @abstractmethod def __size__(self, context: _ContextLike) -> int: - pass + return 0 - @abstractmethod def __unpack__(self, context: _ContextLike) -> Any: pass - @abstractmethod def __pack__(self, obj: Any, context: _ContextLike) -> None: pass + +@runtime_checkable +class _SupportsType(Protocol): def __type__(self) -> Optional[Union[type, str]]: pass @@ -142,19 +134,8 @@ class _ContainsStruct(Protocol): __struct__: _StructLike -class _EnumLike(Protocol): - """ - An abstract base class for enum-like objects with a value, name, and mappings. - """ - - value: Any - name: str - _member_map_: Dict[str, "_EnumLike"] - _value2member_map_: Dict[Any, "_EnumLike"] - - @runtime_checkable -class _Switch(Protocol): +class _SwitchLike(Protocol): """ An abstract base class for a switch-like object that generates a _StructLike based on a value and context. @@ -165,42 +146,9 @@ def __call__(self, value: Any, context: _ContextLike, **kwds) -> _StructLike: pass -# TODO: place this somewhere else -STRUCT_FIELD = "__struct__" - - -def hasstruct(obj: Any) -> bool: - """ - Check if the given object has a structure attribute. - - :param obj: The object to check. - :return: True if the object has a structure attribute, else False. - """ - cls_dict = getattr(obj.__class__ if not isinstance(obj, type) else obj, "__dict__") - return STRUCT_FIELD in cls_dict - - -def getstruct(obj: Any, /, __default: Any = None) -> _StructLike | None: - """ - Get the structure attribute of the given object. - - :param obj: The object to get the structure attribute from. - :return: The structure attribute of the object. - """ - obj = obj.__class__ if not isinstance(obj, type) else obj - cls_dict = getattr(obj, "__dict__", None) - if cls_dict is None: - return getattr(obj, "__struct__", None) - - return cls_dict.get(STRUCT_FIELD, __default) - +class _SupportsBits(Protocol): + def __bits__(self) -> int: ... 
-def typeof(struct: Union[_StructLike, _ContainsStruct]) -> object: - if hasstruct(struct): - struct = getstruct(struct) - __type__ = getattr(struct, "__type__", None) - if not __type__: - return Any - # this function must return a type - return __type__() or Any +class _ContainsBits(Protocol): + __bits__: int diff --git a/src/caterpillar/abc.pyi b/src/caterpillar/abc.pyi new file mode 100755 index 00000000..e1af0461 --- /dev/null +++ b/src/caterpillar/abc.pyi @@ -0,0 +1,99 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from io import IOBase +from types import EllipsisType +from typing import Any, Callable, Optional, Protocol, TypeVar, Union, runtime_checkable + +_IT = TypeVar("_IT") +_IT_co = TypeVar("_IT_co") +_IT_contra = TypeVar("_IT_contra", contravariant=True) +_OT = TypeVar("_OT") +_OT_co = TypeVar("_OT_co", covariant=True) + +_StreamType = IOBase +_StreamFactory = Callable[[], _StreamType] + +_GreedyType = EllipsisType +_PrefixedType = slice # [_StructLike[int, int], NoneType, NoneType] + +_LengthT = Union[int, _PrefixedType, _GreedyType, _ContextLambda] + +@runtime_checkable +class _ContextLike(Protocol): + @property + def _root(self) -> Optional[_ContextLike]: ... + def __context_getattr__(self, path: str) -> Any: ... + def __context_setattr__(self, path: str, value: Any) -> None: ... + def __getitem__(self, key, /) -> Any: ... 
+ def __setitem__(self, key, value: Any, /) -> None: ... + +_ContextLambdaReturnT_co = TypeVar( + "_ContextLambdaReturnT_co", covariant=True, default=Any +) + +@runtime_checkable +class _ContextLambda(Protocol[_ContextLambdaReturnT_co]): + def __call__(self, context: _ContextLike) -> _ContextLambdaReturnT_co: ... + +@runtime_checkable +class _StructLike(Protocol[_IT_contra, _OT_co]): + def __size__(self, context: _ContextLike) -> int: ... + def __unpack__(self, context: _ContextLike) -> _OT_co: ... + def __pack__(self, obj: _IT_contra, context: _ContextLike) -> None: ... + def __type__(self) -> Optional[Union[type, str]]: ... + +_StructT = Union[_ContainsStruct[_IT, _OT], _StructLike[_IT, _OT], _ContextLambda] + +@runtime_checkable +class _ContainsStruct(Protocol[_IT_contra, _OT]): + __struct__: _StructLike[_IT_contra, _OT] + +@runtime_checkable +class _SupportsActionUnpack(Protocol): + def __action_pack__(self, context: _ContextLike) -> None: + pass + +@runtime_checkable +class _SupportsActionPack(Protocol): + def __action_unpack__(self, context: _ContextLike) -> None: + pass + +_ActionLike = Union[_SupportsActionPack, _SupportsActionUnpack] + +@runtime_checkable +class _SupportsPack(Protocol[_IT_contra]): + def __pack__(self, obj: _IT_contra, context: _ContextLike) -> None: ... + +@runtime_checkable +class _SupportsSize(Protocol): + def __size__(self, context: _ContextLike) -> int: ... + +@runtime_checkable +class _SupportsUnpack(Protocol[_OT_co]): + def __unpack__(self, context: _ContextLike) -> _OT_co: ... + +_SwitchLambda = Callable[[Any, _ContextLike], _StructLike[_IT, _OT]] +_SwitchLike = Union[ + dict[Any, Union[_ContainsStruct[_IT, _OT], _StructLike[_IT, _OT]]], + _SwitchLambda[_IT, _OT], +] + +@runtime_checkable +class _SupportsBits(Protocol): + def __bits__(self) -> int: ... 
+ +@runtime_checkable +class _ContainsBits(Protocol): + __bits__: int diff --git a/src/caterpillar/byteorder.py b/src/caterpillar/byteorder.py old mode 100644 new mode 100755 index 9c44030d..04982adf --- a/src/caterpillar/byteorder.py +++ b/src/caterpillar/byteorder.py @@ -17,8 +17,7 @@ from dataclasses import dataclass from enum import Enum -# constant to identify the byteorder of an object -BYTEORDER_FIELD = "__byteorder__" +from caterpillar.shared import ATTR_BYTEORDER @dataclass(frozen=True) @@ -56,7 +55,7 @@ def apply(self, other): :param other: The object to which the byte order information should be applied. """ - setattr(other, BYTEORDER_FIELD, self) + setattr(other, ATTR_BYTEORDER, self) def __add__(self, other): """ @@ -89,14 +88,14 @@ def __or__(self, other): ) -def byteorder(obj, default: ByteOrder = None) -> ByteOrder: +def byteorder(obj, default=None) -> ByteOrder: """ Get the byte order of an object, defaulting to SysNative if not explicitly set. :param obj: The object to retrieve the byte order from. :return: The byte order of the object. """ - return getattr(obj, BYTEORDER_FIELD, default or SysNative) + return getattr(obj, ATTR_BYTEORDER, default or SysNative) @dataclass(frozen=True) diff --git a/src/caterpillar/byteorder.pyi b/src/caterpillar/byteorder.pyi new file mode 100755 index 00000000..e4babc47 --- /dev/null +++ b/src/caterpillar/byteorder.pyi @@ -0,0 +1,74 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from enum import Enum +from typing import Optional + +class ByteOrder: + name: str + ch: str + + class Alignment(Enum): + NONE = 0 + NATIVE = 1 + + alignment: Alignment + + class Size(Enum): + STANDARD = 0 + NATIVE = 1 + + size: Size + + def apply(self, other: ByteOrder) -> None: ... + def __add__(self, other: ByteOrder): ... + def __or__(self, other: ByteOrder): ... + def __init__( + self, + name: str, + ch: str, + alignment: Optional[Alignment] = ..., + size: Optional[Size] = ..., + ) -> None: ... + +Native: ByteOrder = ... +BigEndian: ByteOrder = ... +LittleEndian: ByteOrder = ... +NetEndian: ByteOrder = ... +SysNative: ByteOrder = ... + +def byteorder(obj, default: Optional[ByteOrder] = None) -> ByteOrder: ... + +class Arch: + name: str + ptr_size: int + + def __init__(self, name:str, ptr_size: int) -> None: ... + +system_arch: Arch = ... +x86: Arch = ... +x86_64: Arch = ... +ARM: Arch = ... +ARM64: Arch = ... +AARCH64: Arch = ... +PowerPC: Arch = ... +PowerPC64: Arch = ... +MIPS: Arch = ... +MIPS64: Arch = ... +SPARC: Arch = ... +SPARC64: Arch = ... +RISC_V64: Arch = ... +RISC_V: Arch = ... +AMD: Arch = ... +AMD64: Arch = ... \ No newline at end of file diff --git a/src/caterpillar/c.pyi b/src/caterpillar/c.pyi new file mode 100755 index 00000000..a4fe9785 --- /dev/null +++ b/src/caterpillar/c.pyi @@ -0,0 +1,28 @@ +from typing import Callable, Iterable, Type, TypeVar, overload +from caterpillar._C import * # noqa +from caterpillar._C import __all__ as _c_all + +_T = TypeVar("_T") + +@overload +def struct( + cls: None = None, + /, + options: Iterable[Option] = ..., + endian: Endian = ..., + arch: Arch = ..., + field_options: Iterable[Option] = ..., +) -> Callable[[Type[_T]], Type[_T]]: ... 
+@overload +def struct( + cls: Type[_T], + /, + options: Iterable[Option] = ..., + endian: Endian = ..., + arch: Arch = ..., + field_options: Iterable[Option] = ..., +) -> Type[_T]: ... + + +# pyright: reportUnsupportedDunderAll=false +__all__ = _c_all + ["struct"] diff --git a/src/caterpillar/context.py b/src/caterpillar/context.py old mode 100644 new mode 100755 index 4bc08466..fe8fd1c6 --- a/src/caterpillar/context.py +++ b/src/caterpillar/context.py @@ -18,12 +18,11 @@ import sys import warnings -from typing import Callable, Any, Union, Self +from typing import Callable, Any, Self from types import FrameType from dataclasses import dataclass -from caterpillar.abc import _ContextLambda, _ContextLike from caterpillar.exception import StructException from caterpillar.registry import to_struct @@ -38,6 +37,7 @@ CTX_PATH = "_path" CTX_SEQ = "_is_seq" CTX_ARCH = "_arch" +CTX_ROOT = "_root" class Context(dict): @@ -92,15 +92,8 @@ def __context_setattr__(self, path: str, value: Any) -> None: setattr(obj, nodes[1], value) @property - def _root(self) -> _ContextLike: - current = self - while CTX_PARENT in current: - # dict-like access is much faster - parent = current[CTX_PARENT] - if parent is None: - break - current = parent - return current + def _root(self): + return self.get("_root", self) class ExprMixin: @@ -239,12 +232,7 @@ class Format: __slots__ = "func", "annotations", "namelist", "depth" - def __init__(self, condition: Union[_ContextLambda, bool], depth=2): - if (sys.version_info.major, sys.version_info.minor) >= (3, 14): - warnings.warn( - "Python3.14 breaks support for Contitional fields. Conditional " - "statements must be defined manually until a fix has been released." 
- ) + def __init__(self, condition, depth=2): self.func = condition self.annotations = None self.namelist = None @@ -257,6 +245,11 @@ def getframe(self, num: int, msg=None) -> FrameType: raise StructException(msg) from exc def __enter__(self) -> Self: + if (sys.version_info.major, sys.version_info.minor) >= (3, 14): + warnings.warn( + "Python3.14 breaks support for Contitional fields. Conditional " + "statements must be defined manually until a fix has been released." + ) frame = self.getframe(self.depth, "Could not enter condition context!") # keep track of all annotations try: @@ -316,8 +309,8 @@ class BinaryExpression(ExprMixin): """ operand: Callable[[Any, Any], Any] - left: Union[Any, _ContextLambda] - right: Union[Any, _ContextLambda] + left: Any + right: Any def __call__(self, context: Context, **kwds): lhs = self.left(context, **kwds) if callable(self.left) else self.left @@ -349,9 +342,9 @@ class UnaryExpression: name: str operand: Callable[[Any], Any] - value: Union[Any, _ContextLambda] + value: Any - def __call__(self, context: Context, **kwds): + def __call__(self, context, **kwds): value = self.value(context, **kwds) if callable(self.value) else self.value return self.operand(value) @@ -373,7 +366,7 @@ class ContextPath(ExprMixin): Represents a lambda function for retrieving a value from a Context based on a specified path. """ - def __init__(self, path: str = None) -> None: + def __init__(self, path=None) -> None: """ Initializes a ContextPath instance with an optional path. @@ -384,7 +377,7 @@ def __init__(self, path: str = None) -> None: self.call_kwargs = None self.getitem_args = None - def __call__(self, context: _ContextLike = None, **kwds): + def __call__(self, context=None, **kwds): """ Calls the lambda function to retrieve a value from a Context. 
@@ -405,7 +398,7 @@ def __getitem__(self, key) -> Self: return self def __type__(self) -> type: - return Any + return object def __getattribute__(self, key: str) -> ContextPath: """ @@ -461,7 +454,7 @@ class ContextLength(ExprMixin): def __init__(self, path: ContextPath) -> None: self.path = path - def __call__(self, context: Context = None, **kwds): + def __call__(self, context=None, **kwds): """ Calls the lambda function to retrieve a value from a Context. diff --git a/src/caterpillar/context.pyi b/src/caterpillar/context.pyi new file mode 100755 index 00000000..d2741056 --- /dev/null +++ b/src/caterpillar/context.pyi @@ -0,0 +1,133 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from types import FrameType +from typing import ( + Any, + Callable, + Dict, + List, + Optional, + Self, + Type, + Union, + dataclass_transform, +) +from caterpillar.abc import _ContextLike, _ContextLambda + +CTX_PARENT: str = ... +CTX_OBJECT: str = ... +CTX_OFFSETS: str = ... +CTX_STREAM: str = ... +CTX_FIELD: str = ... +CTX_VALUE: str = ... +CTX_POS: str = ... +CTX_INDEX: str = ... +CTX_PATH: str = ... +CTX_SEQ: str = ... +CTX_ARCH: str = ... +CTX_ROOT: str = ... + +class Context(dict, _ContextLike): + def __setattr__(self, key: str, value: Any) -> None: ... + def __getattribute__(self, key: str): ... + def __context_getattr__(self, path: str): ... 
+ def __context_setattr__(self, path: str, value: Any) -> None: ... + +class ExprMixin: + def __add__(self, other: Any) -> BinaryExpression: ... + def __sub__(self, other: Any) -> BinaryExpression: ... + def __mul__(self, other: Any) -> BinaryExpression: ... + def __floordiv__(self, other: Any) -> BinaryExpression: ... + def __truediv__(self, other: Any) -> BinaryExpression: ... + def __mod__(self, other: Any) -> BinaryExpression: ... + def __pow__(self, other: Any) -> BinaryExpression: ... + def __xor__(self, other: Any) -> BinaryExpression: ... + def __and__(self, other: Any) -> BinaryExpression: ... + def __or__(self, other: Any) -> BinaryExpression: ... + def __rshift__(self, other: Any) -> BinaryExpression: ... + def __lshift__(self, other: Any) -> BinaryExpression: ... + __div__ = __truediv__ + def __radd__(self, other: Any) -> BinaryExpression: ... + def __rsub__(self, other: Any) -> BinaryExpression: ... + def __rmul__(self, other: Any) -> BinaryExpression: ... + def __rfloordiv__(self, other: Any) -> BinaryExpression: ... + def __rtruediv__(self, other: Any) -> BinaryExpression: ... + def __rmod__(self, other: Any) -> BinaryExpression: ... + def __rpow__(self, other: Any) -> BinaryExpression: ... + def __rxor__(self, other: Any) -> BinaryExpression: ... + def __rand__(self, other: Any) -> BinaryExpression: ... + def __ror__(self, other: Any) -> BinaryExpression: ... + def __rrshift__(self, other: Any) -> BinaryExpression: ... + def __rlshift__(self, other: Any) -> BinaryExpression: ... + def __neg__(self) -> UnaryExpression: ... + def __pos__(self) -> UnaryExpression: ... + def __invert__(self) -> UnaryExpression: ... + def __contains__(self, other: Any) -> BinaryExpression: ... + def __gt__(self, other: Any) -> BinaryExpression: ... + def __ge__(self, other: Any) -> BinaryExpression: ... + def __lt__(self, other: Any) -> BinaryExpression: ... + def __le__(self, other: Any) -> BinaryExpression: ... + def __eq__(self, other: Any) -> BinaryExpression: ... 
+ def __ne__(self, other: Any) -> BinaryExpression: ... + +class ConditionContext: + func: Union[_ContextLambda[bool], bool] + annotations: dict + namelist: List[str] + depth: int + def __init__(self, condition: _ContextLambda[bool] | bool, depth: int = 2) -> None: ... + def getframe(self, num: int, msg: str | None = None) -> FrameType: ... + def __enter__(self) -> Self: ... + def __exit__(self, *_) -> None: ... + + +@dataclass_transform() +class BinaryExpression(ExprMixin, _ContextLambda): + operand: Callable[[Any, Any], Any] + left: Any | _ContextLambda + right: Any | _ContextLambda + def __call__(self, context: _ContextLike) -> Any: ... + def __enter__(self) -> Self: ... + def __exit__(self, *_) -> None: ... + +@dataclass_transform() +class UnaryExpression(_ContextLambda): + name: str + operand: Callable[[Any], Any] + value: Any | _ContextLambda + def __call__(self, context: _ContextLike): ... + def __enter__(self): ... + def __exit__(self, *_) -> None: ... + +class ContextPath(ExprMixin, _ContextLambda): + path: str + call_kwargs: Dict[str, Any] + getitem_args: List[Any] + def __init__(self, path: Optional[str] = None) -> None: ... + def __call__(self, context: Optional[_ContextLike] = None, **kwds): ... + def __getitem__(self, key) -> Self: ... + def __type__(self) -> Type[Any]: ... + def __getattribute__(self, key: str) -> ContextPath: ... + @property + def parent(self) -> ContextPath: ... + +class ContextLength(ExprMixin, _ContextLambda): + path: str + def __init__(self, path: ContextPath) -> None: ... + def __call__(self, context: Optional[_ContextLike] = None, **kwds) -> Any: ... + +this: ContextPath +ctx: ContextPath +parent: ContextPath \ No newline at end of file diff --git a/src/caterpillar/exception.py b/src/caterpillar/exception.py index 7aac8c19..c0b8bf52 100644 --- a/src/caterpillar/exception.py +++ b/src/caterpillar/exception.py @@ -13,13 +13,11 @@ # You should have received a copy of the GNU General Public License # along with this program. 
If not, see . -from caterpillar.abc import _ContextLike - class StructException(Exception): """Base class for all struct-related exceptions""" - def __init__(self, message: str, context: _ContextLike | None = None) -> None: + def __init__(self, message: str, context=None) -> None: super().__init__(message) self.context = context if context: diff --git a/src/caterpillar/exception.pyi b/src/caterpillar/exception.pyi new file mode 100755 index 00000000..8dac6a79 --- /dev/null +++ b/src/caterpillar/exception.pyi @@ -0,0 +1,28 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from caterpillar.abc import _ContextLike + +class StructException(Exception): + context: _ContextLike + def __init__(self, message: str, context: _ContextLike | None = None) -> None: ... + +class DynamicSizeError(StructException): ... +class OptionError(StructException): ... +class ValidationError(StructException): ... +class UnsupportedOperation(StructException): ... +class InvalidValueError(StructException): ... +class StreamError(StructException): ... +class DelegationError(StructException): ... +class Stop(StructException): ... 
diff --git a/src/caterpillar/fields/__init__.py b/src/caterpillar/fields/__init__.py index 887b5204..87236985 100644 --- a/src/caterpillar/fields/__init__.py +++ b/src/caterpillar/fields/__init__.py @@ -13,7 +13,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . from ._base import Field, INVALID_DEFAULT, DEFAULT_OPTION, singleton -from ._mixin import FieldMixin, FieldStruct, Chain, Operator +from ._mixin import FieldMixin, FieldStruct, Chain, Operator, get_args, get_kwargs from .common import ( PyStructFormattedField, Transformer, @@ -54,6 +54,7 @@ Aligned, align, Lazy, + ENUM_STRICT, ) from .varint import VarInt, VARINT_LSB, vint from .compression import ( @@ -63,9 +64,21 @@ LZMACompressed, LZOCompressed, ) -from .crypto import Encrypted, Xor, Or, And -from .net import IPv4Address, IPv6Address, MAC, MACAddress -from .pointer import uintptr, intptr, offintptr, offuintptr, Pointer +from .crypto import Encrypted, Xor, Or, And, KeyCipher +from .net import MAC, MACAddress, IPv4Address, IPv6Address +from .pointer import ( + uintptr, + intptr, + offintptr, + offuintptr, + Pointer, + pointer, + intptr_fn, + PTR_STRICT, + relative_pointer, + RelativePointer, + uintptr_fn, +) from .conditional import ConditionalChain, If, Else, ElseIf from .hook import IOHook from .digest import ( @@ -110,4 +123,141 @@ Crc32_Field, Adler_Algo, Adler_Field, + CTX_DIGEST, + CTX_DIGEST_ALGO, + CTX_DIGEST_HOOK, + CTX_DIGEST_OBJ, + HMACAlgorithm, ) + +__all__ = [ + "Digest", + "Algorithm", + "Md5", + "Sha1", + "Sha2_256", + "Sha2_224", + "Sha2_384", + "Sha2_512", + "Sha3_224", + "Sha3_256", + "Sha3_384", + "Sha3_512", + "Crc32", + "Adler", + "HMAC", + "DigestField", + "DigestFieldAction", + "Md5_Algo", + "Md5_Field", + "Sha1_Algo", + "Sha1_Field", + "Sha2_256_Algo", + "Sha2_256_Field", + "Sha2_224_Algo", + "Sha2_224_Field", + "Sha2_384_Algo", + "Sha2_384_Field", + "Sha2_512_Algo", + "Sha2_512_Field", + "Sha3_224_Algo", + 
"Sha3_224_Field", + "Sha3_256_Algo", + "Sha3_256_Field", + "Sha3_384_Algo", + "Sha3_384_Field", + "Sha3_512_Algo", + "Sha3_512_Field", + "Crc32_Algo", + "Crc32_Field", + "Adler_Algo", + "Adler_Field", + "CTX_DIGEST", + "CTX_DIGEST_ALGO", + "CTX_DIGEST_HOOK", + "CTX_DIGEST_OBJ", + "HMACAlgorithm", + "uintptr", + "intptr", + "offintptr", + "offuintptr", + "Pointer", + "pointer", + "intptr_fn", + "PTR_STRICT", + "relative_pointer", + "RelativePointer", + "uintptr_fn", + "Compressed", + "ZLibCompressed", + "Bz2Compressed", + "LZMACompressed", + "LZOCompressed", + "PyStructFormattedField", + "Transformer", + "Const", + "ConstBytes", + "ConstString", + "Enum", + "String", + "Bytes", + "Memory", + "Computed", + "Pass", + "CString", + "Prefixed", + "Int", + "UInt", + "padding", + "char", + "boolean", + "int8", + "uint8", + "int16", + "uint16", + "int24", + "uint24", + "int32", + "uint32", + "int64", + "uint64", + "ssize_t", + "size_t", + "float16", + "float32", + "float64", + "double", + "void_ptr", + "Uuid", + "Aligned", + "align", + "Lazy", + "ENUM_STRICT", + "Field", + "INVALID_DEFAULT", + "DEFAULT_OPTION", + "singleton", + "FieldMixin", + "FieldStruct", + "Chain", + "Operator", + "get_args", + "get_kwargs", + "VarInt", + "VARINT_LSB", + "vint", + "Encrypted", + "Xor", + "Or", + "And", + "KeyCipher", + "MAC", + "MACAddress", + "IPv4Address", + "IPv6Address", + "ConditionalChain", + "If", + "Else", + "ElseIf", + "IOHook", +] diff --git a/src/caterpillar/fields/_base.py b/src/caterpillar/fields/_base.py old mode 100644 new mode 100755 index 6398516f..31ebd409 --- a/src/caterpillar/fields/_base.py +++ b/src/caterpillar/fields/_base.py @@ -12,21 +12,14 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
-from typing import Self, Union, Set, Any, Dict, Optional, List +from typing import Union, Any, List from io import BytesIO from caterpillar.abc import ( _StructLike, - _ContextLambda, - _Switch, - _StreamType, - _ContextLike, _GreedyType, _PrefixedType, - hasstruct, - getstruct, - typeof, ) -from caterpillar.byteorder import ByteOrder, SysNative, Arch, system_arch +from caterpillar.byteorder import ByteOrder, SysNative, system_arch from caterpillar.exception import ( DynamicSizeError, StructException, @@ -41,10 +34,9 @@ F_SEQUENTIAL, Flag, ) -from caterpillar.context import CTX_OFFSETS, CTX_STREAM -from caterpillar.context import CTX_FIELD -from caterpillar.context import CTX_VALUE, CTX_SEQ +from caterpillar.context import CTX_OFFSETS, CTX_STREAM, CTX_FIELD, CTX_VALUE, CTX_SEQ from caterpillar import registry +from caterpillar.shared import hasstruct, getstruct, typeof def singleton(cls): @@ -57,80 +49,79 @@ def singleton(cls): DEFAULT_OPTION = object() -# @dataclass(init=False) class Field: """Represents a field in a data structure.""" - struct: Union[_StructLike, _ContextLambda] - """ - Stores a reference to the actual parsing struct that will be used to parse or - build our data. This attribute is never null. - """ - - order: ByteOrder - """ - An automatically inferred or explicitly specified byte order. Note that this - attribute may have no impact on the underlying struct. The default byte order - is ``SysNative``. - """ - - offset: Union[_ContextLambda, int] - """ - Using the ``@`` operator an offset can be assigned to a field. If set, the - stream will be reset and set to the original position. - - The minus one indicates that no offset has been associated with this field. - """ - - flags: Dict[int, Flag] - """ - Additional options that can be enabled using the logical OR operator ``|``. 
- - Note that there are default options that will be set automatically: - - * ``keep_position``: - Persists the streams position after parsing data using the underlying - struct. In relation to ``offset``, this option will reset the stream to - its original position if deactivated. - * ``dynamic``: - Specifies that this field does not store a constant size. - * ``sequential``: - An automatic flag that indicates this field stores a sequential struct. - """ - - amount: Union[_ContextLambda, int, _GreedyType, _PrefixedType] - """ - A constant or dynamic value to represent the amount of structs. Zero indicates - there are no sequence types associated with this field. - """ - - options: Union[_Switch, Dict[Any, _StructLike], None] - """ - An extra attribute that stores additional options that can be translates as a - switch statement. - """ - - condition: Union[_ContextLambda, bool] - """ - Given optional execution this attribute should be used to return a boolean value - that decides whether the value of this field should be set. Using ``//`` the - condition can be set during class declaration. - """ - - arch: Arch - """ - The field's architecture (inferred or explicitly specified). - """ - - default: Optional[Any] - """ - The configured default value. - """ - - bits: Union[_ContextLambda, int, None] - """ - The configured bits. - """ + # struct + # """ + # Stores a reference to the actual parsing struct that will be used to parse or + # build our data. This attribute is never null. + # """ + + # order: ByteOrder + # """ + # An automatically inferred or explicitly specified byte order. Note that this + # attribute may have no impact on the underlying struct. The default byte order + # is ``SysNative``. + # """ + + # offset + # """ + # Using the ``@`` operator an offset can be assigned to a field. If set, the + # stream will be reset and set to the original position. + + # The minus one indicates that no offset has been associated with this field. 
+ # """ + + # flags: Dict[int, Flag] + # """ + # Additional options that can be enabled using the logical OR operator ``|``. + + # Note that there are default options that will be set automatically: + + # * ``keep_position``: + # Persists the streams position after parsing data using the underlying + # struct. In relation to ``offset``, this option will reset the stream to + # its original position if deactivated. + # * ``dynamic``: + # Specifies that this field does not store a constant size. + # * ``sequential``: + # An automatic flag that indicates this field stores a sequential struct. + # """ + + # amount: Union[_ContextLambda, int, _GreedyType, _PrefixedType] + # """ + # A constant or dynamic value to represent the amount of structs. Zero indicates + # there are no sequence types associated with this field. + # """ + + # options + # """ + # An extra attribute that stores additional options that can be translates as a + # switch statement. + # """ + + # condition + # """ + # Given optional execution this attribute should be used to return a boolean value + # that decides whether the value of this field should be set. Using ``//`` the + # condition can be set during class declaration. + # """ + + # arch + # """ + # The field's architecture (inferred or explicitly specified). + # """ + + # default + # """ + # The configured default value. + # """ + + # bits + # """ + # The configured bits. 
+ # """ __slots__ = ( "struct", @@ -148,18 +139,17 @@ class Field: def __init__( self, - struct: Union[_StructLike, _ContextLambda], - order: ByteOrder | None = None, - offset: Union[_ContextLambda, int] = -1, - flags: Set[Flag] = None, - amount: Union[_ContextLambda, int, _PrefixedType] = 0, - options: Union[_Switch, Dict[Any, _StructLike], None] = None, - condition: Union[_ContextLambda, bool] = True, - arch: Arch = None, - default: Optional[Any] = INVALID_DEFAULT, - bits: Union[_ContextLambda, int, None] = None, + struct, + order=None, + offset=-1, + flags=None, + amount=0, + options=None, + condition=True, + arch=None, + default=INVALID_DEFAULT, + bits=None, ) -> None: - # NOTE: we use a custom init method to automatically set flags self.struct = struct self.order = order or SysNative self.flags = {hash(x): x for x in flags or set([F_KEEP_POSITION])} @@ -178,9 +168,7 @@ def __init__( # that None is still usable as default self.default = default - def _verify_context_value( - self, value: Union[_ContextLambda, Any], expected: type - ) -> None: + def _verify_context_value(self, value, expected) -> None: # As the offset value or amount may be dynamic, we have to candidate # types. There should be an error if none applies. 
if not isinstance(value, expected) and not callable(value): @@ -188,18 +176,18 @@ def _verify_context_value( f"Expected a valid value or context lambda, got {type(value)}" ) - def __or__(self, flag: Flag) -> Self: # add flags + def __or__(self, flag: Flag): # add flags if not isinstance(flag, Flag): raise TypeError(f"Expected a flag, got {type(flag)}") self.flags[hash(flag)] = flag return self - def __xor__(self, flag: Flag) -> Self: # remove flags: + def __xor__(self, flag: Flag): # remove flags: self.flags.pop(hash(flag), None) return self - def __matmul__(self, offset: Union[_ContextLambda, int]) -> Self: + def __matmul__(self, offset): self._verify_context_value(offset, int) self.offset = offset # This operation automatically removes the "keep_position" @@ -208,7 +196,7 @@ def __matmul__(self, offset: Union[_ContextLambda, int]) -> Self: self.flags.pop(F_KEEP_POSITION._hash_, None) return self - def __getitem__(self, dim: Union[_ContextLambda, int, _GreedyType]) -> Self: + def __getitem__(self, dim): self._verify_context_value(dim, (_GreedyType, int, _PrefixedType)) self.amount = dim if self.amount != 0: @@ -216,28 +204,28 @@ def __getitem__(self, dim: Union[_ContextLambda, int, _GreedyType]) -> Self: self.flags[F_SEQUENTIAL._hash_] = F_SEQUENTIAL return self - def __rshift__(self, switch: Union[_Switch, dict]) -> Self: + def __rshift__(self, switch): if not isinstance(switch, dict) and not callable(switch): raise TypeError(f"Expected a valid switch context, got {type(switch)}") self.options = switch return self - def __floordiv__(self, condition: Union[_ContextLambda, bool]) -> Self: + def __floordiv__(self, condition): self._verify_context_value(condition, bool) self.condition = condition return self - def __rsub__(self, bits: Union[_ContextLambda, int]) -> Self: + def __rsub__(self, bits): self._verify_context_value(bits, int) self.bits = bits return self - def __set_byteorder__(self, order: ByteOrder) -> Self: + def __set_byteorder__(self, order: 
ByteOrder): self.order = order return self - def __type__(self) -> type: + def __type__(self): return self.get_type() __ixor__ = __xor__ @@ -256,7 +244,7 @@ def is_seq(self) -> bool: # pylint: disable-next=protected-access return F_SEQUENTIAL._hash_ in self.flags - def is_enabled(self, context: _ContextLike) -> bool: + def is_enabled(self, context) -> bool: """Evaluates the condition of this field. :param context: the context on which to operate @@ -281,7 +269,7 @@ def has_flag(self, flag: Flag) -> bool: # pylint: disable-next=protected-access return flag._hash_ in self.flags or flag in GLOBAL_FIELD_FLAGS - def length(self, context: _ContextLike) -> Union[int, _GreedyType, _PrefixedType]: + def length(self, context): """Calculates the sequence length of this field. :param context: the context on which to operate @@ -298,11 +286,11 @@ def length(self, context: _ContextLike) -> Union[int, _GreedyType, _PrefixedType except Exception as exc: raise DynamicSizeError("Dynamic sized field!", context) from exc - def get_struct(self, value: Any, context: _ContextLike) -> _StructLike: + def get_struct(self, value, context): """Returns the struct from stored options. 
:param value: the unpacked or packed value - :type value: Any + :type value :param context: the current context :type context: _ContextLike :return: the struct that packs or unpacks the data @@ -327,11 +315,11 @@ def get_struct(self, value: Any, context: _ContextLike) -> _StructLike: return getstruct(struct) return struct - def get_offset(self, context: _ContextLike) -> int: + def get_offset(self, context) -> int: """Returns the offset position of this field""" return self.offset(context) if callable(self.offset) else self.offset - def get_type(self) -> type: + def get_type(self): """Returns the annotation type for this field :return: the annotation type @@ -348,24 +336,21 @@ def get_type(self) -> type: types = [typeof(s) for s in self.options.values()] return Union[*types, Any] - def get_name(self) -> Optional[str]: + def get_name(self): return getattr(self, "__name__", None) # IO related stuff - def __unpack__(self, context: _ContextLike) -> Optional[Any]: + def __unpack__(self, context): """Reads packed data from the given stream. This method returns nothing if this field is disabled and applies switch if additional options are configured. - :param stream: the data stream - :type stream: _StreamType :param context: the current context :type context: _ContextLike :return: the parsed data - :rtype: Optional[Any] """ - stream: _StreamType = context[CTX_STREAM] + stream = context[CTX_STREAM] if self.condition is not True and not self.is_enabled(context): # Disabled fields or context lambdas won't pack any data return @@ -413,7 +398,7 @@ def __unpack__(self, context: _ContextLike) -> Optional[Any]: return value - def __pack__(self, obj: Any, context: _ContextLike) -> None: + def __pack__(self, obj, context) -> None: """Writes the given object to the provided stream. There are several options associated with this function. First, disabled @@ -425,16 +410,14 @@ def __pack__(self, obj: Any, context: _ContextLike) -> None: flag ``KEEP_POSITION`` is not found. 
:param obj: the value to write - :type obj: Any - :param stream: the output stream - :type stream: _StreamType + :type obj :param context: the current context with a qualified path :type context: _ContextLike :raises TypeError: if the value is not iterable but this field is marked to be sequential """ # TODO: revisit code - stream: _StreamType = context[CTX_STREAM] + stream = context[CTX_STREAM] if self.condition is not True and not self.is_enabled(context): # Disabled fields or context lambdas won't pack any data return @@ -481,7 +464,7 @@ def __pack__(self, obj: Any, context: _ContextLike) -> None: context._root[CTX_OFFSETS][offset] = stream.getbuffer() context[CTX_STREAM] = base_stream - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """Calculates the size of this field. There are several situations to bear in mind when executing this function: @@ -548,7 +531,7 @@ def __repr__(self) -> str: # --- private type converter --- @registry.TypeConverter(_StructLike) -def _type_converter(annotation: _StructLike, kwargs: dict) -> Field: +def _type_converter(annotation, kwargs): # REVISIT: more options ? 
arch = kwargs.pop("arch", None) order = kwargs.pop("order", None) @@ -559,11 +542,11 @@ def _type_converter(annotation: _StructLike, kwargs: dict) -> Field: class _CallableTypeConverter(registry.TypeConverter): - def matches(self, annotation: Any) -> bool: + def matches(self, annotation) -> bool: # must be a callable but not a type return callable(annotation) and not isinstance(annotation, type) - def convert(self, annotation: Any, kwargs: dict) -> _StructLike: + def convert(self, annotation, kwargs): arch = kwargs.pop("arch", None) order = kwargs.pop("order", None) # callables are treates as context lambdas diff --git a/src/caterpillar/fields/_base.pyi b/src/caterpillar/fields/_base.pyi new file mode 100755 index 00000000..55f37245 --- /dev/null +++ b/src/caterpillar/fields/_base.pyi @@ -0,0 +1,101 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from caterpillar import registry +from caterpillar.abc import ( + _ContextLambda, + _ContextLike, + _GreedyType, + _PrefixedType, + _StructLike, + _SwitchLike, + _IT, + _OT, + _LengthT, + _StructT, +) +from caterpillar.byteorder import ( + Arch, + ByteOrder, +) +from caterpillar.options import ( + Flag, +) +from typing import Any, Optional, Self, Type, TypeVar + +_T = TypeVar("_T") + +def singleton(cls: Type[_T]) -> _T: ... + +INVALID_DEFAULT: object = ... +DEFAULT_OPTION: object = ... 
+ +class Field(_StructLike[_IT, _OT]): + struct: _StructT[_IT, _OT] + order: ByteOrder + offset: _ContextLambda | int + flags: dict[int, Flag] + amount: _LengthT + options: Optional[_SwitchLike[_IT, _OT]] + condition: _ContextLambda | bool + arch: Arch + default: _OT | None + bits: _ContextLambda | int | None + def __init__( + self, + struct: _StructT[_IT, _OT], + order: ByteOrder | None = None, + offset: _ContextLambda | int = -1, + flags: Optional[set[Flag]] = None, + amount: _ContextLambda | int | _PrefixedType = 0, + options: _SwitchLike | dict[Any, _StructLike] | None = None, + condition: _ContextLambda | bool = True, + arch: Optional[Arch] = None, + default: _OT | None = ..., + bits: _ContextLambda | int | None = None, + ) -> None: ... + def __or__(self, flag: Flag) -> Self: ... + def __xor__(self, flag: Flag) -> Self: ... + def __matmul__(self, offset: _ContextLambda | int) -> Self: ... + def __getitem__(self, dim: _LengthT) -> Self: ... + def __rshift__(self, switch: _SwitchLike[_IT, _OT]) -> Self: ... + def __floordiv__(self, condition: _ContextLambda | bool) -> Self: ... + def __rsub__(self, bits: _ContextLambda | int) -> Self: ... + def __set_byteorder__(self, order: ByteOrder) -> Self: ... + def __type__(self) -> type: ... + def __unpack__(self, context: _ContextLike) -> _OT: ... + def __pack__(self, obj: _IT, context: _ContextLike) -> None: ... + def __size__(self, context: _ContextLike) -> int: ... + __ixor__ = __xor__ + __ior__ = __or__ + __ifloordiv__ = __floordiv__ + __irshift__ = __rshift__ + __imatmul__ = __matmul__ + __isub__ = __rsub__ + def _verify_context_value(self, value: Any, expected: type) -> None: ... + def is_seq(self) -> bool: ... + def is_enabled(self, context: _ContextLike) -> bool: ... + def has_condition(self) -> bool: ... + def has_flag(self, flag: Flag) -> bool: ... + def length(self, context: _ContextLike) -> int | _GreedyType | _PrefixedType: ... 
+ def get_struct( + self, value: Any, context: _ContextLike + ) -> _StructLike[_IT, _OT]: ... + def get_offset(self, context: _ContextLike) -> int: ... + def get_type(self) -> type: ... + def get_name(self) -> str | None: ... + +class _CallableTypeConverter(registry.TypeConverter): + def matches(self, annotation: Any) -> bool: ... + def convert(self, annotation: Any, kwargs: dict) -> Field: ... diff --git a/src/caterpillar/fields/_mixin.py b/src/caterpillar/fields/_mixin.py old mode 100644 new mode 100755 index 016d77dd..679e667b --- a/src/caterpillar/fields/_mixin.py +++ b/src/caterpillar/fields/_mixin.py @@ -13,21 +13,14 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . from io import BytesIO -from types import EllipsisType -from typing import Any, Collection, List, Union, Iterable, Callable +from typing import Iterable from functools import partial -from caterpillar.abc import ( - _ContextLike, - _StructLike, - _ContextLambda, - _Switch, - getstruct, -) from caterpillar.byteorder import ByteOrder, byteorder from caterpillar.options import Flag from caterpillar.context import CTX_SEQ, CTX_STREAM from caterpillar._common import unpack_seq, pack_seq, WithoutContextVar +from caterpillar.shared import getstruct from ._base import Field @@ -35,39 +28,39 @@ class FieldMixin: """A simple mixin to support operators used to create :class:`Field` instances.""" - def __or__(self, flag: Flag) -> Field: + def __or__(self, flag: Flag): """Creates a field *with* the given flag.""" return Field(self, byteorder(self)) | flag - def __xor__(self, flag: Flag) -> Field: + def __xor__(self, flag: Flag): """Creates a field *without* the given flag.""" return Field(self, byteorder(self)) ^ flag - def __matmul__(self, offset: Union[_ContextLambda, int]) -> Field: + def __matmul__(self, offset): """Creates a field that should start at the given offset.""" return Field(self, byteorder(self)) @ offset - def __getitem__(self, dim: 
Union[_ContextLambda, int, EllipsisType]) -> Field: + def __getitem__(self, dim): """Returns a sequenced field.""" return Field(self, byteorder(self))[dim] - def __rshift__(self, switch: Union[_Switch, dict]) -> Field: + def __rshift__(self, switch): """Inserts switch options into the new field""" return Field(self, byteorder(self)) >> switch - def __floordiv__(self, condition: Union[_ContextLambda, bool]) -> Field: + def __floordiv__(self, condition): """Returns a field with the given condition""" return Field(self, byteorder(self)) // condition - def __set_byteorder__(self, order: ByteOrder) -> Field: + def __set_byteorder__(self, order: ByteOrder): """Returns a field with the given byteorder""" return Field(self, order=order) - def __rsub__(self, bits: Union[_ContextLambda, int]) -> Field: + def __rsub__(self, bits): """Returns a field with the given bit count""" return Field(self, byteorder(self), bits=bits) - def __and__(self, other: _StructLike) -> "Chain": + def __and__(self, other): """Returns a chain with the next element added at the end""" if isinstance(other, Chain): return other & self @@ -93,7 +86,7 @@ class FieldStruct(FieldMixin): "__bits__": "TBD", } - def pack_single(self, obj: Any, context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: """ Abstract method to pack a single element. @@ -105,7 +98,7 @@ def pack_single(self, obj: Any, context: _ContextLike) -> None: """ raise NotImplementedError - def unpack_single(self, context: _ContextLike) -> Any: + def unpack_single(self, context): """ Abstract method to unpack a single element. @@ -116,7 +109,7 @@ def unpack_single(self, context: _ContextLike) -> Any: """ raise NotImplementedError - def pack_seq(self, seq: Collection, context: _ContextLike) -> None: + def pack_seq(self, seq, context) -> None: """ Pack a sequence of elements using the provided context. 
@@ -127,7 +120,7 @@ def pack_seq(self, seq: Collection, context: _ContextLike) -> None: """ pack_seq(seq, context, self.pack_single) - def unpack_seq(self, context: _ContextLike) -> List[Any]: + def unpack_seq(self, context): """ Unpack a sequence of elements using the provided context. @@ -137,7 +130,7 @@ def unpack_seq(self, context: _ContextLike) -> List[Any]: """ return unpack_seq(context, self.unpack_single) - def __pack__(self, obj: Any, context: _ContextLike) -> None: + def __pack__(self, obj, context) -> None: """ Pack data based on whether the field is sequential or not. @@ -148,7 +141,7 @@ def __pack__(self, obj: Any, context: _ContextLike) -> None: """ (self.pack_single if not context[CTX_SEQ] else self.pack_seq)(obj, context) - def __unpack__(self, context: _ContextLike) -> Any: + def __unpack__(self, context): """ Unpack data based on whether the field is sequential or not. @@ -186,15 +179,15 @@ class Chain(FieldStruct): __slots__ = ("_elements",) - def __init__(self, initial: _StructLike, *structs: _StructLike) -> None: + def __init__(self, initial, *structs) -> None: # start -> next -> next -> next -> done | unpack # Y # done <- previous <- previous <- start | pack self._elements = [getstruct(initial, initial)] - self._elements += list(map(lambda x: getstruct(x, x), structs)) + self._elements += [x for x in map(lambda x: getstruct(x, x), structs) if x] @property - def head(self) -> _StructLike: + def head(self): """ Get the head of the chain, i.e., the first structure. @@ -204,7 +197,7 @@ def head(self) -> _StructLike: return self._elements[0] @property - def tail(self) -> _StructLike: + def tail(self): """ Get the tail of the chain, i.e., the last structure. @@ -214,7 +207,7 @@ def tail(self) -> _StructLike: return self._elements[-1] - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """ Calculate the size of the chain in bytes. 
@@ -235,7 +228,7 @@ def __type__(self) -> type: return self.tail.__type__() - def __and__(self, other: _StructLike) -> "Chain": + def __and__(self, other): """ Concatenate another structure to the end of the chain. @@ -247,7 +240,7 @@ def __and__(self, other: _StructLike) -> "Chain": self._elements.append(getstruct(other, other)) return self - def __rand__(self, other: _StructLike) -> "Chain": + def __rand__(self, other): """ Concatenate another structure to the beginning of the chain. @@ -258,7 +251,7 @@ def __rand__(self, other: _StructLike) -> "Chain": """ return self.__and__(other) - def unpack_single(self, context: _ContextLike) -> memoryview: + def unpack_single(self, context): """ Unpack a single data instance from the chain. @@ -278,7 +271,7 @@ def unpack_single(self, context: _ContextLike) -> memoryview: return data - def pack_single(self, obj: Any, context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: """ Pack a single data instance into the chain. @@ -311,10 +304,10 @@ class Operator: .. code-block:: python - from caterpillar.fields import uint16, _infix_ + from caterpillar.fields import uint16, Operator from caterpillar.model import struct - M = _infix_(lambda a, b: a[b*2]) + M = Operator(lambda a, b: a[b*2]) @struct class Format: @@ -325,7 +318,7 @@ class Format: .. 
code-block:: python - @_infix_ + @Operator def M(a, b): return a[b*2] @@ -333,21 +326,21 @@ def M(a, b): :type func: Callable[[Any, Any], _StructLike] """ - def __init__(self, func: Callable[[Any, Any], _StructLike]) -> None: + def __init__(self, func) -> None: self.func = func - def __truediv__(self, arg2) -> _StructLike: + def __truediv__(self, arg2): return self.func(arg2) - def __rtruediv__(self, arg1) -> "_infix_": + def __rtruediv__(self, arg1): return Operator(partial(self.func, arg1)) - def __call__(self, arg1, arg2) -> _StructLike: + def __call__(self, arg1, arg2): return self.func(arg1, arg2) # utility methods -def get_args(args: Any, context: _ContextLike) -> List[Any]: +def get_args(args, context): """ Get arguments for an instance. @@ -365,7 +358,7 @@ def get_args(args: Any, context: _ContextLike) -> List[Any]: return args -def get_kwargs(kwargs: dict, context: _ContextLike) -> dict: +def get_kwargs(kwargs: dict, context) -> dict: """ Process a dictionary of keyword arguments, replacing callable values with their results. diff --git a/src/caterpillar/fields/_mixin.pyi b/src/caterpillar/fields/_mixin.pyi new file mode 100755 index 00000000..a616c445 --- /dev/null +++ b/src/caterpillar/fields/_mixin.pyi @@ -0,0 +1,92 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
+from ._base import Field +from caterpillar.abc import ( + _ContextLambda, + _ContextLike, + _StructLike, + _SwitchLike, + _LengthT, + _IT, + _OT, +) +from caterpillar.byteorder import ByteOrder +from caterpillar.options import Flag +from typing import ( + Any, + Callable, + Collection, + Dict, + Generic, + List, + Type, + TypeVar, + Union, + overload, +) + +_NextOT = TypeVar("_NextOT") + +class FieldMixin(Generic[_IT, _OT]): + def __or__(self, flag: Flag) -> Field[_IT, _OT]: ... + def __xor__(self, flag: Flag) -> Field[_IT, _OT]: ... + def __matmul__(self, offset: _ContextLambda | int) -> Field[_IT, _OT]: ... + def __getitem__(self, dim: _LengthT) -> Field[Collection[_IT], Collection[_OT]]: ... + def __rshift__(self, switch: _SwitchLike) -> Field[_IT, _OT]: ... + def __floordiv__(self, condition: _ContextLambda | bool) -> Field[_IT, _OT]: ... + def __set_byteorder__(self, order: ByteOrder) -> Field[_IT, _OT]: ... + def __rsub__(self, bits: _ContextLambda | int) -> Field[_IT, _OT]: ... + def __and__(self, other: _StructLike) -> Chain: ... + +class FieldStruct(FieldMixin[_IT, _OT], _StructLike[_IT, _OT]): + def pack_single(self, obj: _IT, context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> _OT: ... + def pack_seq(self, seq: Collection[_IT], context: _ContextLike) -> None: ... + def unpack_seq(self, context: _ContextLike) -> Collection[_OT]: ... + def __pack__(self, obj: _IT, context: _ContextLike) -> None: ... + def __unpack__(self, context: _ContextLike) -> _OT: ... + +class Chain(FieldStruct[_IT, _OT]): + @overload + def __init__( + self, initial: _StructLike[_IT, Any], *structs: _StructLike + ) -> None: ... + @overload + def __init__(self, initial: _StructLike[_IT, _OT], *structs: None) -> None: ... + @property + def head(self) -> _StructLike[_IT, Any]: ... + @property + def tail(self) -> _StructLike[Any, _OT]: ... + def __size__(self, context: _ContextLike) -> int: ... + def __type__(self) -> Type[_OT]: ... 
+ def __and__(self, other: _StructLike[_OT, _NextOT]) -> Chain[_IT, _NextOT]: ... + def __rand__(self, other: _StructLike[_OT, _NextOT]) -> Chain[_IT, _NextOT]: ... + def unpack_single(self, context: _ContextLike) -> _OT: ... + def pack_single(self, obj: _IT, context: _ContextLike) -> None: ... + +class Operator: + func: Callable[[Any, Any], _StructLike] + def __init__(self, func: Callable[[Any, Any], _StructLike]) -> None: ... + def __truediv__(self, arg2) -> _StructLike: ... + def __rtruediv__(self, arg1) -> Operator: ... + def __call__(self, arg1, arg2) -> _StructLike: ... + +def get_args( + args: Union[Any, _ContextLambda, List[Union[Any, _ContextLambda]]], + context: _ContextLike, +) -> list[Any]: ... +def get_kwargs( + kwargs: Dict[Any, Union[Any, _ContextLambda]], context: _ContextLike +) -> dict: ... diff --git a/src/caterpillar/fields/common.py b/src/caterpillar/fields/common.py old mode 100644 new mode 100755 index ee70a13d..160d07b4 --- a/src/caterpillar/fields/common.py +++ b/src/caterpillar/fields/common.py @@ -16,17 +16,14 @@ import warnings from io import BytesIO -from typing import Sequence, Any, Optional, Union, List, Callable -from types import EllipsisType, NoneType +from typing import Any, Union +from types import NoneType from functools import cached_property from enum import Enum as _EnumType from uuid import UUID from caterpillar.abc import ( _StructLike, - _ContextLambda, - getstruct, - _EnumLike, _StreamType, _ContextLike, ) @@ -40,6 +37,7 @@ from caterpillar.byteorder import LittleEndian from caterpillar import registry from caterpillar._common import WithoutContextVar +from caterpillar.shared import getstruct from ._base import Field, INVALID_DEFAULT, singleton from ._mixin import FieldStruct @@ -107,7 +105,7 @@ def __type__(self) -> type: """ return self.ty - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """ Calculate the size of the field in bytes. 
@@ -119,7 +117,7 @@ def __size__(self, context: _ContextLike) -> int: """ return self.__bits__ // 8 - def pack_single(self, obj: Any, context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: """ Pack a single value into the stream using the defined format character. @@ -140,7 +138,7 @@ def pack_single(self, obj: Any, context: _ContextLike) -> None: data = PyStruct.pack(fmt, obj) context[CTX_STREAM].write(data) - def pack_seq(self, seq: Sequence, context: _ContextLike) -> None: + def pack_seq(self, seq, context) -> None: """ Pack a sequence of values into the stream. @@ -154,7 +152,7 @@ def pack_seq(self, seq: Sequence, context: _ContextLike) -> None: else: super().pack_seq(seq, context) - def unpack_single(self, context: _ContextLike) -> Any: + def unpack_single(self, context): """ Unpack a single value from the stream. @@ -169,7 +167,7 @@ def unpack_single(self, context: _ContextLike) -> Any: ) return value[0] if value else None - def unpack_seq(self, context: _ContextLike) -> List[Any]: + def unpack_seq(self, context): """ Unpack a sequence of values from the stream. @@ -191,7 +189,7 @@ def unpack_seq(self, context: _ContextLike) -> List[Any]: size = (self.__bits__ // 8) * length return list(PyStruct.unpack(fmt, context[CTX_STREAM].read(size))) - def get_length(self, context: _ContextLike) -> int: + def get_length(self, context) -> int: """ Get the length of the field, which may be dynamically determined based on the context. 
@@ -239,8 +237,6 @@ def is_padding(self) -> bool: void_ptr = PyStructFormattedField("P", int) -_ConstType = Union[str, bytes, Any] - class Transformer(FieldStruct): """ @@ -249,15 +245,11 @@ class Transformer(FieldStruct): __slots__ = ("struct",) - def __init__(self, struct: _StructLike) -> None: + def __init__(self, struct) -> None: self.struct = struct self.__bits__ = getattr(self.struct, "__bits__", None) - # TODO: document this - def __fmt__(self) -> str: - return self.struct.__fmt__() - - def __type__(self) -> type: + def __type__(self): """ Get the type of the data encoded/decoded by the transformer. @@ -265,7 +257,7 @@ def __type__(self) -> type: """ return self.struct.__type__() - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """ Get the size of the data encoded/decoded by the transformer. @@ -274,7 +266,7 @@ def __size__(self, context: _ContextLike) -> int: """ return self.struct.__size__(context) - def encode(self, obj: Any, context: _ContextLike) -> Any: + def encode(self, obj, context): """ Encode data using the wrapped _StructLike object. @@ -284,7 +276,7 @@ def encode(self, obj: Any, context: _ContextLike) -> Any: """ return obj - def decode(self, parsed: Any, context: _ContextLike) -> Any: + def decode(self, parsed, context): """ Decode data using the wrapped _StructLike object. @@ -294,7 +286,7 @@ def decode(self, parsed: Any, context: _ContextLike) -> Any: """ return parsed - def pack_single(self, obj: Any, context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: """ Pack a single value into the stream using encoding. @@ -304,7 +296,7 @@ def pack_single(self, obj: Any, context: _ContextLike) -> None: value = self.encode(obj, context) self.struct.__pack__(value, context) - def unpack_single(self, context: _ContextLike) -> Any: + def unpack_single(self, context): """ Unpack a single value from the stream and decode it. 
@@ -342,11 +334,11 @@ class Const(Transformer): __slots__ = ("value",) - def __init__(self, value: _ConstType, struct: _StructLike) -> None: + def __init__(self, value, struct) -> None: super().__init__(struct) self.value = value - def encode(self, obj: Any, context: _ContextLike) -> Any: + def encode(self, obj, context): """ Encode data using the constant value. This method will always return the constant value, regardless of the input. Therefore, :code:`None` @@ -364,7 +356,7 @@ def encode(self, obj: Any, context: _ContextLike) -> Any: """ return self.value - def decode(self, parsed: Any, context: _ContextLike) -> Any: + def decode(self, parsed, context): """ Decode data and ensure it matches the constant value. If the parsed value doesn't match, a `ValidationError` is raised. @@ -422,15 +414,15 @@ class Enum(Transformer): def __init__( self, - model: type, - struct: _StructLike, - default: Optional[_EnumLike | Any] = INVALID_DEFAULT, + model, + struct, + default=INVALID_DEFAULT, ) -> None: super().__init__(struct) self.model = model self.default = default - def __type__(self) -> type: + def __type__(self): """ Determine the type for this transformation, which is either the enum type or a union of the enum and struct types, depending on the global field flags. @@ -443,7 +435,7 @@ def __type__(self) -> type: return Union[self.model, self.struct.__type__()] - def encode(self, obj: Any, context: _ContextLike) -> Any: + def encode(self, obj, context): """ Encode an enumeration value into its corresponding encoded representation. @@ -464,7 +456,7 @@ def encode(self, obj: Any, context: _ContextLike) -> Any: return obj.value - def decode(self, parsed: Any, context: _ContextLike) -> Any: + def decode(self, parsed, context): """ Decode an encoded value (typically an integer) back to its corresponding enumeration value. 
@@ -505,10 +497,10 @@ def decode(self, parsed: Any, context: _ContextLike) -> Any: class _EnumTypeConverter(registry.TypeConverter): - def matches(self, annotation: Any) -> bool: + def matches(self, annotation) -> bool: return isinstance(annotation, type) and issubclass(annotation, _EnumType) - def convert(self, annotation: Any, kwargs: dict) -> _StructLike: + def convert(self, annotation, kwargs: dict): struct_obj = getstruct(annotation) if not struct_obj: raise ValidationError( @@ -560,7 +552,7 @@ class Memory(FieldStruct): def __init__( self, - length: Union[int, _ContextLambda, EllipsisType], + length, ) -> None: self.length = length @@ -572,7 +564,7 @@ def __type__(self) -> type: """ return memoryview - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """ Calculate the size of the memory field based on the `length` parameter. @@ -584,7 +576,7 @@ def __size__(self, context: _ContextLike) -> int: """ return self.length(context) if callable(self.length) else self.length - def pack_single(self, obj: Union[memoryview, bytes], context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: """ Pack a single byte object (memoryview or bytes) into the stream. @@ -619,7 +611,7 @@ def pack_single(self, obj: Union[memoryview, bytes], context: _ContextLike) -> N ) context[CTX_STREAM].write(obj) - def unpack_single(self, context: _ContextLike) -> Any: + def unpack_single(self, context): """ Unpack a single byte object (memoryview) from the stream. @@ -654,7 +646,7 @@ def __type__(self) -> type: """ return bytes - def unpack_single(self, context: _ContextLike) -> Any: + def unpack_single(self, context): """ Unpack a single byte sequence (bytes) from the stream. 
@@ -684,8 +676,8 @@ class String(Memory): def __init__( self, - length: Union[int, _ContextLambda, EllipsisType], - encoding: Optional[str] = None, + length, + encoding=None, ) -> None: super().__init__(length) self.encoding = encoding or "utf-8" @@ -698,11 +690,11 @@ def __type__(self) -> type: """ return str - def pack_single(self, obj: str, context: _ContextLike) -> None: + def pack_single(self, obj: str, context) -> None: """Packs a single string into the stream.""" return super().pack_single(obj.encode(self.encoding), context) - def unpack_single(self, context: _ContextLike) -> Any: + def unpack_single(self, context): """ Unpack a single string from the stream. @@ -749,9 +741,9 @@ class CString(FieldStruct): def __init__( self, - length: Union[int, _ContextLambda, None, EllipsisType] = None, - encoding: Optional[str] = None, - pad: Union[str, int, None] = None, + length=None, + encoding=None, + pad=None, ) -> None: """ Initialize the String field with a fixed length or a length determined by a context lambda. @@ -784,7 +776,7 @@ def __class_getitem__(cls, dim) -> Field: """ return CString(...)[dim] - def __size__(self, context: _ContextLike) -> Any: + def __size__(self, context): """ Returns the size of the `CString` field. @@ -801,7 +793,7 @@ def __type__(self) -> type: """ return str - def pack_single(self, obj: str, context: _ContextLike) -> None: + def pack_single(self, obj: str, context) -> None: """ Pack a single string into the stream with padding. @@ -828,7 +820,7 @@ def pack_single(self, obj: str, context: _ContextLike) -> None: stream.write(encoded) stream.write(self._raw_pad) - def unpack_single(self, context: _ContextLike) -> Any: + def unpack_single(self, context): """ Unpack a single C-style string from the stream. 
@@ -894,7 +886,7 @@ class ConstString(Const): __slots__ = () - def __init__(self, value: str, encoding: Optional[str] = None) -> None: + def __init__(self, value: str, encoding=None) -> None: if not isinstance(value, str): raise TypeError("value must be a string") @@ -972,11 +964,11 @@ class Computed(FieldStruct): __slots__ = ("value",) - def __init__(self, value: Union[_ConstType, _ContextLambda]) -> None: + def __init__(self, value) -> None: self.value = value self.__bits__ = 0 - def __type__(self) -> type: + def __type__(self): """ Return the type of the computed field. @@ -984,7 +976,7 @@ def __type__(self) -> type: """ return Any if callable(self.value) else type(self.value) - def __pack__(self, obj: Any, context: _ContextLike) -> None: + def __pack__(self, obj, context) -> None: """ No packing is needed for computed fields, as the value is computed dynamically. @@ -996,7 +988,7 @@ def __pack__(self, obj: Any, context: _ContextLike) -> None: """ pass - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """ Return the size of the computed field. @@ -1007,7 +999,7 @@ def __size__(self, context: _ContextLike) -> int: """ return 0 - def __unpack__(self, context: _ContextLike) -> Any: + def __unpack__(self, context): """ Unpack the computed value based on the context. @@ -1019,7 +1011,7 @@ def __unpack__(self, context: _ContextLike) -> Any: """ return self.value(context) if callable(self.value) else self.value - def pack_single(self, obj: Any, context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: """ No packing is needed for computed fields. @@ -1029,7 +1021,7 @@ def pack_single(self, obj: Any, context: _ContextLike) -> None: # No need for an implementation pass - def unpack_single(self, context: _ContextLike) -> None: + def unpack_single(self, context) -> None: """ No unpacking is needed for computed fields. 
@@ -1089,20 +1081,20 @@ def __type__(self) -> type: """ return None.__class__ - def __pack__(self, obj: Any, context: _ContextLike) -> None: + def __pack__(self, obj, context) -> None: pass - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: return 0 - def __unpack__(self, context: _ContextLike) -> Any: + def __unpack__(self, context): return None - def pack_single(self, obj: Any, context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: # No need for an implementation pass - def unpack_single(self, context: _ContextLike) -> None: + def unpack_single(self, context) -> None: # No need for an implementation pass @@ -1143,9 +1135,9 @@ class Prefixed(FieldStruct): def __init__( self, - prefix: _StructLike, - struct: Optional[_StructLike] = None, - encoding: Optional[str] = None, + prefix, + struct=None, + encoding=None, ): self.prefix = prefix self.struct = struct @@ -1158,7 +1150,7 @@ def __init__( ) self.encoding, self.struct = struct, None - def __type__(self) -> Optional[Union[type, str]]: + def __type__(self): """ Return the type associated with this Prefixed field. @@ -1169,7 +1161,7 @@ def __type__(self) -> Optional[Union[type, str]]: """ return bytes if self.struct is None else self.struct.__type__() - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """ Prefixed fields do not have a fixed size. @@ -1180,7 +1172,7 @@ def __size__(self, context: _ContextLike) -> int: """ raise DynamicSizeError("Prefixed does not store a size", context) - def pack_single(self, obj: Any, context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: """ Pack a single object into the stream, with the prefix indicating the size. @@ -1189,7 +1181,6 @@ def pack_single(self, obj: Any, context: _ContextLike) -> None: :param obj: The object to pack (should be a byte sequence). :param context: The current context. 
""" - # REVISIT: We can only provide a value with __len__ here if self.struct is not None: data = BytesIO() with WithoutContextVar(context, CTX_STREAM, data): @@ -1203,7 +1194,7 @@ def pack_single(self, obj: Any, context: _ContextLike) -> None: self.prefix.__pack__(len(obj), context) context[CTX_STREAM].write(obj) - def unpack_single(self, context: _ContextLike) -> Any: + def unpack_single(self, context): """ Unpack a single object from the stream, using the prefix to determine the size. @@ -1244,13 +1235,20 @@ class Int(FieldStruct): __slots__ = ("signed", "size") - def __init__(self, bits: int, signed: bool = True) -> None: + def __init__(self, bits: int, signed=True) -> None: self.signed = signed self.__bits__ = bits if not isinstance(bits, int): raise ValueError(f"Invalid int size: {bits!r} - expected int") self.size = self.__bits__ // 8 + def __repr__(self) -> str: + name = "int" + if not self.signed: + name = f"u{name}" + + return f"<{name}{self.__bits__}>" + def __type__(self) -> type: """ Return the type associated with this Int field. @@ -1259,7 +1257,7 @@ def __type__(self) -> type: """ return int - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """ Return the size of the integer in bytes. @@ -1268,7 +1266,7 @@ def __size__(self, context: _ContextLike) -> int: """ return self.size - def pack_single(self, obj: int, context: _ContextLike) -> None: + def pack_single(self, obj: int, context) -> None: """ Pack a single integer value into the stream. @@ -1286,7 +1284,7 @@ def pack_single(self, obj: int, context: _ContextLike) -> None: obj.to_bytes(self.size, byteorder, signed=self.signed) ) - def unpack_single(self, context: _ContextLike) -> int: + def unpack_single(self, context) -> int: """ Unpack a single integer value from the stream. 
@@ -1360,11 +1358,11 @@ class Aligned(FieldStruct): def __init__( self, - struct: _StructLike, - alignment: Union[int, _ContextLambda], - after: bool = False, - before: bool = False, - filler: Union[int, str, None] = None, + struct, + alignment, + after=False, + before=False, + filler=None, ) -> None: if not before and not after: raise ValueError("Must specify either before or after") @@ -1384,7 +1382,7 @@ def __init__( if not isinstance(self._filler, int): raise ValueError(f"Filler must be a single byte - got {filler!r}") - def __type__(self) -> Optional[Union[type, str]]: + def __type__(self): """ Return the type associated with this aligned field. @@ -1392,7 +1390,7 @@ def __type__(self) -> Optional[Union[type, str]]: """ return self.struct.__type__() - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """ Calculate the size of the aligned field, accounting for padding based on the alignment. @@ -1408,7 +1406,7 @@ def __size__(self, context: _ContextLike) -> int: struct_size = self.struct.__size__(context) return struct_size + (self.alignment - (struct_size % self.alignment)) - def unpack_alignment(self, context: _ContextLike): + def unpack_alignment(self, context): """ Unpack padding for the alignment, verifying that the correct amount of padding is present. @@ -1431,7 +1429,7 @@ def unpack_alignment(self, context: _ContextLike): f"Expected {size} bytes of padding (value={self._filler!r}), got {data.count(self._filler)}" ) - def unpack_single(self, context: _ContextLike) -> Any: + def unpack_single(self, context): """ Unpack a single aligned field from the stream. @@ -1448,7 +1446,7 @@ def unpack_single(self, context: _ContextLike) -> Any: self.unpack_alignment(context) return obj - def pack_alignment(self, context: _ContextLike): + def pack_alignment(self, context): """ Apply padding for the alignment before or after the structure, depending on the `before` and `after` settings. 
@@ -1460,7 +1458,7 @@ def pack_alignment(self, context: _ContextLike): size = value - (stream.tell() % value) stream.write(bytes([self._filler] * size)) - def pack_single(self, obj: Any, context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: """ Pack a single aligned field into the stream, applying padding if necessary. @@ -1474,7 +1472,7 @@ def pack_single(self, obj: Any, context: _ContextLike) -> None: self.pack_alignment(context) -def align(alignment: Union[int, _ContextLambda]) -> _ContextLambda: +def align(alignment): """ Create a context lambda to calculate the alignment padding required at the current stream position. @@ -1499,7 +1497,7 @@ def align(alignment: Union[int, _ContextLambda]) -> _ContextLambda: :return: A context lambda function that returns the number of bytes to align the next structure. """ - def _get_aligned_size(context: _ContextLike) -> Any: + def _get_aligned_size(context: _ContextLike): pos = context[CTX_STREAM].tell() value = alignment(context) if callable(alignment) else alignment return value - (pos % value) @@ -1536,7 +1534,7 @@ class Lazy(FieldStruct): when the field is accessed. """ - def __init__(self, struct: Callable[[], _StructLike]) -> None: + def __init__(self, struct) -> None: if not callable(struct): raise TypeError(f"struct must be a callable - got {struct!r}") @@ -1554,7 +1552,7 @@ def struct(self) -> _StructLike: """ return self.struct_fn() - def __bits__(self) -> str: + def __bits__(self): """ Get the bit representation of the Lazy struct by delegating to the underlying struct. @@ -1563,7 +1561,7 @@ def __bits__(self) -> str: """ return self.struct.__bits__() - def __type__(self) -> type: + def __type__(self): """ Get the type associated with the Lazy struct by delegating to the underlying struct. 
@@ -1572,7 +1570,7 @@ def __type__(self) -> type: """ return self.struct.__type__() - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """ Get the size of the Lazy struct by delegating to the underlying struct. @@ -1582,7 +1580,7 @@ def __size__(self, context: _ContextLike) -> int: """ return self.struct.__size__(context) - def pack_single(self, obj: Any, context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: """ Pack a single value using the Lazy struct by delegating to the underlying struct. @@ -1591,7 +1589,7 @@ def pack_single(self, obj: Any, context: _ContextLike) -> None: """ self.struct.__pack__(obj, context) - def unpack_single(self, context: _ContextLike) -> Any: + def unpack_single(self, context): """ Unpack a single value using the Lazy struct by delegating to the underlying struct. @@ -1633,7 +1631,7 @@ def __type__(self) -> type: """ return UUID - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """ Get the size of the UUID field. @@ -1652,7 +1650,7 @@ def __bits__(self) -> int: """ return 128 - def __pack__(self, obj: UUID, context: _ContextLike) -> None: + def __pack__(self, obj: UUID, context) -> None: """ Pack a UUID object into the stream. @@ -1665,7 +1663,7 @@ def __pack__(self, obj: UUID, context: _ContextLike) -> None: is_le = context[CTX_FIELD].order is LittleEndian super().__pack__(obj.bytes_le if is_le else obj.bytes, context) - def __unpack__(self, context: _ContextLike) -> UUID: + def __unpack__(self, context) -> UUID: """ Unpack a UUID from the stream. 
diff --git a/src/caterpillar/fields/common.pyi b/src/caterpillar/fields/common.pyi new file mode 100755 index 00000000..ca209b2e --- /dev/null +++ b/src/caterpillar/fields/common.pyi @@ -0,0 +1,261 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from ._base import Field +from ._mixin import FieldStruct +from _typeshed import SupportsLenAndGetItem + +from caterpillar import registry +from caterpillar.abc import ( + _ContextLambda, + _ContextLike, + _LengthT, + _StructLike, + _IT, + _OT, +) +from caterpillar.options import Flag +from functools import cached_property +from types import EllipsisType, NoneType +from typing import ( + Any, + Callable, + Collection, + Generic, + List, + Optional, + Type, + TypeVar, + Union, +) +from uuid import UUID + +ENUM_STRICT: Flag[NoneType] = ... + +class PyStructFormattedField(FieldStruct[_IT, _IT]): + text: str + ty: Type[_IT] + __bits__: int + def __init__(self, ch: str, type_: Type[_IT]) -> None: ... + def __type__(self) -> type: ... + def __size__(self, context: _ContextLike) -> int: ... + def pack_single(self, obj: _IT, context: _ContextLike) -> None: ... + def pack_seq(self, seq: Collection[_IT], context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> _IT: ... + def unpack_seq(self, context: _ContextLike) -> Collection[_IT]: ... + def get_length(self, context: _ContextLike) -> int: ... 
+ def is_padding(self) -> bool: ... + +padding: PyStructFormattedField[NoneType] = ... +char: PyStructFormattedField[str] = ... +boolean: PyStructFormattedField[bool] = ... +int8: PyStructFormattedField[int] = ... +uint8: PyStructFormattedField[int] = ... +int16: PyStructFormattedField[int] = ... +uint16: PyStructFormattedField[int] = ... +int32: PyStructFormattedField[int] = ... +uint32: PyStructFormattedField[int] = ... +int64: PyStructFormattedField[int] = ... +uint64: PyStructFormattedField[int] = ... +ssize_t: PyStructFormattedField[int] = ... +size_t: PyStructFormattedField[int] = ... +float16: PyStructFormattedField[float] = ... +float32: PyStructFormattedField[float] = ... +float64: PyStructFormattedField[float] = ... +double: PyStructFormattedField[float] = ... +void_ptr: PyStructFormattedField[int] = ... + +_IT_transformed = TypeVar("_IT_transformed") +_OT_transformed = TypeVar("_OT_transformed") + +class Transformer( + Generic[_IT, _IT_transformed, _OT, _OT_transformed], + FieldStruct[_IT, _OT], +): + struct: _StructLike[_IT_transformed, _OT_transformed] + __bits__: Union[Callable[[], int], int] + def __init__( + self, struct: _StructLike[_IT_transformed, _OT_transformed] + ) -> None: ... + def __fmt__(self) -> str: ... + def __type__(self) -> Type[_OT]: ... + def __size__(self, context: _ContextLike) -> int: ... + def encode(self, obj: _IT, context: _ContextLike) -> _IT_transformed: ... + def decode(self, parsed: _OT_transformed, context: _ContextLike) -> _OT: ... + def pack_single(self, obj: _IT, context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> _OT: ... + +class Const(Transformer[_IT, _IT, _IT, _IT]): + value: _IT + def __init__(self, value: _IT, struct: _StructLike[_IT, _IT]) -> None: ... + def encode(self, obj: _IT, context: _ContextLike) -> _IT: ... + def decode(self, parsed: _IT, context: _ContextLike) -> _IT: ... 
+ +_EnumT = TypeVar("_EnumT") + +class Enum(Generic[_EnumT, _IT], Transformer[_EnumT, _IT, Union[_EnumT, _IT], _IT]): + model: Type[_EnumT] + default: _EnumT + def __init__( + self, + model: Type[_EnumT], + struct: _StructLike[_IT, _IT], + default: _EnumT | _IT | None = ..., + ) -> None: ... + def __type__(self) -> Union[Type[_EnumT], Type[_IT]]: ... + def encode(self, obj: _EnumT, context: _ContextLike) -> _IT: ... + def decode(self, parsed: _IT, context: _ContextLike) -> _EnumT: ... + +class _EnumTypeConverter(registry.TypeConverter): + def matches(self, annotation: Any) -> bool: ... + def convert(self, annotation: Any, kwargs: dict) -> _StructLike: ... + +_MemoryIT = TypeVar("_MemoryIT", default=Union[memoryview, bytes]) +_MemoryOT = TypeVar("_MemoryOT", default=memoryview) + +class Memory(Generic[_MemoryIT, _MemoryOT], FieldStruct[_MemoryIT, _MemoryOT]): + length: _LengthT + def __init__(self, length: _LengthT) -> None: ... + def __type__(self) -> Type[_MemoryOT]: ... + def __size__(self, context: _ContextLike) -> int: ... + def pack_single(self, obj: _MemoryIT, context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> _MemoryOT: ... + +class Bytes(Memory[bytes, bytes]): + def __type__(self) -> Type[bytes]: ... + def unpack_single(self, context: _ContextLike) -> bytes: ... + +class String(Memory[str, str]): + encoding: str + def __init__(self, length: _LengthT, encoding: str | None = None) -> None: ... + def __type__(self) -> Type[str]: ... + def pack_single(self, obj: str, context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> str: ... + +class CString(FieldStruct[str, str]): + length: _LengthT + encoding: str + pad: int + def __init__( + self, + length: int | _ContextLambda | None | EllipsisType = ..., + encoding: str | None = ..., + pad: str | int | None = ..., + ) -> None: ... + def __class_getitem__(cls, dim: _LengthT) -> Field[List[str], List[str]]: ... 
+ def __size__(self, context: _ContextLike) -> Any: ... + def __type__(self) -> type: ... + def pack_single(self, obj: str, context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> Any: ... + +class ConstString(Const[str]): + def __init__(self, value: str, encoding: str | None = None) -> None: ... + +class ConstBytes(Const[bytes]): + def __init__(self, value: bytes) -> None: ... + +class Computed(Generic[_IT], FieldStruct[NoneType, _IT]): + value: _IT + __bits__: int + def __init__(self, value: _IT | _ContextLambda[_IT]) -> None: ... + def __type__(self) -> Type[_IT]: ... + def __pack__(self, obj: NoneType, context: _ContextLike) -> None: ... + def __size__(self, context: _ContextLike) -> int: ... + def __unpack__(self, context: _ContextLike) -> _IT: ... + def pack_single(self, obj: NoneType, context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> _IT: ... + +class _Pass(FieldStruct[None, None]): + def __bits__(self) -> int: ... + def __type__(self) -> Type[None]: ... + def __pack__(self, obj: None, context: _ContextLike) -> None: ... + def __size__(self, context: _ContextLike) -> int: ... + def __unpack__(self, context: _ContextLike) -> Any: ... + def pack_single(self, obj: None, context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> None: ... + +Pass: _Pass + +_PrefixIOT = TypeVar("_PrefixIOT", bound=SupportsLenAndGetItem, default=bytes) + +class Prefixed(Generic[_PrefixIOT], FieldStruct[_PrefixIOT, _PrefixIOT]): + prefix: _StructLike[int, int] + struct: _StructLike[_PrefixIOT, _PrefixIOT] + encoding: Optional[str] + def __init__( + self, + prefix: _StructLike[int, int], + struct: _StructLike[_PrefixIOT, _PrefixIOT] | None = None, + encoding: str | None = None, + ) -> None: ... + def __type__(self) -> Type[_PrefixIOT]: ... + def __size__(self, context: _ContextLike) -> int: ... + def pack_single(self, obj: _PrefixIOT, context: _ContextLike) -> None: ... 
+ def unpack_single(self, context: _ContextLike) -> _PrefixIOT: ... + +class Int(FieldStruct[int, int]): + signed: bool + __bits__: int + size: int + def __init__(self, bits: int, signed: bool = True) -> None: ... + def __type__(self) -> Type[int]: ... + def __size__(self, context: _ContextLike) -> int: ... + def pack_single(self, obj: int, context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> int: ... + +class UInt(Int): + def __init__(self, bits: int) -> None: ... + +int24: Int +uint24: UInt + +class Aligned(FieldStruct[_IT, _OT]): + struct: _StructLike[_IT, _OT] + alignment: int | _ContextLambda[int] + def __init__( + self, + struct: _StructLike[_IT, _OT], + alignment: int | _ContextLambda[int], + after: bool = False, + before: bool = False, + filler: int | str | None = None, + ) -> None: ... + def __type__(self) -> Type[_OT]: ... + def __size__(self, context: _ContextLike) -> int: ... + def unpack_alignment(self, context: _ContextLike): ... + def unpack_single(self, context: _ContextLike) -> _OT: ... + def pack_alignment(self, context: _ContextLike): ... + def pack_single(self, obj: _IT, context: _ContextLike) -> None: ... + +def align(alignment: int | _ContextLambda[int]) -> _ContextLambda[int]: ... + +class Lazy(FieldStruct[_IT, _OT]): + struct_fn: Callable[[], _StructLike[_IT, _OT]] + def __init__(self, struct: Callable[[], _StructLike[_IT, _OT]]) -> None: ... + @cached_property + def struct(self) -> _StructLike[_IT, _OT]: ... + def __bits__(self) -> int: ... + def __type__(self) -> Type[_OT]: ... + def __size__(self, context: _ContextLike) -> int: ... + def pack_single(self, obj: _IT, context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> _OT: ... + +class _Uuid(FieldStruct[UUID, UUID]): + def __type__(self) -> Type[UUID]: ... + def __size__(self, context: _ContextLike) -> int: ... + def __bits__(self) -> int: ... + def __pack__(self, obj: UUID, context: _ContextLike) -> None: ... 
+ def __unpack__(self, context: _ContextLike) -> UUID: ... + +Uuid: _Uuid \ No newline at end of file diff --git a/src/caterpillar/fields/compression.py b/src/caterpillar/fields/compression.py old mode 100644 new mode 100755 index a465f115..41bdbbdb --- a/src/caterpillar/fields/compression.py +++ b/src/caterpillar/fields/compression.py @@ -12,18 +12,12 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . -from abc import abstractmethod -from typing import Protocol, Union, Callable, Optional, runtime_checkable +from typing import Protocol, runtime_checkable -from caterpillar.abc import ( - _ContextLike, - _StructLike, - _ContainsStruct, - getstruct, - hasstruct, -) -from ._mixin import get_kwargs -from .common import Transformer, Bytes +from caterpillar.abc import _StructLike +from caterpillar.shared import getstruct, hasstruct +from caterpillar.fields._mixin import get_kwargs +from caterpillar.fields.common import Transformer, Bytes @runtime_checkable @@ -36,7 +30,6 @@ class _Compressor(Protocol): Any class implementing this protocol must define these methods. """ - @abstractmethod def compress(self, data: bytes, **kwds) -> bytes: """ Compress the provided data. @@ -47,9 +40,8 @@ def compress(self, data: bytes, **kwds) -> bytes: :return: The compressed data. :rtype: bytes """ - pass + return b"" - @abstractmethod def decompress(self, data: bytes, **kwds) -> bytes: """ Decompress the provided data. @@ -60,7 +52,7 @@ def decompress(self, data: bytes, **kwds) -> bytes: :return: The decompressed data. 
:rtype: bytes """ - pass + return b"" class Compressed(Transformer): @@ -83,10 +75,10 @@ class Compressed(Transformer): def __init__( self, - compressor: _Compressor, - struct: Union[_ContainsStruct, _StructLike], - comp_kwargs: Optional[dict] = None, - decomp_kwargs: Optional[dict] = None, + compressor, + struct, + comp_kwargs=None, + decomp_kwargs=None, ) -> None: if hasstruct(struct): struct = getstruct(struct) @@ -95,7 +87,7 @@ def __init__( self.comp_args = comp_kwargs or {} self.decomp_args = decomp_kwargs or {} - def encode(self, obj: bytes, context: _ContextLike) -> bytes: + def encode(self, obj: bytes, context) -> bytes: """ Compress the input data using the provided compressor. @@ -108,7 +100,7 @@ def encode(self, obj: bytes, context: _ContextLike) -> bytes: """ return self.compressor.compress(obj, **get_kwargs(self.comp_args, context)) - def decode(self, parsed: bytes, context: _ContextLike) -> bytes: + def decode(self, parsed: bytes, context) -> bytes: """ Decompress the input data using the provided compressor. @@ -124,81 +116,77 @@ def decode(self, parsed: bytes, context: _ContextLike) -> bytes: ) -_LengthOrStruct = Union[_ContainsStruct, _StructLike, Callable, int] - - def compressed( - lib: _Compressor, - obj: _LengthOrStruct, - comp_kwargs: Optional[dict] = None, - decomp_kwargs: Optional[dict] = None, + lib, + obj, + comp_kwargs=None, + decomp_kwargs=None, ) -> _StructLike: if callable(obj) or isinstance(obj, int) or obj is ...: obj = Bytes(obj) return Compressed(lib, obj, comp_kwargs, decomp_kwargs) -try: - import zlib +def ZLibCompressed( + obj, + comp_kwargs=None, + decomp_kwargs=None, +): + """ + Create a struct representing zlib compression. + """ + try: + import zlib - def ZLibCompressed( - obj: _LengthOrStruct, - comp_kwargs: Optional[dict] = None, - decomp_kwargs: Optional[dict] = None, - ): - """ - Create a struct representing zlib compression. 
- """ return compressed(zlib, obj, comp_kwargs, decomp_kwargs) + except ImportError: + raise NotImplementedError("Could not import zlib!") -except ImportError: - ZLibCompressed = None -try: - import bz2 +def Bz2Compressed( + obj, + comp_kwargs=None, + decomp_kwargs=None, +): + """ + Create a struct representing bz2 compression. + """ + try: + import bz2 - def Bz2Compressed( - obj: _LengthOrStruct, - comp_kwargs: Optional[dict] = None, - decomp_kwargs: Optional[dict] = None, - ): - """ - Create a struct representing bz2 compression. - """ return compressed(bz2, obj, comp_kwargs, decomp_kwargs) + except ImportError: + raise NotImplementedError("Could not import bz2!") -except ImportError: - Bz2Compressed = None -try: - import lzma +def LZMACompressed( + obj, + comp_kwargs=None, + decomp_kwargs=None, +): + """ + Create a struct representing lzma compression. + """ + try: + import lzma - def LZMACompressed( - obj: _LengthOrStruct, - comp_kwargs: Optional[dict] = None, - decomp_kwargs: Optional[dict] = None, - ): - """ - Create a struct representing lzma compression. - """ return compressed(lzma, obj, comp_kwargs, decomp_kwargs) + except ImportError: + raise NotImplementedError("Could not import lzma!") -except ImportError: - LZMACompressed = None -try: - # install package manuall with pip install lzallright - import lzallright +def LZOCompressed( + obj, + comp_kwargs=None, + decomp_kwargs=None, +): + """ + Create a struct representing LZO compression. + """ + try: + # install package manuall with pip install lzallright + import lzallright - def LZOCompressed( - obj: _LengthOrStruct, - comp_kwargs: Optional[dict] = None, - decomp_kwargs: Optional[dict] = None, - ): - """ - Create a struct representing LZO compression. 
- """ return compressed(lzallright.LZOCompressor(), obj, comp_kwargs, decomp_kwargs) - -except ImportError: - LZOCompressed = None + except ImportError: + raise NotImplementedError("Could not import lzallright!") diff --git a/src/caterpillar/fields/compression.pyi b/src/caterpillar/fields/compression.pyi new file mode 100755 index 00000000..e310c7f8 --- /dev/null +++ b/src/caterpillar/fields/compression.pyi @@ -0,0 +1,71 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from .common import Transformer +from caterpillar.abc import ( + _ContainsStruct, + _ContextLike, + _StructLike, + _LengthT, +) +from typing import Protocol, runtime_checkable + +@runtime_checkable +class _Compressor(Protocol): + def compress(self, data: bytes, **kwds) -> bytes: ... + def decompress(self, data: bytes, **kwds) -> bytes: ... + +class Compressed(Transformer[bytes, bytes, bytes, bytes]): + compressor: _Compressor + comp_args: dict + decomp_args: dict + def __init__( + self, + compressor: _Compressor, + struct: _ContainsStruct[bytes, bytes] | _StructLike[bytes, bytes], + comp_kwargs: dict | None = None, + decomp_kwargs: dict | None = None, + ) -> None: ... + def encode(self, obj: bytes, context: _ContextLike) -> bytes: ... + def decode(self, parsed: bytes, context: _ContextLike) -> bytes: ... 
+ +_LengthTorStructT = _LengthT | _ContainsStruct[bytes, bytes] | _StructLike[bytes, bytes] + +def compressed( + lib: _Compressor, + obj: _LengthTorStructT, + comp_kwargs: dict | None = None, + decomp_kwargs: dict | None = None, +) -> _StructLike[bytes, bytes]: ... + +def ZLibCompressed( + obj: _LengthTorStructT, + comp_kwargs: dict | None = None, + decomp_kwargs: dict | None = None, +) -> _StructLike[bytes, bytes]: ... +def Bz2Compressed( + obj: _LengthTorStructT, + comp_kwargs: dict | None = None, + decomp_kwargs: dict | None = None, +) -> _StructLike[bytes, bytes]: ... +def LZMACompressed( + obj: _LengthTorStructT, + comp_kwargs: dict | None = None, + decomp_kwargs: dict | None = None, +) -> _StructLike[bytes, bytes]: ... +def LZOCompressed( + obj: _LengthTorStructT, + comp_kwargs: dict | None = None, + decomp_kwargs: dict | None = None, +) -> _StructLike[bytes, bytes]: ... diff --git a/src/caterpillar/fields/conditional.py b/src/caterpillar/fields/conditional.py old mode 100644 new mode 100755 index 2fc28210..d16b82b0 --- a/src/caterpillar/fields/conditional.py +++ b/src/caterpillar/fields/conditional.py @@ -12,15 +12,11 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . -import sys -import warnings - from typing import Union, Any from typing import Optional -from caterpillar.abc import _ContextLambda, _StructLike -from caterpillar.abc import _ContextLike, typeof from caterpillar.context import ConditionContext from caterpillar.exception import ValidationError +from caterpillar.shared import typeof from ._base import Field @@ -40,17 +36,12 @@ class definition. While this class can't be used in class definitions, __slots__ = "chain", "conditions" - def __init__(self, struct: _StructLike, condition: _ContextLambda) -> None: - if (sys.version_info.major, sys.version_info.minor) >= (3, 14): - warnings.warn( - "Python3.14 breaks support for Contitional fields. 
Conditional " - "statements must be defined manually until a fix has been released." - ) + def __init__(self, struct, condition) -> None: self.chain = {} self.conditions = [] self.add(struct, condition) - def __type__(self) -> str: + def __type__(self): return Optional[Union[*map(typeof, self.chain.values())]] def __repr__(self) -> str: @@ -66,12 +57,12 @@ def __repr__(self) -> str: return f"" - def add(self, struct: _StructLike, func: _ContextLambda) -> None: + def add(self, struct, func) -> None: idx = len(self.chain) self.chain[idx] = struct self.conditions.append(func) - def get_struct(self, context: _ContextLike) -> Optional[_StructLike]: + def get_struct(self, context): index = 0 while index < len(self.chain): func = self.conditions[index] @@ -79,16 +70,16 @@ def get_struct(self, context: _ContextLike) -> Optional[_StructLike]: return self.chain[index] index += 1 - def __unpack__(self, context: _ContextLike) -> Any: + def __unpack__(self, context) -> Any: struct = self.get_struct(context) return struct.__unpack__(context) if struct else None - def __pack__(self, obj: Any, context: _ContextLike) -> None: + def __pack__(self, obj: Any, context) -> None: struct = self.get_struct(context) if struct: struct.__pack__(obj, context) - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: struct = self.get_struct(context) return struct.__size__(context) if struct else 0 @@ -196,4 +187,4 @@ def __exit__(self, *_): # REVISIT: There is one case where 'ELSE' is not applicable and will cause # a field to be present at all times. This problem exists if we add fields # into an else-branch without a previously defined field. 
-Else = ElseIf(lambda _: True) +Else = ElseIf(lambda context: True) diff --git a/src/caterpillar/fields/conditional.pyi b/src/caterpillar/fields/conditional.pyi new file mode 100755 index 00000000..b7c66379 --- /dev/null +++ b/src/caterpillar/fields/conditional.pyi @@ -0,0 +1,36 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from caterpillar.abc import _ContextLambda, _ContextLike, _StructLike +from caterpillar.context import ConditionContext +from typing import Any, Dict, List + +class ConditionalChain: + chain: Dict[int, _StructLike] + conditions: List[_ContextLambda[bool]] + def __init__(self, struct: _StructLike, condition: _ContextLambda[int]) -> None: ... + def __type__(self) -> type: ... + def add(self, struct: _StructLike, func: _ContextLambda[bool]) -> None: ... + def get_struct(self, context: _ContextLike) -> _StructLike | None: ... + def __unpack__(self, context: _ContextLike) -> Any: ... + def __pack__(self, obj: Any, context: _ContextLike) -> None: ... + def __size__(self, context: _ContextLike) -> int: ... + +class If(ConditionContext): ... + +class ElseIf(ConditionContext): + def __enter__(self): ... + def __exit__(self, *_) -> None: ... 
+ +Else: ElseIf diff --git a/src/caterpillar/fields/crypto.py b/src/caterpillar/fields/crypto.py old mode 100644 new mode 100755 index 9acb115e..968302d5 --- a/src/caterpillar/fields/crypto.py +++ b/src/caterpillar/fields/crypto.py @@ -12,38 +12,24 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . -from typing import Union, Any, Type, Optional, Protocol, Iterable, runtime_checkable +from typing import Protocol, runtime_checkable -try: - from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes - from cryptography.hazmat.primitives.padding import PaddingContext -except ImportError: - Cipher = algorithms = modes = PaddingContext = Any - - -from caterpillar.abc import _StructLike, _ContextLike -from caterpillar.abc import _GreedyType, _ContextLambda from caterpillar.exception import UnsupportedOperation from caterpillar.exception import InvalidValueError -from caterpillar.context import CTX_STREAM, Context +from caterpillar.context import CTX_STREAM from .common import Memory, Bytes from ._mixin import get_args, get_kwargs @runtime_checkable class Padding(Protocol): # pylint: disable=missing-class-docstring - def unpadder(self) -> PaddingContext: + def unpadder(self): """Abstract method to get an unpadder for padding.""" - def padder(self) -> PaddingContext: + def padder(self): """Abstract method to get a padder for padding.""" -_ArgType = Union[_ContextLambda, Any] - -KwArgs = Context - - class Encrypted(Memory): """Struct that is able to encrypt/decrypt blocks of memory. 
@@ -67,16 +53,18 @@ class Encrypted(Memory): # REVISIT: this constructor looks ugly def __init__( self, - length: Union[int, _GreedyType, _ContextLambda], - algorithm: Type["algorithms.CipherAlgorithm"], - mode: Union[Type["modes.Mode"], "modes.Mode"], - padding: Union[Padding, Type[Padding]] = None, - algo_args: Optional[Iterable[_ArgType]] = None, - mode_args: Optional[Iterable[_ArgType]] = None, - padding_args: Optional[Iterable[_ArgType]] = None, - post: Optional[_StructLike] = None, + length, + algorithm, + mode, + padding=None, + algo_args=None, + mode_args=None, + padding_args=None, + post=None, ) -> None: - if Cipher is None: + try: + from cryptography.hazmat.primitives.ciphers import Cipher + except ImportError: raise UnsupportedOperation( ( "To use encryption with this framework, the module 'cryptography' " @@ -94,7 +82,7 @@ def __init__( self._padding_args = padding_args self.post = post - def algorithm(self, context: _ContextLike) -> "algorithms.CipherAlgorithm": + def algorithm(self, context): """ Get the encryption algorithm instance. @@ -103,11 +91,11 @@ def algorithm(self, context: _ContextLike) -> "algorithms.CipherAlgorithm": :return: An instance of the encryption algorithm. :rtype: algorithms.CipherAlgorithm """ - return self.get_instance( - algorithms.CipherAlgorithm, self._algo, self._algo_args, context - ) + from cryptography.hazmat.primitives.ciphers import CipherAlgorithm + + return self.get_instance(CipherAlgorithm, self._algo, self._algo_args, context) - def mode(self, context: _ContextLike) -> "modes.Mode": + def mode(self, context): """ Get the encryption mode instance. @@ -116,9 +104,11 @@ def mode(self, context: _ContextLike) -> "modes.Mode": :return: An instance of the encryption mode. 
:rtype: modes.Mode """ + from cryptography.hazmat.primitives.ciphers import modes + return self.get_instance(modes.Mode, self._mode, self._mode_args, context) - def padding(self, context: _ContextLike) -> Padding: + def padding(self, context) -> Padding: """ Get the padding scheme instance. @@ -129,9 +119,7 @@ def padding(self, context: _ContextLike) -> Padding: """ return self.get_instance(Padding, self._padding, self._padding_args, context) - def get_instance( - self, type_: type, field: Any, args: Any, context: _ContextLambda - ) -> Any: + def get_instance(self, type_, field, args, context): """ Get an instance of a specified type. @@ -155,7 +143,7 @@ def get_instance( args, kwargs = get_args(args, context), {} return field(*args, **kwargs) - def pack_single(self, obj: Any, context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: """ Pack a single element. @@ -164,6 +152,8 @@ def pack_single(self, obj: Any, context: _ContextLike) -> None: :param context: The current operation context. :type context: _ContextLike """ + from cryptography.hazmat.primitives.ciphers import Cipher + cipher = Cipher(self.algorithm(context), self.mode(context)) padding = self.padding(context) @@ -175,7 +165,7 @@ def pack_single(self, obj: Any, context: _ContextLike) -> None: encryptor = cipher.encryptor() super().pack_single(encryptor.update(data) + encryptor.finalize(), context) - def unpack_single(self, context: _ContextLike) -> memoryview: + def unpack_single(self, context): """ Unpack a single element. @@ -184,6 +174,7 @@ def unpack_single(self, context: _ContextLike) -> memoryview: :return: The unpacked element as a memoryview. 
:rtype: memoryview """ + from cryptography.hazmat.primitives.ciphers import Cipher value = super().unpack_single(context) cipher = Cipher(self.algorithm(context), self.mode(context)) @@ -197,29 +188,24 @@ def unpack_single(self, context: _ContextLike) -> memoryview: return memoryview(data) -_KeyType = Union[str, bytes, int, _ContextLambda] - - class KeyCipher(Bytes): - key: Union[str, bytes, int] - """The key that should be applied. + # key: bytes + # """The key that should be applied. - It will be converted automatically to bytes if not given. - """ + # It will be converted automatically to bytes if not given. + # """ - key_length: int - """Internal attribute to keep track of the key's length""" + # key_length: int + # """Internal attribute to keep track of the key's length""" __slots__ = "key", "key_length", "is_lazy" - def __init__( - self, key: _KeyType, length: Union[_ContextLambda, int, None] = None - ) -> None: + def __init__(self, key, length=None) -> None: super().__init__(length or ...) 
self.key = self.is_lazy = self.key_length = None self.set_key(key) - def set_key(self, key: _KeyType, context: _ContextLike = None) -> None: + def set_key(self, key, context=None) -> None: if callable(key) and context is None: # context lambda indicates the key will be computed at runtime self.key = key @@ -242,7 +228,7 @@ def set_key(self, key: _KeyType, context: _ContextLike = None) -> None: self.key_length = len(self.key) self.is_lazy = False - def process(self, obj: bytes, context: _ContextLike) -> bytes: + def process(self, obj: bytes, context) -> bytes: length = len(obj) data = bytearray(length) key = self.key @@ -255,10 +241,10 @@ def process(self, obj: bytes, context: _ContextLike) -> bytes: def _do_process(self, src: bytes, dest: bytearray): raise NotImplementedError - def pack_single(self, obj: bytes, context: _ContextLike) -> None: + def pack_single(self, obj: bytes, context) -> None: context[CTX_STREAM].write(self.process(obj, context)) - def unpack_single(self, context: _ContextLike) -> bytes: + def unpack_single(self, context) -> bytes: obj: bytes = super().unpack_single(context) return self.process(obj, context) diff --git a/src/caterpillar/fields/crypto.pyi b/src/caterpillar/fields/crypto.pyi new file mode 100755 index 00000000..57f43624 --- /dev/null +++ b/src/caterpillar/fields/crypto.pyi @@ -0,0 +1,75 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. 
If not, see . +from .common import Bytes, Memory +from caterpillar.abc import ( + _ContextLambda, + _ContextLike, + _GreedyType, + _StructLike, + _LengthT, +) +from caterpillar.context import Context +from cryptography.hazmat.primitives.ciphers import modes, CipherAlgorithm +from cryptography.hazmat.primitives.padding import PaddingContext +from typing import Any, Iterable, Protocol, Type, Union, _VT + +class Padding(Protocol): + def unpadder(self) -> PaddingContext: ... + def padder(self) -> PaddingContext: ... + +KwArgs = Context +_ArgType = Union[_ContextLambda, Any] + +class Encrypted(Memory): + post: _StructLike | None + def __init__( + self, + length: int | _GreedyType | _ContextLambda, + algorithm: type[CipherAlgorithm], + mode: Type[modes.Mode] | modes.Mode, + padding: Padding | type[Padding] | None = None, + algo_args: Iterable[_ArgType] | None = None, + mode_args: Iterable[_ArgType] | None = None, + padding_args: Iterable[_ArgType] | None = None, + post: _StructLike | None = None, + ) -> None: ... + def algorithm(self, context: _ContextLike) -> CipherAlgorithm: ... + def mode(self, context: _ContextLike) -> modes.Mode: ... + def padding(self, context: _ContextLike) -> Padding: ... + def get_instance( + self, + type_: Type[_VT], + field: Any | _VT | None, + args: Any, + context: _ContextLambda, + ) -> _VT: ... + def pack_single(self, obj: Any, context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> memoryview: ... + +_KeyType = Union[int, str, bytes, _ContextLambda[Union[int, str, bytes]]] + +class KeyCipher(Bytes): + key: _KeyType + key_length: int + is_lazy: bool + def __init__(self, key: _KeyType, length: _LengthT | None = None) -> None: ... + def set_key(self, key: _KeyType, context: _ContextLike | None = None) -> None: ... + def process(self, obj: bytes, context: _ContextLike) -> bytes: ... + def pack_single(self, obj: bytes, context: _ContextLike) -> None: ... 
+ def unpack_single(self, context: _ContextLike) -> bytes: ... + +class Xor(KeyCipher): ... +class Or(KeyCipher): ... +class And(KeyCipher): ... diff --git a/src/caterpillar/fields/digest.py b/src/caterpillar/fields/digest.py old mode 100644 new mode 100755 index 94f68b55..01226c0b --- a/src/caterpillar/fields/digest.py +++ b/src/caterpillar/fields/digest.py @@ -17,15 +17,10 @@ import warnings import zlib -from typing import Any, Callable, Optional, Self, Type - -from caterpillar.abc import _ContextLike, _StructLike, _ContextLambda, _Action from caterpillar.context import CTX_OBJECT, CTX_STREAM from caterpillar.exception import StructException, ValidationError from caterpillar.shared import Action -from caterpillar.fields.hook import ( - IOHook, -) +from caterpillar.fields.hook import IOHook from ._base import Field from .common import Bytes, uint32 @@ -84,13 +79,7 @@ class Algorithm: __slots__ = ("_create", "_update", "_digest", "name") - def __init__( - self, - create: Callable[[_ContextLike], Any] | None = None, - update: Callable[[Any, bytes, _ContextLike], Any] | None = None, - digest: Callable[[Any, _ContextLike], bytes] | None = None, - name: Optional[str] = None, - ) -> None: + def __init__(self, create=None, update=None, digest=None, name=None) -> None: self._create = create self._update = update self._digest = digest @@ -107,7 +96,7 @@ def __repr__(self) -> str: """ return f"<{self.__class__.__name__} name={self.name or ''!r}>" - def create(self, context: _ContextLike) -> Any: + def create(self, context): """ Create an instance of the algorithm or checksum using the provided context. @@ -124,7 +113,7 @@ def create(self, context: _ContextLike) -> Any: raise NotImplementedError("create() is not implemented for this algorithm") - def update(self, algo_obj: Any, data: bytes, context: _ContextLike) -> Any: + def update(self, algo_obj, data: bytes, context): """ Update the algorithm or checksum with the given data. 
@@ -145,7 +134,7 @@ def update(self, algo_obj: Any, data: bytes, context: _ContextLike) -> Any: raise NotImplementedError("update() is not implemented for this algorithm") - def digest(self, algo_obj: Any, context: _ContextLike) -> bytes: + def digest(self, algo_obj, context) -> bytes: """ Compute the digest or checksum value from the algorithm instance. @@ -236,14 +225,7 @@ class Digest: :type path: Optional[str] """ - def __init__( - self, - algorithm: Algorithm, - struct: _StructLike, - name: Optional[str] = None, - verify: bool = False, - path: Optional[str] = None, - ) -> None: + def __init__(self, algorithm, struct, name=None, verify=False, path=None) -> None: if (sys.version_info.major, sys.version_info.minor) >= (3, 14): warnings.warn( "Python3.14 breaks support for Digest fields. The hash must be calculated " @@ -265,7 +247,7 @@ def __init__( self._verify = verify self.path = path or f"{CTX_OBJECT}.{self.name}" - def _get_annotations(self, frame: Any) -> dict[str, Any]: + def _get_annotations(self, frame): """ Retrieve the annotations (i.e., field definitions) from the current frame's local variables. @@ -290,7 +272,7 @@ def _get_annotations(self, frame: Any) -> dict[str, Any]: def __repr__(self) -> str: return f"Digest(algo={self.algo!r}, verify={self._verify!r})" - def __enter__(self) -> Self: + def __enter__(self): """ Install the start action for the digest field during struct definition. @@ -349,7 +331,7 @@ def __exit__(self, *_) -> None: if self._verify: annotations[f"{self.name}_verify"] = Action(unpack=self.verfiy) - def begin(self, context: _ContextLike) -> None: + def begin(self, context) -> None: """ Initialize the digest calculation at the beginning of packing/unpacking. 
@@ -362,7 +344,7 @@ def begin(self, context: _ContextLike) -> None: self._hook.init(context) self._obj = self.algo.create(context) - def end_pack(self, context: _ContextLike) -> None: + def end_pack(self, context) -> None: """ Finalize the digest calculation at the end of packing/unpacking. @@ -375,7 +357,7 @@ def end_pack(self, context: _ContextLike) -> None: context.__context_setattr__(self.path or self.name, self._digest) self._hook.finish(context) - def end_unpack(self, context: _ContextLike) -> None: + def end_unpack(self, context) -> None: """ Finalize the digest calculation at the end of unpacking. @@ -387,7 +369,7 @@ def end_unpack(self, context: _ContextLike) -> None: self._digest = self.algo.digest(self._obj, context) self._hook.finish(context) - def update(self, data: bytes, context: _ContextLike) -> None: + def update(self, data: bytes, context) -> None: """ Update the checksum with new data during packing/unpacking. @@ -400,7 +382,7 @@ def update(self, data: bytes, context: _ContextLike) -> None: """ self._obj = self.algo.update(self._obj, data, context) or self._obj - def verfiy(self, context: _ContextLike) -> None: + def verfiy(self, context) -> None: """ Verify the checksum upon unpacking. @@ -463,7 +445,7 @@ def __init__(self, target: str, algorithm: Algorithm) -> None: self._ctx_hook = f"{CTX_DIGEST_HOOK}__{target}" self._ctx_algo = f"{CTX_DIGEST_ALGO}__{target}" - def update(self, data: bytes, context: _ContextLike) -> None: + def update(self, data: bytes, context) -> None: """ Updates the digest object with new data. @@ -474,7 +456,7 @@ def update(self, data: bytes, context: _ContextLike) -> None: new_obj = self.algo.update(obj, data, context) context[self._ctx_obj] = new_obj or obj - def begin(self, context: _ContextLike) -> None: + def begin(self, context) -> None: """ Initializes the digest algorithm and attaches an IO hook to track data. 
""" @@ -518,7 +500,7 @@ class Format: :type verify: bool """ - def __init__(self, target: str, struct: _StructLike, verify: bool = False) -> None: + def __init__(self, target: str, struct, verify=False) -> None: self.name = target self.struct = struct self.verify = verify @@ -531,11 +513,11 @@ def __type__(self) -> type: """Defines the Python type returned after unpacking (always bytes).""" return bytes - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """Returns the size in bytes of the digest field.""" return self.struct.__size__(context) - def __pack__(self, obj: None, context: _ContextLike) -> None: + def __pack__(self, obj: None, context) -> None: """ Called during packing. Computes the digest over all previously packed data, stores it in the context, finalizes the IO hook, and packs the digest itself. @@ -545,7 +527,7 @@ def __pack__(self, obj: None, context: _ContextLike) -> None: context[self._ctx_hook].finish(context) self.struct.__pack__(digest, context) - def __unpack__(self, context: _ContextLike): + def __unpack__(self, context): """ Called during unpacking. Computes the digest over all preceding data, reads the stored digest, optionally verifies it, and returns the unpacked value. @@ -573,7 +555,7 @@ def __unpack__(self, context: _ContextLike): return digest @staticmethod - def begin(target: str, algo: Algorithm) -> _Action: + def begin(target: str, algo): """Factory method to create a DigestFieldAction used at the start of a struct to set up hashing for the named digest field. @@ -588,7 +570,7 @@ def begin(target: str, algo: Algorithm) -> _Action: # --- public algorithms --- -def _hash_digest(algo: Algorithm, struct: _StructLike): +def _hash_digest(algo, struct): """ A utility function to create a `Digest` wrapper for a specific hash algorithm and struct. The wrapper initializes the digest calculation based on the algorithm provided. 
@@ -602,26 +584,26 @@ def _hash_digest(algo: Algorithm, struct: _StructLike): """ def _wrapper( - name: Optional[str] = None, - verify: bool = False, - path: Optional[str] = None, + name=None, + verify=False, + path=None, ) -> Digest: return Digest(algo, struct, name, verify, path) return _wrapper -def _hash_digest_field(struct: _StructLike): +def _hash_digest_field(struct): def _wrapper( name: str, - verify: bool = False, + verify=False, ) -> DigestField: return DigestField(target=name, struct=struct, verify=verify) return _wrapper -def _hashlib_algo(func) -> Algorithm: +def _hashlib_algo(func): """ Creates an `Algorithm` object from a hash function (e.g., hashlib.sha256). @@ -658,9 +640,8 @@ def _hashlib_algo(func) -> Algorithm: try: from cryptography.hazmat.primitives import hashes - from cryptography.hazmat.primitives import hmac - def _cryptography_hash_algo(cls: Type[hashes.HashAlgorithm]): + def _cryptography_hash_algo(cls): """ Creates an `Algorithm` from a cryptography `hashes.HashAlgorithm` class. @@ -698,62 +679,6 @@ def _cryptography_hash_algo(cls: Type[hashes.HashAlgorithm]): Sha3_512 = _hash_digest(Sha3_512_Algo, Bytes(64)) Md5 = _hash_digest(Md5_Algo, Bytes(16)) - class HMACAlgorithm(Algorithm): - """ - HMAC (Hash-based Message Authentication Code) algorithm implementation. - - This class wraps an HMAC algorithm using a specified hash function and key. - """ - - def __init__( - self, - key: bytes | _ContextLambda, - algorithm: hashes.HashAlgorithm, - ) -> None: - super().__init__(name=f"hmac_{algorithm.name}") - self._key = key - self._algorithm = algorithm - - def create(self, context: _ContextLike) -> Any: - """ - Creates an HMAC object with the provided key and algorithm. - """ - key = self._key(context) if callable(self._key) else self._key - return hmac.HMAC(key, self._algorithm) - - def update(self, algo_obj: Any, data: bytes, context: _ContextLike) -> Any: - """ - Updates the HMAC object with new data. 
- """ - return algo_obj.update(data) - - def digest(self, algo_obj: Any, context: _ContextLike) -> Any: - """ - Finalizes the HMAC object and returns the computed digest. - """ - return algo_obj.finalize() - - class HMAC(Digest): - """ - HMAC Digest handler, used to create and verify HMACs based on a provided key and algorithm. - """ - - def __init__( - self, - key: bytes | _ContextLambda, - algorithm: hashes.HashAlgorithm, - name: Optional[str] = None, - verify: bool = False, - path: Optional[str] = None, - ) -> None: - super().__init__( - HMACAlgorithm(key, algorithm), - Bytes(algorithm.digest_size), - name, - verify, - path, - ) - except ImportError: Sha1_Algo = _hashlib_algo(hashlib.sha1) Sha2_224_Algo = _hashlib_algo(hashlib.sha224) @@ -777,9 +702,6 @@ def __init__( Sha3_512 = _hash_digest(Sha3_512_Algo, Bytes(64)) Md5 = _hash_digest(Md5_Algo, Bytes(16)) - HMACAlgorithm = None - HMAC = None - Sha1_Field = _hash_digest_field(Bytes(32)) Sha2_224_Field = _hash_digest_field(Bytes(28)) @@ -791,3 +713,63 @@ def __init__( Sha3_384_Field = _hash_digest_field(Bytes(48)) Sha3_512_Field = _hash_digest_field(Bytes(64)) Md5_Field = _hash_digest_field(Bytes(16)) + + +class HMACAlgorithm(Algorithm): + """ + HMAC (Hash-based Message Authentication Code) algorithm implementation. + + This class wraps an HMAC algorithm using a specified hash function and key. + """ + + def __init__( + self, + key, + algorithm, + ) -> None: + super().__init__(name=f"hmac_{algorithm.name}") + self._key = key + self._algorithm = algorithm + + def create(self, context): + """ + Creates an HMAC object with the provided key and algorithm. + """ + from cryptography.hazmat.primitives import hmac + + key = self._key(context) if callable(self._key) else self._key + return hmac.HMAC(key, self._algorithm) + + def update(self, algo_obj, data: bytes, context): + """ + Updates the HMAC object with new data. 
+ """ + return algo_obj.update(data) + + def digest(self, algo_obj, context): + """ + Finalizes the HMAC object and returns the computed digest. + """ + return algo_obj.finalize() + + +class HMAC(Digest): + """ + HMAC Digest handler, used to create and verify HMACs based on a provided key and algorithm. + """ + + def __init__( + self, + key, + algorithm, + name=None, + verify=False, + path=None, + ) -> None: + super().__init__( + HMACAlgorithm(key, algorithm), + Bytes(algorithm.digest_size), + name, + verify, + path, + ) diff --git a/src/caterpillar/fields/digest.pyi b/src/caterpillar/fields/digest.pyi new file mode 100755 index 00000000..0d198880 --- /dev/null +++ b/src/caterpillar/fields/digest.pyi @@ -0,0 +1,167 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+from ._base import Field as Field +from .common import Bytes as Bytes, uint32 as uint32 +from caterpillar.abc import _ContextLambda, _ContextLike, _StructLike, _ActionLike +from caterpillar.context import CTX_OBJECT as CTX_OBJECT, CTX_STREAM as CTX_STREAM +from caterpillar.exception import ( + StructException as StructException, + ValidationError as ValidationError, +) +from caterpillar.fields.hook import IOHook as IOHook +from caterpillar.shared import Action as Action +from cryptography.hazmat.primitives import hashes +from typing import Callable, Generic, Optional, Protocol, Self, Type, TypeVar + +DEFAULT_DIGEST_PATH: str + +class _DigestValue: ... + +_AlgoObjT = TypeVar("_AlgoObjT") +_AlgoReturnT = TypeVar("_AlgoReturnT", default=bytes) + +class Algorithm(Generic[_AlgoObjT, _AlgoReturnT]): + name: str + def __init__( + self, + create: _ContextLambda[_AlgoObjT] | None = None, + update: Callable[[_AlgoObjT, bytes, _ContextLike], _AlgoObjT] | None = None, + digest: Callable[[_AlgoObjT, _ContextLike], _AlgoReturnT] | None = None, + name: str | None = None, + ) -> None: ... + def create(self, context: _ContextLike) -> _AlgoObjT: ... + def update( + self, algo_obj: _AlgoObjT, data: bytes, context: _ContextLike + ) -> _AlgoObjT: ... + def digest(self, algo_obj: _AlgoObjT, context: _ContextLike) -> _AlgoReturnT: ... + +class Digest(Generic[_AlgoObjT, _AlgoReturnT]): + algo: Algorithm[_AlgoObjT, _AlgoReturnT] + name: str + struct: _StructLike[_AlgoReturnT, _AlgoReturnT] + path: str + def __init__( + self, + algorithm: Algorithm[_AlgoObjT, _AlgoReturnT], + struct: _StructLike[_AlgoReturnT, _AlgoReturnT], + name: str | None = None, + verify: bool = False, + path: str | None = None, + ) -> None: ... + def __enter__(self) -> Self: ... + def __exit__(self, *_) -> None: ... + def begin(self, context: _ContextLike) -> None: ... + def end_pack(self, context: _ContextLike) -> None: ... + def end_unpack(self, context: _ContextLike) -> None: ... 
+ def update(self, data: bytes, context: _ContextLike) -> None: ... + def verfiy(self, context: _ContextLike) -> None: ... + +CTX_DIGEST_OBJ: str +CTX_DIGEST_HOOK: str +CTX_DIGEST_ALGO: str +CTX_DIGEST: str + +class DigestFieldAction(Generic[_AlgoObjT, _AlgoReturnT]): + name: str + algo: Algorithm[_AlgoObjT, _AlgoReturnT] + def __init__( + self, target: str, algorithm: Algorithm[_AlgoObjT, _AlgoReturnT] + ) -> None: ... + def update(self, data: bytes, context: _ContextLike) -> None: ... + def begin(self, context: _ContextLike) -> None: ... + __action_pack__ = begin + __action_unpack__ = begin + +class DigestField(Generic[_AlgoReturnT], _StructLike[None, _AlgoReturnT]): + name: str + struct: _StructLike[_AlgoReturnT, _AlgoReturnT] + verify: bool + def __init__( + self, + target: str, + struct: _StructLike[_AlgoReturnT, _AlgoReturnT], + verify: bool = False, + ) -> None: ... + def __type__(self) -> Type[bytes]: ... + def __size__(self, context: _ContextLike) -> int: ... + def __pack__(self, obj: None, context: _ContextLike) -> None: ... + def __unpack__(self, context: _ContextLike) -> _AlgoReturnT: ... + @staticmethod + def begin( + target: str, algo: Algorithm[_AlgoObjT, _AlgoReturnT] + ) -> DigestFieldAction[_AlgoObjT, _AlgoReturnT]: ... + +class _DigestFactory(Protocol[_AlgoReturnT]): + def __call__( + self, name: Optional[str] = ..., verify: bool = ..., path: Optional[str] = ... + ) -> Digest[_AlgoReturnT]: ... 
+ +Crc32_Algo: Algorithm[int, int] +Crc32: _DigestFactory[int] +Crc32_Field: DigestField[int] +Adler_Algo: Algorithm[int] +Adler: _DigestFactory[int] +Adler_Field: DigestField[int] +Sha1_Algo: Algorithm[hashes.Hash] +Sha2_224_Algo: Algorithm[hashes.Hash] +Sha2_256_Algo: Algorithm[hashes.Hash] +Sha2_384_Algo: Algorithm[hashes.Hash] +Sha2_512_Algo: Algorithm[hashes.Hash] +Sha3_224_Algo: Algorithm[hashes.Hash] +Sha3_256_Algo: Algorithm[hashes.Hash] +Sha3_384_Algo: Algorithm[hashes.Hash] +Sha3_512_Algo: Algorithm[hashes.Hash] +Md5_Algo: Algorithm[hashes.Hash] +Sha1: _DigestFactory[bytes] +Sha2_224: _DigestFactory[bytes] +Sha2_256: _DigestFactory[bytes] +Sha2_384: _DigestFactory[bytes] +Sha2_512: _DigestFactory[bytes] +Sha3_224: _DigestFactory[bytes] +Sha3_256: _DigestFactory[bytes] +Sha3_384: _DigestFactory[bytes] +Sha3_512: _DigestFactory[bytes] +Md5: _DigestFactory[bytes] + +class HMACAlgorithm(Algorithm[hashes.Hash]): + def __init__( + self, key: bytes | _ContextLambda[bytes], algorithm: hashes.HashAlgorithm + ) -> None: ... + def create(self, context: _ContextLike) -> hashes.Hash: ... + def update( + self, algo_obj: hashes.Hash, data: bytes, context: _ContextLike + ) -> hashes.Hash: ... + def digest(self, algo_obj: hashes.Hash, context: _ContextLike) -> bytes: ... + +class HMAC(Digest[bytes]): + def __init__( + self, + key: bytes | _ContextLambda[bytes], + algorithm: hashes.HashAlgorithm, + name: str | None = None, + verify: bool = False, + path: str | None = None, + ) -> None: ... 
+ +Sha1_Field: DigestField[bytes] +Sha2_224_Field: DigestField[bytes] +Sha2_256_Field: DigestField[bytes] +Sha2_384_Field: DigestField[bytes] +Sha2_512_Field: DigestField[bytes] +Sha3_224_Field: DigestField[bytes] +Sha3_256_Field: DigestField[bytes] +Sha3_384_Field: DigestField[bytes] +Sha3_512_Field: DigestField[bytes] +Md5_Field: DigestField[bytes] diff --git a/src/caterpillar/fields/hook.py b/src/caterpillar/fields/hook.py old mode 100644 new mode 100755 index 858504c7..2934a19b --- a/src/caterpillar/fields/hook.py +++ b/src/caterpillar/fields/hook.py @@ -13,17 +13,9 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . from io import RawIOBase -from typing import Callable, Optional -from caterpillar.abc import _ContextLike from caterpillar.context import CTX_STREAM -HookInit = Callable[[_ContextLike], None] -HookUpdate = Callable[[bytes, _ContextLike], Optional[bytes]] -HookRead = Callable[[bytes, _ContextLike], Optional[bytes]] -HookWrite = Callable[[bytes, _ContextLike], Optional[bytes]] -HookFinish = Callable[[_ContextLike], None] - class IOHook(RawIOBase): """ @@ -49,13 +41,7 @@ class IOHook(RawIOBase): """ def __init__( - self, - io: RawIOBase, - init: Optional[HookInit] = None, - update: Optional[HookUpdate] = None, - read: Optional[HookRead] = None, - write: Optional[HookWrite] = None, - finish: Optional[HookFinish] = None, + self, io, init=None, update=None, read=None, write=None, finish=None ) -> None: # NOTE: no validation here if _io is valid, because # self.init will set it @@ -76,7 +62,7 @@ def assert_context_set(self) -> None: if self._context is None: raise ValueError("Context is not set") - def init(self, context: _ContextLike) -> None: + def init(self, context) -> None: """ Initialize the I/O hook with the provided context. This triggers the `init` hook, if available, and sets up the context for subsequent operations. 
@@ -90,7 +76,7 @@ def init(self, context: _ContextLike) -> None: self._io = context[CTX_STREAM] self._context[CTX_STREAM] = self - def finish(self, context: _ContextLike) -> None: + def finish(self, context) -> None: """ Finalize the I/O hook by calling the `finish` hook (if provided) and restoring the original I/O stream in the context. @@ -122,7 +108,7 @@ def readable(self) -> bool: """ return super().readable() - def read(self, size: int = -1) -> bytes | None: + def read(self, size: int = -1): """ Read data from the stream, applying the optional hooks (if any). @@ -147,7 +133,7 @@ def read(self, size: int = -1) -> bytes | None: return data - def write(self, b: bytes, /) -> int | None: + def write(self, b, /): """ Write data to the stream, applying the optional hooks (if any). diff --git a/src/caterpillar/fields/hook.pyi b/src/caterpillar/fields/hook.pyi new file mode 100755 index 00000000..8e713ecd --- /dev/null +++ b/src/caterpillar/fields/hook.pyi @@ -0,0 +1,46 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+from typing import Callable, Optional +from typing_extensions import Buffer +from caterpillar.abc import _ContextLike, _ContextLambda +from caterpillar.context import CTX_STREAM as CTX_STREAM +from io import RawIOBase + +HookInit = _ContextLambda[None] +HookUpdate = Callable[[bytes, _ContextLike], Optional[bytes]] +HookRead = Callable[[bytes, _ContextLike], Optional[bytes]] +HookWrite = Callable[[bytes, _ContextLike], Optional[bytes]] +HookFinish = _ContextLambda[None] + +class IOHook(RawIOBase): + def __init__( + self, + io: RawIOBase | None, + init: HookInit | None = None, + update: HookUpdate | None = None, + read: HookRead | None = None, + write: HookWrite | None = None, + finish: HookFinish | None = None, + ) -> None: ... + def assert_context_set(self) -> None: ... + def init(self, context: _ContextLike) -> None: ... + def finish(self, context: _ContextLike) -> None: ... + def seekable(self) -> bool: ... + def readable(self) -> bool: ... + def read(self, size: int = -1) -> bytes | None: ... + def write(self, b: Buffer, /) -> int | None: ... + def writable(self) -> bool: ... + def tell(self) -> int: ... + def seek(self, offset: int, whence: int = 0) -> int: ... diff --git a/src/caterpillar/fields/net.py b/src/caterpillar/fields/net.py old mode 100644 new mode 100755 index 93e9cac8..0d61d9d0 --- a/src/caterpillar/fields/net.py +++ b/src/caterpillar/fields/net.py @@ -16,10 +16,6 @@ import binascii import re -from typing import Union, Any, Optional - - -from caterpillar.abc import _ContextLike from .common import Transformer, uint32, UInt, Bytes from ._base import singleton @@ -47,7 +43,7 @@ def __type__(self) -> type: """ return ipaddress.IPv4Address - def encode(self, obj: ipaddress.IPv4Address, context: _ContextLike) -> Any: + def encode(self, obj: ipaddress.IPv4Address, context): """ Encode an IPv4Address object. 
@@ -58,7 +54,7 @@ def encode(self, obj: ipaddress.IPv4Address, context: _ContextLike) -> Any: # pylint: disable-next=protected-access return obj._ip - def decode(self, parsed: int, context: _ContextLike) -> ipaddress.IPv4Address: + def decode(self, parsed: int, context) -> ipaddress.IPv4Address: """ Decode an encoded IPv4 address. @@ -92,7 +88,7 @@ def __type__(self) -> type: """ return ipaddress.IPv6Address - def encode(self, obj: ipaddress.IPv6Address, context: _ContextLike) -> Any: + def encode(self, obj: ipaddress.IPv6Address, context): """ Encode an IPv6Address object. @@ -103,7 +99,7 @@ def encode(self, obj: ipaddress.IPv6Address, context: _ContextLike) -> Any: # pylint: disable-next=protected-access return obj._ip - def decode(self, parsed: int, context: _ContextLike) -> ipaddress.IPv6Address: + def decode(self, parsed: int, context) -> ipaddress.IPv6Address: """ Decode an encoded IPv6 address. @@ -123,7 +119,7 @@ class MACAddress(Transformer): DELIMITERS = re.compile(rb"[:-]") - def __init__(self, sep: Optional[str] = None) -> None: + def __init__(self, sep=None) -> None: """ Initialize the MACAddress transformer. @@ -132,7 +128,7 @@ def __init__(self, sep: Optional[str] = None) -> None: super().__init__(Bytes(6)) self.sep = sep or ":" - def encode(self, obj: Union[str, bytes], context: _ContextLike) -> Any: + def encode(self, obj, context): """ Encode a MAC address. @@ -147,7 +143,7 @@ def encode(self, obj: Union[str, bytes], context: _ContextLike) -> Any: mac = re.sub(MACAddress.DELIMITERS, b"", obj) return binascii.unhexlify(mac) - def decode(self, parsed: bytes, context: _ContextLike) -> bytes: + def decode(self, parsed: bytes, context) -> bytes: """ Decode an encoded MAC address. 
diff --git a/src/caterpillar/fields/net.pyi b/src/caterpillar/fields/net.pyi new file mode 100755 index 00000000..666ac7a4 --- /dev/null +++ b/src/caterpillar/fields/net.pyi @@ -0,0 +1,38 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +import ipaddress + +from re import Pattern + +from ._base import singleton as singleton +from .common import ( + Bytes as Bytes, + Transformer as Transformer, + UInt as UInt, + uint32 as uint32, +) +from caterpillar.abc import _ContextLike + +IPv4Address: Transformer[ipaddress.IPv4Address, int, ipaddress.IPv4Address, int] +IPv6Address: Transformer[ipaddress.IPv6Address, int, ipaddress.IPv6Address, int] + +class MACAddress(Transformer[str | bytes, bytes, bytes, bytes]): + DELIMITERS: Pattern + sep: str + def __init__(self, sep: str | None = None) -> None: ... + def encode(self, obj: str | bytes, context: _ContextLike) -> bytes: ... + def decode(self, parsed: bytes, context: _ContextLike) -> bytes: ... 
+ +MAC: MACAddress diff --git a/src/caterpillar/fields/pointer.py b/src/caterpillar/fields/pointer.py old mode 100644 new mode 100755 index cbc14f6e..7cbde26a --- a/src/caterpillar/fields/pointer.py +++ b/src/caterpillar/fields/pointer.py @@ -15,12 +15,13 @@ from typing import Any, Union, Optional -from caterpillar.abc import _ContextLike, _StructLike, _ContextLambda, getstruct +from caterpillar.abc import _ContextLike, _StructLike, _ContextLambda from caterpillar.byteorder import Arch from caterpillar.exception import DelegationError, StructException from caterpillar.context import CTX_STREAM, CTX_FIELD, CTX_ARCH, CTX_SEQ from caterpillar.options import Flag from caterpillar._common import WithoutContextVar +from caterpillar.shared import getstruct from ._mixin import FieldStruct from .common import uint16, uint24, uint32, uint64, uint8 @@ -38,15 +39,13 @@ class pointer(int): :ivar Any obj: The associated object, if any. """ - obj: Optional[Any] - def __repr__(self) -> str: result = super().__repr__() if self.obj is not None: result = f"<{type(self.obj).__name__}* {hex(self)}>" return result - def get(self) -> Optional[Any]: + def get(self): return self.obj @@ -58,20 +57,17 @@ class Pointer(FieldStruct): :ivar struct: The configured struct to use. """ - model: Optional[_StructLike] - struct: Union[_StructLike, _ContextLambda] - __slots__ = ("model", "struct") def __init__( self, - struct: Union[_StructLike, _ContextLambda], - model: Optional[_StructLike] = None, + struct, + model=None, ) -> None: self.struct = struct self.model = getstruct(model, model) if model is not None else None - def __mul__(self, model: _StructLike) -> "Pointer": + def __mul__(self, model): """ Create a new Pointer with a specified model. @@ -82,7 +78,7 @@ def __mul__(self, model: _StructLike) -> "Pointer": return type(self)(self.struct, model) - def __type__(self) -> type: + def __type__(self): """ Get the type associated with the Pointer. 
@@ -91,7 +87,7 @@ def __type__(self) -> type: """ return pointer - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context): """ Get the size of the Pointer struct. @@ -104,7 +100,7 @@ def __size__(self, context: _ContextLike) -> int: struct = self.struct(context) return struct.__size__(context) - def unpack_single(self, context: _ContextLike) -> Union[int, pointer]: + def unpack_single(self, context): """ Unpack a single value using the Pointer struct. @@ -142,7 +138,7 @@ def unpack_single(self, context: _ContextLike) -> Union[int, pointer]: stream.seek(fallback) return self._create(value, start, model_obj, context) - def pack_single(self, obj: Any, context: _ContextLike) -> None: + def pack_single(self, obj, context) -> None: """ Pack a single value using the Pointer struct. @@ -156,7 +152,7 @@ def pack_single(self, obj: Any, context: _ContextLike) -> None: with WithoutContextVar(context, CTX_SEQ, False): struct.__pack__(int(obj), context) - def _to_offset(self, value: Any, start: int, context: _ContextLike) -> int: + def _to_offset(self, value, start: int, context) -> int: """ Convert the pointer value to an offset. @@ -168,7 +164,7 @@ def _to_offset(self, value: Any, start: int, context: _ContextLike) -> int: """ return value - def _clean(self, value: int, context: _ContextLike) -> Any: + def _clean(self, value: int, context) -> Any: """ Clean the pointer value. @@ -178,7 +174,7 @@ def _clean(self, value: int, context: _ContextLike) -> Any: """ return value - def _create(self, value: Any, start: int, model_obj: Any, context: _ContextLike): + def _create(self, value, start: int, model_obj, context): """ Create a new pointer object. 
@@ -197,7 +193,7 @@ def _create(self, value: Any, start: int, model_obj: Any, context: _ContextLike) SIGNED_POINTER_TYS = {x.__bits__: x for x in [int8, int16, int24, int32, int64]} -def uintptr_fn(context: _ContextLike) -> _StructLike: +def uintptr_fn(context): """ Generator function to decide which struct to use as the pointer type based on the current architecture. @@ -210,7 +206,7 @@ def uintptr_fn(context: _ContextLike) -> _StructLike: return UNSIGNED_POINTER_TYS.get(arch.ptr_size, UInt(arch.ptr_size)) -def intptr_fn(context: _ContextLike) -> _StructLike: +def intptr_fn(context): """ Generator function to decide which struct to use as the pointer type based on the current architecture. @@ -254,7 +250,7 @@ class RelativePointer(Pointer): A struct that represents a relative pointer to another struct within the stream. """ - def __type__(self) -> type: + def __type__(self): """ Get the type associated with the RelativePointer. @@ -266,7 +262,7 @@ def __type__(self) -> type: return relative_pointer - def _to_offset(self, value: Any, start: int, context: _ContextLike) -> int: + def _to_offset(self, value, start: int, context) -> int: """ Convert the relative pointer value to an offset. @@ -278,7 +274,7 @@ def _to_offset(self, value: Any, start: int, context: _ContextLike) -> int: """ return start + value - def _create(self, value: Any, start: int, model_obj: Any, context: _ContextLike): + def _create(self, value, start: int, model_obj, context): """ Create a new relative pointer object. 
diff --git a/src/caterpillar/fields/pointer.pyi b/src/caterpillar/fields/pointer.pyi new file mode 100755 index 00000000..a55ab16b --- /dev/null +++ b/src/caterpillar/fields/pointer.pyi @@ -0,0 +1,62 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from ._mixin import FieldStruct +from caterpillar.abc import _ContextLambda, _ContextLike, _StructLike +from caterpillar.options import Flag +from typing import Dict, Generic, Optional, Type, TypeVar + +PTR_STRICT: Flag[None] + +_PtrValueT = TypeVar("_PtrValueT") + +class pointer(Generic[_PtrValueT], int): + obj: Optional[_PtrValueT] + def get(self) -> _PtrValueT | None: ... + +class Pointer(FieldStruct[int, pointer[_PtrValueT]]): + model: _StructLike[_PtrValueT, _PtrValueT] | None + struct: _StructLike[int, int] | _ContextLambda[_StructLike[int, int]] + def __init__( + self, + struct: _StructLike[int, int] | _ContextLambda[_StructLike[int, int]], + model: _StructLike[_PtrValueT, _PtrValueT] | None = None, + ) -> None: ... + def __mul__( + self, model: _StructLike[_PtrValueT, _PtrValueT] + ) -> Pointer[_PtrValueT]: ... + def __type__(self) -> Type[pointer[_PtrValueT]]: ... + def __size__(self, context: _ContextLike) -> int: ... + def unpack_single(self, context: _ContextLike) -> pointer[_PtrValueT]: ... + def pack_single(self, obj: int, context: _ContextLike) -> None: ... 
+ +UNSIGNED_POINTER_TYS: Dict +SIGNED_POINTER_TYS: Dict + +def uintptr_fn(context: _ContextLike) -> _StructLike[int, int]: ... +def intptr_fn(context: _ContextLike) -> _StructLike[int, int]: ... + +uintptr: Pointer[None] +intptr: Pointer[None] + +class relative_pointer(pointer[_PtrValueT]): + base: int + @property + def absolute(self) -> int: ... + +class RelativePointer(Pointer[_PtrValueT]): + def __type__(self) -> relative_pointer[_PtrValueT]: ... + +offintptr: RelativePointer[None] +offuintptr: RelativePointer[None] diff --git a/src/caterpillar/fields/varint.py b/src/caterpillar/fields/varint.py old mode 100644 new mode 100755 index 38d36d01..46bc8c8b --- a/src/caterpillar/fields/varint.py +++ b/src/caterpillar/fields/varint.py @@ -14,9 +14,6 @@ # along with this program. If not, see . from __future__ import annotations -from typing import Any - -from caterpillar.abc import _StreamType, _ContextLike from caterpillar.exception import InvalidValueError, DynamicSizeError, StreamError from caterpillar.byteorder import LittleEndian from caterpillar.context import CTX_FIELD, CTX_STREAM @@ -54,10 +51,10 @@ class VarInt(FieldStruct): def __type__(self) -> type: return int - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: raise DynamicSizeError("VarInt has dynamic size!") - def bit_config(self, context: _ContextLike) -> tuple: + def bit_config(self, context) -> tuple: high_bit = 1 << 7 low_bit = 0 if context[CTX_FIELD].has_flag(VARINT_LSB): @@ -65,7 +62,7 @@ def bit_config(self, context: _ContextLike) -> tuple: low_bit = 1 << 7 return high_bit, low_bit - def pack_single(self, obj: int, context: _ContextLike) -> None: + def pack_single(self, obj: int, context) -> None: """ Pack a single value into the stream. 
@@ -78,7 +75,7 @@ def pack_single(self, obj: int, context: _ContextLike) -> None: if obj < 0: raise InvalidValueError("Invalid negative value for VarInt encoding!") - stream: _StreamType = context[CTX_STREAM] + stream = context[CTX_STREAM] order = context[CTX_FIELD].order is_little = order == LittleEndian @@ -101,7 +98,7 @@ def pack_single(self, obj: int, context: _ContextLike) -> None: # Just write all bytes to the stream stream.write(bytes(data)) - def unpack_single(self, context: _ContextLike) -> Any: + def unpack_single(self, context): """ Unpack a single value from the stream. @@ -109,7 +106,7 @@ def unpack_single(self, context: _ContextLike) -> Any: :param context: The current context. :return: The unpacked value. """ - stream: _StreamType = context[CTX_STREAM] + stream = context[CTX_STREAM] data = [] _, lb = self.bit_config(context) shift = 0 diff --git a/src/caterpillar/fields/varint.pyi b/src/caterpillar/fields/varint.pyi new file mode 100755 index 00000000..b3386145 --- /dev/null +++ b/src/caterpillar/fields/varint.pyi @@ -0,0 +1,15 @@ +from ._mixin import FieldStruct +from caterpillar.abc import _ContextLike +from caterpillar.options import Flag +from typing import Tuple, Type + +VARINT_LSB: Flag[None] + +class VarInt(FieldStruct[int, int]): + def __type__(self) -> Type[int]: ... + def __size__(self, context: _ContextLike) -> int: ... + def bit_config(self, context: _ContextLike) -> Tuple[int, int]: ... + def pack_single(self, obj: int, context: _ContextLike) -> None: ... + def unpack_single(self, context: _ContextLike) -> int: ... + +vint: VarInt diff --git a/src/caterpillar/include/caterpillar/macros.h b/src/caterpillar/include/caterpillar/macros.h index 1ed7f31d..24221bb8 100644 --- a/src/caterpillar/include/caterpillar/macros.h +++ b/src/caterpillar/include/caterpillar/macros.h @@ -24,6 +24,7 @@ #include #define PY_3_13_PLUS (PY_VERSION_HEX >= 0x030d0000) +#define PY_3_14_PLUS (PY_VERSION_HEX >= 0x030e0000) #define _Cp_NameStr(x) ("caterpillar._C." 
x) diff --git a/src/caterpillar/model/__init__.py b/src/caterpillar/model/__init__.py index 88f58eef..7957f103 100644 --- a/src/caterpillar/model/__init__.py +++ b/src/caterpillar/model/__init__.py @@ -25,5 +25,50 @@ pack_file, sizeof, ) -from ._bitfield import BitField, bitfield, BitFieldGroup +from ._bitfield import ( + Bitfield, + bitfield, + BitfieldEntry, + BitfieldGroup, + BitfieldValueFactory, + issigned, + getbits, + NewGroup, + EndGroup, + SetAlignment, + EnumFactory, + CharFactory, + DEFAULT_ALIGNMENT, +) from ._template import istemplate, template, TemplateTypeVar, derive + +__all__ = [ + "Sequence", + "Struct", + "struct", + "UnionHook", + "union", + "unpack", + "unpack_file", + "pack", + "pack_into", + "pack_file", + "sizeof", + "Bitfield", + "bitfield", + "BitfieldGroup", + "issigned", + "getbits", + "istemplate", + "template", + "TemplateTypeVar", + "derive", + "NewGroup", + "EndGroup", + "SetAlignment", + "BitfieldEntry", + "BitfieldValueFactory", + "EnumFactory", + "CharFactory", + "DEFAULT_ALIGNMENT", +] diff --git a/src/caterpillar/model/_base.py b/src/caterpillar/model/_base.py old mode 100644 new mode 100755 index 1cfad671..af5e0636 --- a/src/caterpillar/model/_base.py +++ b/src/caterpillar/model/_base.py @@ -14,15 +14,17 @@ # along with this program. If not, see . 
import re -from typing import Optional, Self, Tuple -from typing import List, Dict, Any -from typing import Set, Iterable, Union - - -from caterpillar.abc import _StructLike, _ContextLike, _StreamType, _Action -from caterpillar.context import Context, CTX_PATH, CTX_OBJECT, CTX_STREAM, CTX_SEQ +from typing import Optional, Self, Iterable + +from caterpillar.context import ( + CTX_FIELD, + Context, + CTX_PATH, + CTX_OBJECT, + CTX_STREAM, + CTX_SEQ, +) from caterpillar.byteorder import ( - BYTEORDER_FIELD, ByteOrder, SysNative, Arch, @@ -43,7 +45,12 @@ Const, ) from caterpillar._common import unpack_seq, pack_seq -from caterpillar.shared import ATTR_ACTION_PACK, ATTR_ACTION_UNPACK, Action +from caterpillar.shared import ( + ATTR_ACTION_PACK, + ATTR_ACTION_UNPACK, + Action, + ATTR_BYTEORDER, +) from caterpillar import registry @@ -58,13 +65,13 @@ class Sequence(FieldMixin): Sequence(fields=['a']) """ - model: Any + model: type """ Specifies the target class/dictionary used as the base model. """ # second value with action in tuple is reserved - fields: List[Field | Tuple[_Action, None]] + fields: list """A list of all fields defined in this struct. This attribute stores the fields in an *ordered* collection, whereby ordered @@ -83,12 +90,12 @@ class Sequence(FieldMixin): Global architecture definition (will be inferred on all fields) """ - options: Set[Flag] + options: set """ Additional options specifying what to include in the final class. """ - field_options: Set[Flag] + field_options: set """ Global field flags that will be applied on all fields. 
""" @@ -106,7 +113,7 @@ class Sequence(FieldMixin): def __init__( self, - model: Optional[Dict[str, Field]] = None, + model: Optional[dict] = None, order: Optional[ByteOrder] = None, arch: Optional[Arch] = None, options: Iterable[Flag] | None = None, @@ -119,7 +126,7 @@ def __init__( self.field_options = set(field_options or []) # these fields will be set or used while processing the model type - self._member_map_: Dict[str, Field] = {} + self._member_map_ = {} self.fields = [] self.is_union = S_UNION in self.options # Process all fields in the model @@ -161,7 +168,7 @@ def has_option(self, option: Flag) -> bool: """ return option in self.options - def _included(self, name: str, default: Optional[Any], annotation: Any) -> bool: + def _included(self, name: str, default, annotation) -> bool: """ Check if a field with the given name should be included. @@ -179,10 +186,10 @@ def _included(self, name: str, default: Optional[Any], annotation: Any) -> bool: return True - def _set_default(self, name: str, value: Any) -> None: + def _set_default(self, name: str, value) -> None: pass - def _process_default(self, name, annotation: Any, had_default=False) -> Any: + def _process_default(self, name, annotation, had_default=False): default = getattr(self.model, name, INVALID_DEFAULT) # constant values that are not in the form of fields, structs or types should # be wrapped into constant values. 
For more information, see _process_field @@ -244,17 +251,13 @@ def _process_model(self) -> None: for name in removables: self._remove_from_model(name) - def _prepare_fields(self) -> Dict[str, Any]: + def _prepare_fields(self): return self.model - def _process_annotation( - self, annotation: Any, default: Optional[Any], order: ByteOrder, arch: Arch - ) -> Union[_StructLike, Field]: + def _process_annotation(self, annotation, default, order: ByteOrder, arch: Arch): return registry.to_struct(annotation, arch=arch, order=order) - def _process_field( - self, name: str, annotation: Any, default: Optional[Any] - ) -> Field: + def _process_field(self, name: str, annotation, default) -> Field: """ Process a field in the model. @@ -263,10 +266,10 @@ def _process_field( :param default: The default value of the field. :return: The processed field. """ - field: Field = None - struct: _StructLike = None + field = None + struct = None - order = getattr(annotation, BYTEORDER_FIELD, self.order or SysNative) + order = getattr(annotation, ATTR_BYTEORDER, self.order or SysNative) arch = self.arch or system_arch result = self._process_annotation(annotation, default, order, arch) if isinstance(result, Field): @@ -287,7 +290,7 @@ def _process_field( field.default = default field.order = self.order or field.order field.arch = self.arch or field.arch - field.flags.update(self.field_options) + field.flags.update({hash(x): x for x in self.field_options}) return field def add_field(self, name: str, field: Field, included: bool = False) -> None: @@ -303,7 +306,7 @@ def add_field(self, name: str, field: Field, included: bool = False) -> None: if included: self._member_map_[name] = field - def add_action(self, action: _Action) -> None: + def add_action(self, action) -> None: self.fields.append((action, None)) def del_field(self, name: str, field: Field) -> None: @@ -316,10 +319,10 @@ def del_field(self, name: str, field: Field) -> None: self._member_map_.pop(name, None) self.fields.remove(field) 
- def get_members(self) -> Dict[str, Field]: + def get_members(self): return self._member_map_.copy() - def __size__(self, context: _ContextLike) -> int: + def __size__(self, context) -> int: """ Get the size of the struct. @@ -334,15 +337,16 @@ def __size__(self, context: _ContextLike) -> int: return max(sizes) if self.is_union else sum(sizes) - def unpack_one(self, context: _ContextLike) -> Optional[Any]: + def unpack_one(self, context): # At first, we define the object context where the parsed values # will be stored - init_data: Dict[str, Any] = Context() + init_data = Context() context[CTX_OBJECT] = Context(_parent=context) base_path = context[CTX_PATH] if self.is_union: - start = context[CTX_STREAM].tell() + stream = context[CTX_STREAM] + start = stream.tell() max_size = 0 for field in self.fields: @@ -354,7 +358,7 @@ def unpack_one(self, context: _ContextLike) -> Optional[Any]: continue if self.is_union: - pos = context[CTX_STREAM].tell() + pos = stream.tell() # REVISIT: make this a real attribute name = field.__name__ @@ -368,8 +372,8 @@ def unpack_one(self, context: _ContextLike) -> Optional[Any]: if self.is_union: # This union implementation will cover the max size - max_size = max(context[CTX_STREAM], stream.tell() - pos) - context[CTX_STREAM].seek(start) + max_size = max(max_size, stream.tell() - pos) + stream.seek(start) obj = init_data if self.is_union: @@ -377,7 +381,7 @@ def unpack_one(self, context: _ContextLike) -> Optional[Any]: stream.seek(start + max_size) return obj - def __unpack__(self, context: _ContextLike) -> Optional[Any]: + def __unpack__(self, context): """ Unpack the struct from the stream. 
@@ -388,7 +392,10 @@ def __unpack__(self, context: _ContextLike) -> Optional[Any]: base_path = context[CTX_PATH] # REVISIT: the name 'this_context' is misleading here this_context = Context( - _parent=context, _io=context[CTX_STREAM], _path=base_path + _root=context._root, + _parent=context, + _io=context[CTX_STREAM], + _path=base_path, ) # See __pack__ for more information field: Optional[Field] = context.get("_field") @@ -396,12 +403,12 @@ def __unpack__(self, context: _ContextLike) -> Optional[Any]: return unpack_seq(context, self.unpack_one) return self.unpack_one(this_context) - def get_value(self, obj: Any, name: str, field: Field) -> Optional[Any]: + def get_value(self, obj, name: str, field: Field): return obj.get(name, None) - def pack_one(self, obj: Dict[str, Any], context: _ContextLike) -> None: + def pack_one(self, obj, context) -> None: max_size = 0 - union_field: Optional[_StructLike] = None + union_field = None base_path: str = context[CTX_PATH] for field in self.fields: @@ -425,7 +432,7 @@ def pack_one(self, obj: Dict[str, Any], context: _ContextLike) -> None: if name in self._member_map_: value = self.get_value(obj, name, field) else: - # REVISIT: this line might not be necessary if const fields alredy + # REVISIT: this line might not be necessary if const fields already # use their internal value. value = field.default if field.default != INVALID_DEFAULT else None field.__pack__(value, context) @@ -442,15 +449,16 @@ def pack_one(self, obj: Dict[str, Any], context: _ContextLike) -> None: value = self.get_value(obj, name, union_field) union_field.__pack__(value, context) - def __pack__(self, obj: Any, context: _ContextLike) -> None: + def __pack__(self, obj, context) -> None: # As structs can be used in field definitions a field will call this struct # and could potentially be a sequence. Therefore, we have to check whether we # should unpack multiple objects. 
- field: Optional[Field] = context.get("_field") + field: Optional[Field] = context.get(CTX_FIELD) if field and context[CTX_SEQ]: pack_seq(obj, context, self.pack_one) else: ctx = Context( + _root=context._root, _parent=context, _io=context[CTX_STREAM], _path=context[CTX_PATH], @@ -467,7 +475,7 @@ def __str__(self) -> str: # --- private sequence tyoe converter --- @registry.TypeConverter(dict) -def _type_converter(annotation: Any, kwargs: dict) -> _StructLike: +def _type_converter(annotation, kwargs: dict): arch = kwargs.pop("arch", None) order = kwargs.pop("order", None) return Sequence(model=annotation, order=order, arch=arch) diff --git a/src/caterpillar/model/_base.pyi b/src/caterpillar/model/_base.pyi new file mode 100755 index 00000000..332a2af5 --- /dev/null +++ b/src/caterpillar/model/_base.pyi @@ -0,0 +1,72 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+from typing import Any, Dict, Generic, Iterable, Self, Optional, Type, TypeVar, Union + +from caterpillar.abc import _StructLike, _ActionLike, _ContextLike, _ContainsStruct +from caterpillar.byteorder import ByteOrder, Arch +from caterpillar.fields._base import Field +from caterpillar.fields._mixin import FieldMixin +from caterpillar.options import Flag + +_SeqModelT = TypeVar( + "_SeqModelT", default=Dict[str, Union[_StructLike, _ContainsStruct, type]] +) + +class Sequence(FieldMixin, Generic[_SeqModelT], _StructLike[_SeqModelT, _SeqModelT]): + model: Any + fields: list[Field | tuple[_ActionLike, None]] + order: Optional[ByteOrder] + arch: Optional[Arch] + options: set[Flag] + field_options: set[Flag] + is_union: bool + _member_map_: Dict[str, Field] + def __init__( + self, + model: Optional[dict[str, Field]] = None, + order: Optional[ByteOrder] = None, + arch: Optional[Arch] = None, + options: Optional[Iterable[Flag]] = None, + field_options: Optional[Iterable[Flag]] = None, + ) -> None: ... + def __add__(self, sequence) -> Self: ... + def __sub__(self, sequence) -> Self: ... + __iadd__ = __add__ + __isub__ = __sub__ + def has_option(self, option: Flag) -> bool: ... + def add_field(self, name: str, field: Field, included: bool = False) -> None: ... + def add_action(self, action: _ActionLike) -> None: ... + def del_field(self, name: str, field: Field) -> None: ... + def get_members(self) -> dict[str, Field]: ... + def unpack_one(self, context: _ContextLike) -> _SeqModelT: ... + def get_value(self, obj: Any, name: str, field: Field) -> Any: ... + def pack_one(self, obj: _SeqModelT, context: _ContextLike) -> None: ... + def __size__(self, context: _ContextLike) -> int: ... + def __unpack__(self, context: _ContextLike) -> _SeqModelT: ... + def __type__(self) -> Type[_SeqModelT]: ... + def __pack__(self, obj: _SeqModelT, context: _ContextLike) -> None: ... + def _set_default(self, name: str, value: Any) -> Any: ... 
+ def _included(self, name: str, default: Any | None, annotation: Any) -> bool: ... + def _process_default( + self, name: str, annotation: Any, had_default: bool = ... + ) -> Any: ... + def _replace_type(self, name: str, type_: type) -> None: ... + def _remove_from_model(self, name: str) -> None: ... + def _process_model(self) -> None: ... + def _prepare_fields(self) -> Dict[str, Any]: ... + def _process_annotation( + self, annotation: Any, default: Any, order: ByteOrder, arch: Arch + ) -> _StructLike: ... + def _process_field(self, name: str, annotation: Any, default: Any) -> Field: ... diff --git a/src/caterpillar/model/_bitfield.py b/src/caterpillar/model/_bitfield.py old mode 100644 new mode 100755 index 91c4aace..96248184 --- a/src/caterpillar/model/_bitfield.py +++ b/src/caterpillar/model/_bitfield.py @@ -12,110 +12,562 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see . -import struct as libstruct - -from typing import Optional, Any, Dict -from typing import Iterable, Tuple -from typing import Self, List -from dataclasses import dataclass, field as dcfield - -from caterpillar.abc import _StructLike, _ContextLike, _StreamType, typeof +from collections.abc import Iterable +from caterpillar.fields.common import Int +from caterpillar.shared import ( + ATTR_ACTION_PACK, + ATTR_ACTION_UNPACK, + ATTR_BYTEORDER, + typeof, + ATTR_BITS, + ATTR_SIGNED, +) from caterpillar.byteorder import ( - Arch, - ByteOrder, - byteorder, - system_arch, LittleEndian, + SysNative, + system_arch, ) from caterpillar.options import ( - Flag, + B_GROUP_NEW, GLOBAL_BITFIELD_FLAGS, GLOBAL_STRUCT_OPTIONS, GLOBAL_UNION_OPTIONS, + B_OVERWRITE_ALIGNMENT, + B_GROUP_END, + B_GROUP_KEEP, + B_NO_AUTO_BOOL, + Flag, ) from caterpillar.fields import ( Field, - uint8, - uint16, - uint32, - uint64, - boolean, Pass, + INVALID_DEFAULT, ) -from caterpillar.exception import ValidationError, DelegationError -from caterpillar.context import 
Context, CTX_PATH, CTX_OBJECT, CTX_STREAM +from caterpillar.exception import StructException +from caterpillar.context import CTX_PATH, Context, CTX_OBJECT, CTX_STREAM + +from ._struct import Struct, sizeof + + +# --- Bitfield Concept --- +# NEW REVISED CONCEPT +# Each Bitfield instance maintains a sequence of bitfield groups, where each group +# contains a collection of sized fields. A bitfield group may consist of either multiple +# entries (i.e., any types that can be converted to an integral type) or a single +# _StructLike object. For example, consider the following bitfield definition: +# +# @bitfield +# class Format: +# a1: 1 +# a2: 1 - boolean +# _ : 0 +# b1: char +# c1: uint32 +# +# This Bitfield definition will generate three distinct bitfield groups (labeled here as +# groups a, b, and c). By default, bitfields use 8-bit alignment, leading to the following +# layout: +# +# Group Pos Bits +# a 0x00 8 +# b 0x01 8 +# c 0x02 32 +# +# Internally, only the first group requires special bit-level parsing. The remaining groups +# (b and c) are treated as standard structures since they span full bytes or words without +# sub-byte alignment. This dynamic grouping mechanism allows leveraging full struct-like +# class definitions within bitfields. +# +# This new approach enables more complex and expressive bitfield definitions. 
The annotation +# syntax is therefore extended as follows: +# +# +---------------------------------------------------+--------------------------------------+ +# 1.| <name> : <bits> [ - <field> ] | Standard field with optional type | +# +---------------------------------------------------+--------------------------------------+ +# 2.| <name> : 0 | Aligns to the next byte boundary | +# +---------------------------------------------------+--------------------------------------+ +# 3.| <name> : <field> | Struct-like field (no bits consumed) | +# +---------------------------------------------------+--------------------------------------+ +# 4.| <name> : (<field>,<factory>) | Field with custom type factory | +# +---------------------------------------------------+--------------------------------------+ +# 5.| <name> : (<bits>,<factory>[,<options>]) | bits with custom type factory | +# | <name> : (<bits>,[<options>]) | and options | +# +---------------------------------------------------+--------------------------------------+ +# +# Processing Rules: +# +# Rule 1.: +# - Default alignment is 1 byte (8 bits). +# - If followed by a rule 2 declaration, the remaining bits in the current byte are padded. +# - If a <field> is provided: +# - typeof(<field>) is used to infer the factory. +# - getbits(<field>) and sizeof(<field>) determine the field's alignment. +# - If a custom alignment is configured in the Bitfield constructor, inferred alignment is +# ignored unless the field includes the B_OVERWRITE_ALIGNMENT option. +# - If the B_GROUP_END option is set, the current group is finalized and a new one is started. +# +# Rule 2.: +# - This rule forces alignment to the next byte boundary. +# - The field is ignored during final class generation (name is discarded). +# - The current group is finalized unless the bitfield is configured with B_GROUP_KEEP +# +# Rule 3.: +# - Equivalent to struct-like class field definitions. +# - Automatically implies a rule 2 alignment. +# - Always finalizes the current group regardless of B_GROUP_KEEP. +# +# Rule 4.: +# - Extension of Rule 1.
+# - Explicitly defines a conversion factory for the field. +# - The factory must be: +# - A built-in type (e.g., int, bool) supporting __int__, or +# - A type or instance of BitfieldValueFactory. +# +# Rule 5.: +# - Builds upon Rule 4 with support for options. +# - Options can be passed as a list or single element. +# - Supported Options: +# - NewGroup: Aligns the current group, starts a new one, and adds the entry to it. +# - EndGroup: Adds the entry to the current group, then aligns it. +# - SetAlignment: Changes the current working alignment. +# - Note: Option order affects behavior and must be considered carefully. + +#: The default alignment (in bits) used for bitfield group boundaries +DEFAULT_ALIGNMENT = 8 + +#: Alias for the `B_GROUP_NEW` flag, used to indicate that a new bitfield group should be started. +NewGroup = B_GROUP_NEW + +#: Alias for the `B_GROUP_END` flag, used to indicate that the current bitfield group +#: should be finalized. +EndGroup = B_GROUP_END + + +class SetAlignment: + """ + Instructional flag used to update the current bitfield alignment dynamically during + bitfield generation. + + This class allows explicitly setting a new alignment boundary (in bits) for subsequent fields + or groups in a bitfield definition. This enables finer control over how bitfield groups are + organized and aligned. + + :param new_alignment: The alignment size in bits to be used from this point forward in the bitfield layout. + :type new_alignment: int + """ -from ._struct import Struct + def __init__(self, new_alignment: int) -> None: + self.alignment = new_alignment -BitTuple = Tuple[int, int, type] + @staticmethod + def flag(new_alignment: int): + """Create a :class:`Flag` instance representing a request to set a new alignment. + This method is intended for use where a generic :class:`Flag` is expected rather than a full + :class:`SetAlignment` object, e.g. for setting options for a :class:`Field`.
-BITS_ATTR = "__bits__" -SIGNED_ATTR = "__signed__" + >>> field = 5 - uint32 | SetAlignment.flag(32) + :param new_alignment: The alignment size in bits. + :type new_alignment: int + :return: A `Flag` object with the key `"bitfield.new_alignment"` and the specified alignment as its value. + :rtype: Flag + """ + return Flag("bitfield.new_alignment", new_alignment) + + def __hash__(self) -> int: + return hash("SetAlignment") + + +def getbits(obj) -> int: + """Retrieve the bit-width of a given object. -def getbits(obj: Any) -> int: - __bits__ = getattr(obj, BITS_ATTR) + This function checks for a :py:func:`__bits__` attribute on the object. The object must either implement + the :class:`_SupportsBits` or :class:`_ContainsBits` protocol. + + >>> class A: + ... __bits__ = 3 + ... + >>> a = A() + >>> getbits(a) + 3 + + :param obj: The object for which the bit-width should be determined. It is expected to have an :attr:`ATTR_BITS` attribute. + :type obj: Any + :return: The number of bits used by the object. + :rtype: int + :raises AttributeError: If the object does not have an attribute defined by :attr:`ATTR_BITS`. + """ + __bits__ = getattr(obj, ATTR_BITS) return __bits__() if callable(__bits__) else __bits__ -def issigned(obj: Any) -> bool: - return bool(getattr(obj, SIGNED_ATTR, None)) +def issigned(obj) -> bool: + """Determine whether a given object represents a signed field. + :param obj: The object for which signedness should be determined. + :type obj: Any + :return: :code:`True` if the field is marked as signed, :code:`False` otherwise. + :rtype: bool + """ + return bool(getattr(obj, ATTR_SIGNED, None)) -def getformat(obj: Any) -> str: - attr = getattr(obj, "__fmt__") - return attr() if callable(attr) else attr +class BitfieldValueFactory: + """ + A generic factory class responsible for converting values between Python objects and integers + for use in bitfield entries. 
-@dataclass(init=False) -class BitFieldGroup: - size: int - pos: int - fmt: str - fields: Dict[BitTuple, Field] = dcfield(default_factory=dict) + By default, the factory converts to and from Python's built-in :code:`int` type, but it can be customized + to support any type that accepts an integer in its constructor and implements :code:`__int__`. - def __init__(self, size: int, pos: int, fields: Dict = None) -> None: - self.size = size - self.pos = pos - self.fields = fields or {} - # this has to get refactored - if 8 < size <= 16: - self.fmt = "H" - elif 16 < size <= 32: - self.fmt = "I" - elif 32 < size <= 64: - self.fmt = "Q" - else: - self.fmt = "B" + :param target: The target type to which integer values will be converted., defaults to None + :type target: type, optional + """ + + __slots__ = ("target",) + + def __init__(self, target=None) -> None: + self.target = target or int + + def to_int(self, obj) -> int: + """Convert a Python object to an integer. + + :param obj: The object to convert. + :type obj: Any + :return: The integer representation of the object. + :rtype: int + """ + return int(obj) + + def from_int(self, value: int): + """Convert an integer to the target object type. + + :param value: The integer to convert. + :type value: int + :return: The value converted to the target type. + :rtype: Any + """ + return self.target(value) + + +class EnumFactory(BitfieldValueFactory): + """A value factory for enum-like types used in bitfields. + + This factory attempts to convert between integers and enumeration instances, + using the provided :code:`model` (which should support :code:`__int__`). It + can operate in strict or lenient mode: + + - In strict mode, a :class:`ValueError` is raised if conversion fails. + - In lenient mode, the raw integer is returned if the value is not in the enum. + + :param model: The enum model or mapping type to use. + :type model: Type + :param strict: Whether to raise an error on unknown values. + :type strict: bool + + .. 
code-block:: python + :caption: Example + + class Status(enum.IntEnum): + OK = 0 + ERROR = 1 + + factory = EnumFactory(Status, strict=True) + factory.from_int(0) # -> Status.OK + factory.from_int(2) # -> ValueError (strict mode) + """ + + def __init__(self, model, strict=False) -> None: + super().__init__(model) + self.strict = strict + + def from_int(self, value: int): + """ + Convert an integer into an enum instance or raw int. + + :param value: The integer to convert. + :type value: int + :return: Enum instance or raw int if not found (in non-strict mode). + :rtype: Any + :raises ValueError: If strict is enabled and value is not valid. + """ + try: + return super().from_int(value) + except ValueError: + if self.strict: + raise + return value + + +class CharFactory(BitfieldValueFactory): + """ + A value factory for handling single ASCII/Unicode characters as integers. + + This factory allows treating a character field as a one-byte integer and vice versa, + automatically converting during packing and unpacking. + """ + + def __init__(self) -> None: + super().__init__(str) + + def from_int(self, value: int): + """ + Convert an integer to its character representation. + + :param value: Integer ASCII or Unicode code point. + :type value: int + :return: Corresponding character. + :rtype: str + """ + return chr(value) + + def to_int(self, obj) -> int: + """ + Convert a character to its integer (ordinal) representation. + + :param obj: The character to convert. + :type obj: str + :return: Corresponding integer value. + :rtype: int + """ + return ord(obj) + + +class BitfieldEntry: + """ + Represents a single entry in a bitfield, including its bit position, width, name, and + conversion behavior. + + May also represent a special action or directive instead of a field. + + :param bit: The starting bit position within its group. + :type bit: int + :param width: The number of bits used by this field. + :type width: int + :param name: The name of the field. 
+ :type name: str + :param factory: A factory for type conversion. Defaults to BitfieldValueFactory. + :type factory: type or BitfieldValueFactory or None + :param action: Optional action object for special handling (e.g., alignment or padding). + :type action: Any + """ + + __slots__ = ("bit", "width", "name", "factory", "action", "low_mask") + + def __init__( + self, bit: int, width: int, name: str, factory=None, action=None + ) -> None: + self.bit = bit + self.width = width + self.name = name + self.factory = factory or BitfieldValueFactory() + if isinstance(self.factory, type): + self.factory = self.factory() + self.action = action + self.low_mask = (1 << self.width) - 1 + + @staticmethod + def new_action(action): + """ + Create a new action-type entry (e.g., padding, control directive). + + :param action: The action object to encapsulate. + :type action: Any + :return: A BitfieldEntry instance with no bit-width, used for meta instructions. + :rtype: BitfieldEntry + """ + return BitfieldEntry(0, 0, "", action=action) + + def shift(self, value_width: int) -> int: + """ + Calculate how much to shift the field when extracting it from a value. + + :param value_width: The total bit width of the container. + :type value_width: int + :return: The number of bits to shift. + :rtype: int + """ + bit_pos = max(0, value_width - self.bit) + return max(bit_pos - self.width, 0) + + def is_action(self) -> bool: + """ + Check whether this entry is an action (i.e., not a data field). + + :return: True if this is an action entry. + :rtype: bool + """ + return self.action is not None + + def __repr__(self) -> str: + """ + Return a human-readable string representation of the bitfield entry. + + :return: String representation. + :rtype: str + """ + if self.is_action(): + return repr(self.action) + + r = f"" + + +class BitfieldGroup: + """ + A group of one or more bitfield entries. 
Groups are used to organize fields within a single + alignment unit and may represent either packed fields or standalone fields. + + :param bit_count: The number of bits in the group, or -1 for single field representation. + :type bit_count: int + """ + + __slots__ = ("entries", "bit_count") + + def __init__(self, bit_count: int) -> None: + self.bit_count = bit_count + self.entries = [] + + def is_field(self) -> bool: + """ + Determine whether the group contains a single non-bitfield field. + + :return: True if the group holds a single struct-like field. + :rtype: bool + """ + return self.bit_count == -1 + + def get_field(self): + """ + Get the single field from this group. + + :return: The field object. + :rtype: BitfieldEntry + """ + return self.entries[0] + + def set_field(self, field): + """ + Set the group to hold only the given field and mark it as a standalone field group. + + :param field: The field to store in this group. + :type field: BitfieldEntry + """ + self.entries = [field] + self.bit_count = -1 + + def align_to(self, alignment: int): + """ + Align the bit count of this group to the specified boundary. + + :param alignment: The number of bits to align to. + :type alignment: int + """ + if not self.is_field(): + pad = self.bit_count % alignment + if pad > 0: + self.bit_count += alignment - pad + + def is_empty(self) -> bool: + """ + Check if the group contains any entries. + + :return: True if the group is empty. + :rtype: bool + """ + return len(self.entries) == 0 + + def get_size(self, context=None): + """ + Get the size of this group in bytes. + + :param context: Optional context used for size evaluation. + :type context: Any + :return: The size of the group in bytes. + :rtype: int + """ + if self.is_field(): + field = self.get_field() + return field.__size__(context) if context else sizeof(field) + + return self.bit_count // 8 + + def get_bits(self, context=None): + """ + Get the total number of bits in this group. 
+ + :param context: Optional context used for size evaluation. + :type context: Any + :return: The number of bits. + :rtype: int + """ + return self.get_size(context) * 8 + + def __repr__(self) -> str: + if self.is_field(): + return repr(self.get_field()) + + return f"" -class BitField(Struct): - groups: List[BitFieldGroup] +class Bitfield(Struct): + """ + A Bitfield represents a packed structure composed of bit-level fields. This + class allows for the declarative definition of compact memory representations + where each field can occupy an arbitrary number of bits, not necessarily + aligned to byte boundaries. + + Core Implementation: + - Bitfields are organized into BitfieldGroups, which manage alignment and field aggregation. + - Entries can be individual bit widths or wrapped fields with explicit alignment. + - Special field options like NewGroup and EndGroup can control group layout. + - Supports value factories for type conversion and symbolic runtime actions. + + Available global options: + - :code:`B_NO_AUTO_BOOL`: disables automatically converting 1bit fields to boolean + - :code:`B_GROUP_KEEP`: disables finalizing groups when using the alignment definition syntax + + :param model: The model for the structure. + :type model: Any + :param order: Byte order of the structure. + :type order: Optional[str] + :param arch: Target architecture. + :type arch: Optional[str] + :param options: Global structure options. + :type options: Optional[set] + :param field_options: Field-specific options. + :type field_options: Optional[set] + :param alignment: Bit alignment size. 
+ :type alignment: Optional[int] + """ __slots__ = ( "groups", - "_bit_pos", - "_abs_bit_pos", - "_current_group", - "__fmt__", - "__bits__", + "alignment", ) def __init__( self, - model: type, - order: Optional[ByteOrder] = None, - arch: Optional[Arch] = None, - options: Iterable[Flag] = None, - field_options: Iterable[Flag] = None, + model, + order=None, + arch=None, + options=None, + field_options=None, + alignment=None, ) -> None: - self.groups: List[BitFieldGroup] = [] + self.alignment = alignment or DEFAULT_ALIGNMENT # These fields remain private and will be deleted after processing - self._bit_pos: int = 0 - self._abs_bit_pos: int = 0 - self._current_group: BitFieldGroup = None - + self._current_alignment = self.alignment + self._current_group = BitfieldGroup(self._current_alignment) + self._bit_pos = 0 + self.groups = [self._current_group] super().__init__( model=model, order=order, @@ -126,259 +578,505 @@ def __init__( # Add additional options based on the struct's type self.options.difference_update(GLOBAL_STRUCT_OPTIONS, GLOBAL_UNION_OPTIONS) self.options.update(GLOBAL_BITFIELD_FLAGS) - self.__bits__ = sum(map(lambda x: x.size, self.groups)) - self.__fmt__ = "".join(map(lambda x: x.fmt, self.groups)) + self.groups = [group for group in self.groups if not group.is_empty()] + self.groups[-1].align_to(self._current_alignment) + # REVISIT: should be enable modification after processing? del self._bit_pos - del self._abs_bit_pos + del self._current_alignment del self._current_group - def __add__(self, other: "BitField") -> Self: - if not isinstance(other, BitField): - raise ValidationError( - f"Attempted to add a non-bitfield struct to a bitfield! (type={type(other)})" + def __add__(self, sequence): + """ + Append another Bitfield instance to this one. + + :param sequence: Another Bitfield instance. + :type sequence: Bitfield + :return: Combined Bitfield. + :rtype: Bitfield + :raises TypeError: If sequence is not a Bitfield. 
+ """ + if not isinstance(sequence, Bitfield): + # REVISIT: we could just add each field as a group individually? + raise TypeError( + f"Attempted to add a non-bitfield struct to a bitfield! (type={type(sequence)})" ) - # REVISIT: undefined bahaviour when parsing - return super(Struct, self).__add__(other) - def _process_field( - self, name: str, annotation: Any, default: Optional[Any] + self.groups.extend(sequence.groups) + return super(Struct, self).__add__(sequence) + + def _process_align(self, options) -> Field: + """ + Process an alignment directive. + + .. code-block:: bnf + + : 0 + + :param options: A list of alignment-related options. + :type options: Optional[list] + :return: A placeholder field. + :rtype: Field + """ + # 2.: the current group will be finalized + self._current_group.align_to(self._current_alignment) + if not self.has_option(B_GROUP_KEEP): + self._current_group = self._new_group(self._current_alignment) + else: + self._bit_pos = self._current_group.bit_count + + for option in options or []: + if self._process_alignment_option(option): + continue + + alignment = self._current_alignment + group = self._current_group + if option.name in (EndGroup.name, NewGroup.name): + # finalize current group (same effect for alignment statement) + group.align_to(alignment) + self._current_group = self._new_group(alignment) + + return Field(Pass) + + def _process_bits(self, name: str, bits: int, factory=None, options=None) -> Field: + """ + Process a bitfield entry with a given width. + + :param name: Field name. + :type name: str + :param bits: Width in bits. + :type bits: int + :param factory: Optional value factory. + :type factory: Optional[BitfieldValueFactory] + :param options: Field-specific options. + :type options: Optional[list] + :return: Resulting Field. 
+ :rtype: Field + """ + if bits == 0: + return self._process_align(options) + + if not factory and bits == 1 and not self.has_option(B_NO_AUTO_BOOL): + factory = BitfieldValueFactory(bool) + + entry = BitfieldEntry(self._bit_pos, bits, name, factory) + if not self._process_options(options, entry): + group = self._current_group + group.entries.append(entry) + # Adjust the size of the group dynamically + self._bit_pos += entry.width + group.bit_count = max(group.bit_count, self._bit_pos) + # this is only symbolic + return Field(Int(bits)) + + def _process_bits_field( + self, name: str, field, options=None, factory=None ) -> Field: """ - Process a field in the model. + Process a bitfield that wraps another field instance. - :param name: The name of the field. - :param annotation: The annotation of the field. - :param default: The default value of the field. - :return: The processed field. + :param name: Field name. + :type name: str + :param field: The field instance. + :type field: Field + :param options: List of options. + :type options: Optional[list] + :param factory: Optional value factory. + :type factory: Optional[BitfieldValueFactory] + :return: Resulting Field. + :rtype: Field + :raises TypeError: If bit width is not an integer. """ + if field.bits is None: + # we don't need to check for NewGroup and EndGroup options here as no + # bits are specified and the field gets its own group. 
+ for option in options or []: + self._process_alignment_option(option) - # Fields can be defined as follows: - # name : bit_count [ - struct ] [ = default_value ] - # or - # name : struct [ = default_value ] - struct: _StructLike = None - field: Field = None + # bits not present -> treat definition as simple field, which means we finalize + # the current group, create a new FIELD GROUP and another new one after that + self._current_group.align_to(self._current_alignment) + field_group = self._new_group(-1) + field_group.set_field(field) + self._current_group = self._new_group(self._current_alignment) + return field - order = byteorder(annotation, self.order) - group: BitFieldGroup = self._current_group - arch = self.arch or system_arch + width = field.bits + if not isinstance(width, int): + raise TypeError( + f"Bitfield: field definition of {name!r} requires an integer as bits" + ) - width = 0 - if group is None: - # First, specify the current group and add it to the rest of - # the internal representation. - group = BitFieldGroup(8, self._abs_bit_pos) - self.groups.append(group) - self._current_group = group - - if isinstance(annotation, int): - # CASE 1: Only the bit amount is specified. We take the current group - # into consideration and decide whether we have to apply a padding. - # TODO: cleanup - struct = uint8 # this struct is only used to infer the right type - if 8 < annotation <= 16: - struct = uint16 - elif 16 < annotation <= 32: - struct = uint32 - elif 32 < annotation <= 64: - struct = uint64 - elif annotation == 1: - struct = boolean - if annotation != 0: - width = annotation - else: - # Special case: a zero indicates we have to start a new byte. To - # accomplish that, we simply create a new BitFieldGroup. The padding - # is generated by moving the internal absolute bit position forwards. 
- self._current_group = BitFieldGroup(8, self._abs_bit_pos) - width = 8 - self._abs_bit_pos % 8 - struct = Pass # NoneType will be inferred - - elif isinstance(annotation, Field): - # CASE 2: the field has been defined using a type specifier. - field = annotation - if field.bits is None: - # We have to use a special method to calculate the amount of this this - # field takes. - try: - width = getbits(field.struct) - except ValueError as exc: - raise ValidationError( - f"Field {name!r} does not specify a bit width!" - ) from exc - else: - width = field.bits + entry = BitfieldEntry( + self._bit_pos, width, name, factory or BitfieldValueFactory(typeof(field)) + ) + if not self._process_options(options, entry): + group = self._current_group + group.entries.append(entry) + self._bit_pos += width + group.bit_count = max(group.bit_count, self._bit_pos) - elif isinstance(annotation, _StructLike): - # CASE 3: Only the struct is given without a bit width - struct = annotation + if field.has_flag(B_OVERWRITE_ALIGNMENT): try: - # The same applies here: the struct MUST specify a bit width - width = getbits(struct) - except ValueError as exc: - raise ValidationError( - f"Field {name!r} does not specify a bit width!" 
- ) from exc - - if struct is not None: - field = Field(struct, order, arch=arch, default=default) - - if field is None: - raise ValidationError( - f"Field {name!r} could not be created: {annotation!r}" - ) - field.default = default - field.order = self.order or field.order - field.arch = self.arch or field.arch - field.bits = field.bits or width - field.flags.update(self.field_options) - - # Now, we have to check whether a new byte has to be started - if group.size - self._bit_pos < width: - if field.bits is None: - # Only type specified, we take its bit count - size = width - else: - leftover = width % 8 - size = width + ((8 - leftover) if leftover > 0 else 0) - group = BitFieldGroup(size, self._abs_bit_pos) - self._current_group = group - self.groups.append(group) - self._bit_pos = 0 - - type_ = typeof(field.struct) - bit_pos = max(group.size - 1 - self._bit_pos, 0) - # NOTE: I know, we're calling this method twice now, but it saves some - # iterations later on. - if self._included(name, default, annotation): - group.fields[(bit_pos, width, type_)] = field - self._bit_pos += width - self._abs_bit_pos += width + field_bits = getbits(field.struct) + except Exception: + field_bits = sizeof(field) * 8 + + self._current_alignment = field_bits or DEFAULT_ALIGNMENT + self._current_group.align_to(self._current_alignment) + return field - def _included(self, name: str, default: Optional[Any], annotation: Any) -> bool: + def _process_options(self, options, entry=None) -> bool: + consumed = False + for option in options or []: + if self._process_alignment_option(option): + continue + + group = self._current_group + alignment = self._current_alignment + if option.name == EndGroup.name: + if entry: + group.entries.append(entry) + self._bit_pos += entry.width + group.bit_count = max(group.bit_count, self._bit_pos) + consumed = True + + group.align_to(alignment) + self._current_group = self._new_group(alignment) + elif option.name == NewGroup.name: + # finalize current group, 
create a new one and add the entry to the newly + # created group + group.align_to(alignment) + self._current_group = group = self._new_group(alignment) + + if entry: + group.entries.append(entry) + # position was reset to zero + self._bit_pos += entry.width + group.bit_count = max(group.bit_count, self._bit_pos) + consumed = True + return consumed + + def _new_group(self, alignment): + new_group = BitfieldGroup(alignment) + self.groups.append(new_group) + self._bit_pos = 0 + return new_group + + def _process_alignment_option(self, option): + if isinstance(option, SetAlignment): + # update current working alignment + self._current_alignment = option.alignment or DEFAULT_ALIGNMENT + self._current_group.align_to(self._current_alignment) + return True + elif isinstance(option, Flag): + if option.name == "bitfield.new_alignment": + self._current_alignment = option.value or DEFAULT_ALIGNMENT + self._current_group.align_to(self._current_alignment) + return True + + return False + + def _process_field(self, name: str, annotation, default): + arch = self.arch or system_arch + order = getattr(annotation, ATTR_BYTEORDER, self.order or SysNative) + match annotation: + case int(): + if annotation == 0: + return self._process_align(None) + else: + # 1. (without field) defines the width using the default value factory + return self._process_bits(name, annotation) + + case tuple(): # NEW EXTENDED DEFINITION + if len(annotation) == 0: + raise ValueError( + f"Extended field definition for {name!r} does not define any values!" 
+ ) + + if len(annotation) == 1: + # definition: (, ) + (width,) = annotation + if width == 0: + raise ValueError( + f"Extended field {name!r} defines forbidden width of zero" + ) + + return self._process_bits(name, width) + + options = [] + factory = None + (width, factory_or_option, *extra_options) = annotation + if isinstance(factory_or_option, BitfieldValueFactory): + factory = factory_or_option + elif isinstance(factory_or_option, type): + # here we enable just specifying types instead of a factory + if not issubclass(factory_or_option, BitfieldValueFactory): + factory = BitfieldValueFactory(factory_or_option) + else: + factory = factory_or_option() + else: + # treat as option or as a list of options + options = ( + [factory_or_option] + if not isinstance(factory_or_option, Iterable) + else list(factory_or_option) + ) + + # extra options may be a list or single element + for extra in extra_options: + options.extend(extra if isinstance(extra, Iterable) else [extra]) + + if isinstance(width, int): + # rule no. 
5 + return self._process_bits(name, width, factory, options) + + field = width + if not isinstance(field, Field): + field = Field(field, order=order, arch=arch, default=default) + + return self._process_bits_field(name, field, options, factory) + + case _: + # rule 1 (with field) or rule 3 + field = self._process_annotation(annotation, default, order, arch) + if not isinstance(field, Field): + field = Field(field, order=order, arch=arch, default=default) + return self._process_bits_field(name, field) + + def _included(self, name: str, default, annotation) -> bool: if not super()._included(name, default, annotation): return False - if isinstance(annotation, int) and annotation == 0: - # padding should be ignored - return False + width = annotation + if isinstance(annotation, tuple): + width, *_ = annotation + + if isinstance(width, int): + return width != 0 + + if isinstance(width, Field): + return width.bits != 0 return True - def group(self, bit_index: int) -> Optional[BitFieldGroup]: - grp: BitFieldGroup = None - for candidate in self.groups: - if bit_index > candidate.pos: - break - grp = candidate - return grp - - def __size__(self, context: _ContextLike) -> int: - # The size of a bitfield is alsways static - return self.__bits__ // 8 - - def unpack_one(self, context: _ContextLike) -> Optional[Any]: - # At first, we define the object context where the parsed values - # will be stored - init_data: Dict[str, Any] = Context() + def _replace_type(self, name: str, type_: type) -> None: + entry = self.get_entry(name) + if entry is not None: + if not entry.factory: + type_ = int + elif isinstance(entry.factory, BitfieldValueFactory): + type_ = entry.factory.target or object + else: + type_ = object + + # else: must be a field with a known type + return super()._replace_type(name, type_) + + def __size__(self, context) -> int: + """ + Calculate the total size of the bitfield structure. + + :param context: Packing context. 
+ :type context: Any + :return: Total size in bytes. + :rtype: int + """ + # size is different as our model includes correct padding + return sum(map(lambda g: g.get_size(context), self.groups)) + + def __bits__(self) -> int: + """ + Compute the total number of bits in the structure. + + :return: Total bit count. + :rtype: int + """ + return sum(map(lambda g: g.get_bits(), self.groups)) + + def unpack_one(self, context): + init_data = Context() context[CTX_OBJECT] = Context(_parent=context) - values = libstruct.unpack( - f"{self.order.ch}{self.__fmt__}", - context[CTX_STREAM].read(self.__bits__ // 8), - ) - for i, group in enumerate(self.groups): - # each group specifies the fields we are about to unpack. But first, we have - # to read the bits from the stream - value = values[i] - for bit_info, field in group.fields.items(): - name: str = field.__name__ - # The field should be ignored if it is not within the - # member map (this usually means we have a padding field) - if name not in self._member_map_: - continue - # unnecessary: - # context[CTX_PATH] = f"{base_path}.<{i}>.{name}" - bit_pos, width, factory = bit_info - low_mask = (1 << width) - 1 - if width == 1: - field_value = bool(value & low_mask << bit_pos) - else: - shift = max(bit_pos + 1 - width, 0) - field_value: int = (value >> shift) & low_mask - if factory is not None: - field_value = factory(field_value) - # Finally, apply the new value - init_data[name] = field_value + base_path = context[CTX_PATH] + # REVISIT + endian = "little" if self.order == LittleEndian else "big" + for group in self.groups: + if group.is_field(): + # unpack using field instance + field = group.get_field() + name = field.__name__ + context[CTX_PATH] = f"{base_path}.{name}" + value = field.__unpack__(context) + context[CTX_OBJECT][name] = value + if name in self._member_map_: + init_data[name] = value + + else: + raw_data = context[CTX_STREAM].read(group.get_size()) + if not raw_data: + # set context path to next entry for 
debugging + context[CTX_PATH] = f"{base_path}.{group.entries[0].name}" + raise StructException( + f"Failed to parse group of size {group.bit_count}bits: unexpected EOF!", + context, + ) + raw_value = int.from_bytes(raw_data, endian) + for entry in group.entries: + # each entry may be an action + context[CTX_PATH] = f"{base_path}.{entry.name}" + if entry.is_action(): + func = getattr(entry.action, ATTR_ACTION_UNPACK, None) + if func: + func(context) + continue + + value = (raw_value >> entry.shift(group.bit_count)) & entry.low_mask + if entry.factory: + value = entry.factory.from_int(value) + + init_data[entry.name] = value return self.model(**init_data) - def pack_one(self, obj: Any, context: _ContextLike) -> None: - # REVISIT: this function is very time consuming. should be do something - # about that? - stream: _StreamType = context[CTX_STREAM] - values = [] + def pack_one(self, obj, context) -> None: + base_path = context[CTX_PATH] + # REVISIT + endian = "little" if self.order == LittleEndian else "big" for group in self.groups: - # The same applies here, but we convert all values to int instead of reading - # them from the stream - value = 0 - for bit_info, field in group.fields.items(): - # Setup the field's context - name: str = field.__name__ - # Padding is translated into zeros - if name not in self._member_map_: - continue - - bit_pos, width, _ = bit_info - field_value = getattr(obj, name, 0) or 0 - shift = bit_pos + 1 - width - # Here's the tricky part: we have to convert all values to int - # without knowing their type. We make use of Python's data model, - # which defines a function particularly for this use-case: __int__ - # - # See https://docs.python.org/3/reference/datamodel.html#object.__int__ - try: - # REVISIT: what about the field's width - value |= int(field_value) << shift - except NotImplementedError as exc: - raise DelegationError( - f"Field {name!r} does not support to-int conversion!" - ) from exc - # REVISIT: is this cheating? 
- values.append(value) - stream.write(libstruct.pack(f"{self.order.ch}{self.__fmt__}", *values)) + if group.is_field(): + field = group.get_field() + name = field.__name__ + context[CTX_PATH] = f"{base_path}.{name}" + if name in self._member_map_: + value = self.get_value(obj, name, field) + else: + value = field.default if field.default != INVALID_DEFAULT else None + + field.__pack__(value, context) + else: + value = 0 + for entry in group.entries: + context[CTX_PATH] = f"{base_path}.{entry.name}" + if entry.is_action(): + func = getattr(entry.action, ATTR_ACTION_PACK, None) + if func: + func(context) + continue + + entry_value = self.get_value(obj, entry.name, None) + if entry.factory: + entry_value = entry.factory.to_int(entry_value) + + # silently ignore invalid values + value |= (entry_value & entry.low_mask) << entry.shift( + group.bit_count + ) + context[CTX_STREAM].write(value.to_bytes(group.bit_count // 8, endian)) + + def add_action(self, action) -> None: + self._current_group.entries.append( + BitfieldEntry(0, 0, "", action=action) + ) + return super().add_action(action) + + def get_entry(self, name: str): + for group in self.groups: + if group.is_field(): + continue + + for entry in group.entries: + if entry.name == name: + return entry def _make_bitfield( cls: type, - options: Iterable[Flag], - order: Optional[ByteOrder] = None, - arch: Optional[Arch] = None, - field_options: Iterable[Flag] = None, + /, + *, + options, + order=None, + arch=None, + field_options=None, + alignment=None, ) -> type: - _ = BitField( - cls, order=order, arch=arch, options=options, field_options=field_options + _ = Bitfield( + cls, + order=order, + arch=arch, + options=options, + field_options=field_options, + alignment=alignment, ) return cls def bitfield( - cls: type = None, + cls=None, /, *, - options: Iterable[Flag] = None, - order: Optional[ByteOrder] = None, - arch: Optional[Arch] = None, - field_options: Iterable[Flag] = None, + options=None, + order=None, + arch=None, 
+ field_options=None, + alignment=None, ): + """ + Decorator that transforms a class definition into a :class:`Bitfield` structure. + + This decorator enables defining bitfields using simple class syntax, + with support for custom alignment, ordering, architecture, and field options. + + :param cls: The user-defined class to transform. + :type cls: Optional[type] + :param options: A set of global or structure-specific options. + :type options: Optional[set] + :param order: Optional byte order for serialization (e.g., 'little' or 'big'). + :type order: Optional[str] + :param arch: Optional architecture string (e.g., 'x86', 'arm'). + :type arch: Optional[str] + :param field_options: Optional default options for fields. + :type field_options: Optional[set] + :param alignment: Optional alignment in bits. + :type alignment: Optional[int] + :return: The decorated class, enhanced as a `Bitfield` structure. + :rtype: type + + .. code-block:: python + + from caterpillar.py import bitfield, SetAlignment, uint16 + + @bitfield + class Packet: + version : 3 + type : (5, SetAlignment(16)) + length : 10 + _ : 0 # align to 16bits + payload : uint16 + + # You can now pack/unpack Packet instances as compact binary bitfields + pkt = Packet(version=1, type=2, length=128, payload=0xABCD) + packed = pack(pkt) + unpacked = unpack(Packet, packed) + """ + def wrap(cls): return _make_bitfield( - cls, options=options, order=order, arch=arch, field_options=field_options + cls, + options=options, + order=order, + arch=arch, + field_options=field_options, + alignment=alignment, ) if cls is not None: return _make_bitfield( - cls, options=options, order=order, arch=arch, field_options=field_options + cls, + options=options, + order=order, + arch=arch, + field_options=field_options, + alignment=alignment, ) return wrap diff --git a/src/caterpillar/model/_bitfield.pyi b/src/caterpillar/model/_bitfield.pyi new file mode 100755 index 00000000..abd9304b --- /dev/null +++ 
b/src/caterpillar/model/_bitfield.pyi @@ -0,0 +1,164 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +from enum import Enum +from typing import ( + Any, + Callable, + Generic, + Iterable, + List, + Optional, + Type, + TypeVar, + overload, +) + +from caterpillar.abc import _ContextLike, _ActionLike +from caterpillar.byteorder import Arch, ByteOrder +from caterpillar.options import B_GROUP_END, B_GROUP_NEW, Flag +from caterpillar.fields._base import Field +from caterpillar.model._struct import Struct + +_ModelT = TypeVar("_ModelT") + +DEFAULT_ALIGNMENT: int + +def getbits(obj: Any) -> int: ... +def issigned(obj: Any) -> bool: ... + +class SetAlignment: + alignment: int + def __init__(self, new_alignment: int) -> None: ... + @staticmethod + def flag(new_alignment: int) -> Flag[int]: ... + def __hash__(self) -> int: ... + +NewGroup = B_GROUP_NEW +EndGroup = B_GROUP_END + +_VT = TypeVar("_VT", default=int) + +class BitfieldValueFactory(Generic[_VT]): + def to_int(self, obj: _VT) -> int: ... + def from_int(self, value: int) -> _VT: ... + +_EnumT = TypeVar("_EnumT", bound=Enum) + +class EnumFactory(Generic[_EnumT], BitfieldValueFactory[_EnumT | int]): + strict: bool + def __init__(self, model: Type[_EnumT], strict: bool = ...) -> None: ... + def from_int(self, value: int) -> _EnumT | int: ... 
+ +class CharFactory(BitfieldValueFactory[str]): + def from_int(self, value: int) -> str: ... + def to_int(self, obj: str) -> int: ... + +class BitfieldEntry: + bit: int + width: int + name: str + factory: Optional[BitfieldValueFactory] + action: Optional[_ActionLike] + low_mask: int + + def __init__( + self, + bit: int, + width: int, + name: str, + factory: BitfieldValueFactory | Type | None = ..., + action: _ActionLike | None = ..., + ) -> None: ... + @staticmethod + def new_action(action: _ActionLike) -> BitfieldEntry: ... + def shift(self, value_width: int) -> int: ... + def is_action(self) -> bool: ... + +class BitfieldGroup: + bit_count: int + entries: List[BitfieldEntry | Field] + def __init__(self, bit_count: int) -> None: ... + def is_field(self) -> bool: ... + def get_field(self) -> Field: ... + def set_field(self, field: Field) -> None: ... + def align_to(self, alignment: int) -> None: ... + def is_empty(self) -> bool: ... + def get_size(self, context: _ContextLike | None = ...) -> int: ... + def get_bits(self, context: _ContextLike | None = ...) -> int: ... + +class Bitfield(Struct[_ModelT]): + alignment: int + groups: List[BitfieldGroup] + def __init__( + self, + model, + order: ByteOrder | None = ..., + arch: Arch | None = ..., + options: Iterable[Flag] | None = ..., + field_options: Iterable[Flag] | None = ..., + alignment: int | None = ..., + ) -> None: ... + def __add__(self, sequence): ... + def __size__(self, context: _ContextLike) -> int: ... + def __bits__(self) -> int: ... + def unpack_one(self, context: _ContextLike): ... + def pack_one(self, obj: _ModelT, context: _ContextLike) -> None: ... + def add_action(self, action: _ActionLike) -> None: ... + def get_entry(self, name: str) -> Optional[BitfieldEntry]: ... + def _process_align( + self, options: Optional[Iterable[Flag | SetAlignment]] + ) -> Field: ... 
+ def _process_bits( + self, + name: str, + bits: int, + factory: Optional[BitfieldValueFactory | Type] = ..., + options: Optional[Iterable[Flag | SetAlignment]] = ..., + ) -> Field: ... + def _process_bits_field( + self, + name: str, + field: Field, + factory: Optional[BitfieldValueFactory | Type] = ..., + options: Optional[Iterable[Flag | SetAlignment]] = ..., + ) -> Field: ... + def _process_options( + self, + options: Iterable[Flag | SetAlignment], + entry: Optional[BitfieldEntry] = ..., + ) -> bool: ... + def _new_group(self, alignment: int) -> BitfieldGroup: ... + def _process_alignment_option(self, option: Flag | SetAlignment) -> bool: ... + +@overload +def bitfield( + cls: None = None, + /, + *, + options: Iterable[Flag] | None = ..., + order: ByteOrder | None = ..., + arch: Arch | None = ..., + field_options: Iterable[Flag] | None = ..., +) -> Callable[[Type[_ModelT]], Type[_ModelT]]: ... +@overload +def bitfield( + cls: Type[_ModelT], + /, + *, + options: Iterable[Flag] | None = ..., + order: ByteOrder | None = ..., + arch: Arch | None = ..., + field_options: Iterable[Flag] | None = ..., +) -> Type[_ModelT]: ... 
diff --git a/src/caterpillar/model/_struct.py b/src/caterpillar/model/_struct.py old mode 100644 new mode 100755 index cec2a4b5..f5350fd0 --- a/src/caterpillar/model/_struct.py +++ b/src/caterpillar/model/_struct.py @@ -17,23 +17,18 @@ from tempfile import TemporaryFile from io import BytesIO, IOBase -from typing import Optional, Type, TypeVar, Union, Callable -from typing import Dict, Any, Iterable from collections import OrderedDict from shutil import copyfileobj -from caterpillar.abc import getstruct, hasstruct, STRUCT_FIELD -from caterpillar.abc import _StructLike, _StreamType, _SupportsUnpack, _SupportsPack -from caterpillar.abc import _ContainsStruct, _ContextLike, _SupportsSize +from caterpillar.shared import getstruct, hasstruct, ATTR_STRUCT +from caterpillar.abc import _SupportsUnpack, _SupportsSize from caterpillar.context import Context, CTX_STREAM -from caterpillar.byteorder import ByteOrder, Arch from caterpillar.exception import InvalidValueError from caterpillar.options import ( S_EVAL_ANNOTATIONS, S_UNION, S_ADD_BYTES, S_SLOTS, - Flag, GLOBAL_STRUCT_OPTIONS, GLOBAL_UNION_OPTIONS, ) @@ -43,8 +38,6 @@ from ._base import Sequence -_T = TypeVar("_T") - # REVISIT: remove dataclasses dependency class Struct(Sequence): @@ -57,7 +50,7 @@ class Struct(Sequence): :param options: Additional options specifying what to include in the final class. """ - _member_map_: Dict[str, Field] + _member_map_: dict # An internal field that maps the field names of all class attributes to their # corresponding struct fields. 
@@ -66,12 +59,12 @@ class Struct(Sequence): def __init__( self, model: type, - order: Optional[ByteOrder] = None, - arch: Optional[Arch] = None, - options: Iterable[Flag] | None = None, - field_options: Iterable[Flag] | None = None, - kw_only: bool = False, - hook_cls: Optional[type] = None, + order=None, + arch=None, + options=None, + field_options=None, + kw_only=False, + hook_cls=None, ) -> None: self.kw_only = kw_only options = set(options or []) @@ -85,7 +78,7 @@ def __init__( options=options, field_options=field_options, ) - setattr(self.model, STRUCT_FIELD, self) + setattr(self.model, ATTR_STRUCT, self) # Add additional options based on the struct's type slots = self.has_option(S_SLOTS) self.model = dc.dataclass(self.model, kw_only=self.kw_only, slots=slots) @@ -99,10 +92,10 @@ def __init__( if self.has_option(S_ADD_BYTES): setattr(self.model, "__bytes__", _struct_bytes(self)) - def __type__(self) -> type: + def __type__(self): return self.model - def _prepare_fields(self) -> Dict[str, Any]: + def _prepare_fields(self): # We will inspect all base classes in reverse order and selectively # utilize classes that store a struct instance. 
Beginning at position # -1, concluding at 0, and using a step size of -1: @@ -117,10 +110,10 @@ def _prepare_fields(self) -> Dict[str, Any]: # The why is described in detail here: https://docs.python.org/3/howto/annotations.html return inspect.get_annotations(self.model, eval_str=eval_str) - def _set_default(self, name: str, value: Any) -> None: + def _set_default(self, name: str, value) -> None: setattr(self.model, name, value) - def _process_default(self, name, annotation: Any, had_default=False) -> Any: + def _process_default(self, name, annotation, had_default=False): default = super()._process_default(name, annotation, had_default) if default is INVALID_DEFAULT and had_default: self.kw_only = True @@ -132,10 +125,10 @@ def _replace_type(self, name: str, type_: type) -> None: def _remove_from_model(self, name: str) -> None: self.model.__annotations__.pop(name) - def unpack_one(self, context: _ContextLike) -> Optional[Any]: + def unpack_one(self, context): return self.model(**super().unpack_one(context)) - def get_value(self, obj: Any, name: str, field: Field) -> Optional[Any]: + def get_value(self, obj, name: str, field: Field): return getattr(obj, name, None) @@ -145,24 +138,24 @@ class _StructTypeConverter(registry.TypeConverter): def __init__(self) -> None: super().__init__() - def matches(self, annotation: Any) -> bool: + def matches(self, annotation) -> bool: return isinstance(annotation, type) and getstruct(annotation) is not None - def convert(self, annotation: Any, kwargs: dict) -> _StructLike: + def convert(self, annotation, kwargs: dict): return getstruct(annotation) registry.annotation_registry.append(_StructTypeConverter()) -def _struct_bytes(model: Struct) -> Callable: +def _struct_bytes(model: Struct): def to_bytes(self) -> bytes: return pack(self, model) return to_bytes -def _struct_getitem(model: Struct) -> Field: +def _struct_getitem(model: Struct): def class_getitem(*args): if len(args) == 2: _, dim = args @@ -178,12 +171,12 @@ def 
class_getitem(*args): def _make_struct( cls: type, - options: Iterable[Flag] = None, - order: Optional[ByteOrder] = None, - arch: Optional[Arch] = None, - field_options: Iterable[Flag] = None, - kw_only: bool = False, - hook_cls: Optional[type] = None, + options=None, + order=None, + arch=None, + field_options=None, + kw_only=False, + hook_cls=None, ) -> type: """ Helper function to create a Struct class. @@ -207,7 +200,16 @@ def _make_struct( return _.model -def struct(cls: Type[_T] | None = None, /, **kwds) -> Type[_T]: +def struct( + cls=None, + /, + *, + options=None, + order=None, + arch=None, + field_options=None, + kw_only=False, +): """ Decorator to create a Struct class. @@ -220,10 +222,24 @@ def struct(cls: Type[_T] | None = None, /, **kwds) -> Type[_T]: """ def wrap(cls): - return _make_struct(cls, **kwds) + return _make_struct( + cls, + order=order, + arch=arch, + options=options, + field_options=field_options, + kw_only=kw_only, + ) if cls is not None: - return _make_struct(cls, **kwds) + return _make_struct( + cls, + order=order, + arch=arch, + options=options, + field_options=field_options, + kw_only=kw_only, + ) return wrap @@ -264,17 +280,17 @@ def __exit__(self, exc_type, exc_value, traceback) -> None: # This variable MUST be reset afterward self._processing_ = False - def __model_init__(self, obj: Any, *args, **kwargs) -> None: + def __model_init__(self, obj, *args, **kwargs) -> None: # since it is possible now, to specify non-kw_only constructors, # we have to capture both, args and kwargs with self: return self._model_init_(obj, *args, **kwargs) - def __model_setattr__(self, obj: Any, key: str, new_value: Any) -> None: + def __model_setattr__(self, obj, key: str, new_value) -> None: # The target attribute will alyaws be set object.__setattr__(obj, key, new_value) - members: Dict[str, Field] = self.struct.get_members() + members = self.struct.get_members() if self._processing_ or key not in members: # Refresh can't be done if: # 1) the current 
instance is alredy being processed @@ -285,9 +301,7 @@ def __model_setattr__(self, obj: Any, key: str, new_value: Any) -> None: # delegation into method allows for customisation self.refresh(obj, key, new_value, members) - def refresh( - self, obj: Any, key: str, new_value: Any, members: Dict[str, Field] - ) -> None: + def refresh(self, obj, key: str, new_value, members) -> None: # DEFAULT: retrieve the current field and temporarily pack its data field = members[key] data = pack(new_value, field) @@ -304,7 +318,7 @@ def refresh( stream.seek(0) -def _union_init(hook: UnionHook) -> Callable: +def _union_init(hook): # wrapper function to capture the calling instance def init(self, *args, **kwargs) -> None: return hook.__model_init__(self, *args, **kwargs) @@ -312,15 +326,25 @@ def init(self, *args, **kwargs) -> None: return init -def _union_setattr(hook: UnionHook) -> Callable: +def _union_setattr(hook): # wrapper function to capture the calling instance - def setattribute(self, key: str, value: Any) -> None: + def setattribute(self, key: str, value) -> None: hook.__model_setattr__(self, key, value) return setattribute -def union(cls: type = None, /, *, options: Iterable[Flag] = None, **kwds): +def union( + cls=None, + /, + *, + options=None, + order=None, + arch=None, + field_options=None, + kw_only=False, + hook_cls=None, +): """ Decorator to create a Union class. 
@@ -334,17 +358,37 @@ def union(cls: type = None, /, *, options: Iterable[Flag] = None, **kwds): options = set(list(options or []) + [S_UNION]) def wrap(cls): - return _make_struct(cls, options=options, **kwds) + return _make_struct( + cls, + order=order, + arch=arch, + options=options, + field_options=field_options, + kw_only=kw_only, + hook_cls=hook_cls, + ) if cls is not None: - return _make_struct(cls, options=options, **kwds) + return _make_struct( + cls, + order=order, + arch=arch, + options=options, + field_options=field_options, + kw_only=kw_only, + hook_cls=hook_cls, + ) return wrap def pack( - obj: Union[Any, _ContainsStruct], - struct: Optional[_SupportsPack] = None, + obj, + struct=None, + /, + *, + use_tempfile=False, + as_field=False, **kwds, ) -> bytes: """ @@ -357,16 +401,18 @@ def pack( :return: The packed bytes. """ buffer = BytesIO() - pack_into(obj, buffer, struct, **kwds) + pack_into(obj, buffer, struct, use_tempfile=use_tempfile, as_field=as_field, **kwds) return buffer.getvalue() def pack_into( - obj: Union[Any, _ContainsStruct], - buffer: _StreamType, - struct: Optional[_StructLike] = None, - use_tempfile: bool = False, - as_field: bool = False, + obj, + buffer, + struct=None, + /, + *, + use_tempfile=False, + as_field=False, **kwds, ) -> None: """ @@ -379,6 +425,7 @@ def pack_into( data is written to the `buffer`. Example 1: Packing an object into a bytes buffer + >>> buffer = BytesIO() >>> my_obj = SomeObject() # Assume SomeObject is a valid object to be packed >>> pack_into(my_obj, buffer, struct=SomeStruct()) # Using a specific struct @@ -386,10 +433,12 @@ def pack_into( b"..." Example 2: Packing into a file-like stream (e.g., file) + >>> with open('packed_data.bin', 'wb') as f: ... 
pack_into(my_obj, f, struct=SomeStruct()) # Pack into a file Example 3: Using `as_field` to wrap the struct in a Field before packing + >>> buffer = BytesIO() >>> pack_into(42, buffer, struct=uint8, as_field=True) >>> buffer.getvalue() @@ -404,9 +453,16 @@ def pack_into( :raises TypeError: If no `struct` is specified and cannot be inferred from the object. """ - offsets: Dict[int, memoryview] = OrderedDict() + offsets = OrderedDict() + # NOTE: we don't have to set _root here because the default root context + # will be this instance. context = Context( - _parent=None, _path="", _pos=0, _offsets=offsets, mode=MODE_PACK, **kwds + _parent=None, + _path="", + _pos=0, + _offsets=offsets, + mode=MODE_PACK, + **kwds, ) if struct is None: struct = getstruct(obj) @@ -452,10 +508,13 @@ def pack_into( def pack_file( - obj: Union[Any, _ContainsStruct], + obj, filename: str, - struct: Optional[_StructLike] = None, - use_tempfile: bool = False, + struct=None, + /, + *, + use_tempfile=False, + as_field=False, **kwds, ) -> None: """ @@ -469,15 +528,17 @@ def pack_file( :return: None """ with open(filename, "w+b") as fp: - pack_into(obj, fp, struct, use_tempfile, **kwds) + pack_into(obj, fp, struct, use_tempfile=use_tempfile, as_field=as_field, **kwds) def unpack( - struct: Union[_SupportsUnpack, _ContainsStruct], - buffer: Union[bytes, _StreamType], - as_field: bool = False, + struct, + buffer, + /, + *, + as_field=False, **kwds, -) -> Any: +): """ Unpack an object from a bytes buffer or stream using the specified struct. @@ -488,6 +549,7 @@ def unpack( context as attributes. Example: + >>> buffer = b'\\x00\\x01\\x02\\x03' >>> struct = SomeStruct() >>> unpack(struct, buffer) @@ -525,10 +587,13 @@ def unpack( def unpack_file( - struct: Union[_StructLike, _ContainsStruct], + struct, filename: str, + /, + *, + as_field=False, **kwds, -) -> Any: +): """ Unpack an object from a file using the specified struct. @@ -539,12 +604,16 @@ def unpack_file( :return: The unpacked object. 
""" with open(filename, "rb") as fp: - return unpack(struct, fp, **kwds) + return unpack(struct, fp, as_field=as_field, **kwds) -def sizeof(obj: Union[_StructLike, _ContainsStruct, _SupportsSize], **kwds) -> int: +def sizeof(obj, **kwds) -> int: context = Context(_parent=None, _path="", **kwds) struct_ = obj if hasstruct(struct_): struct_ = getstruct(struct_) + + if not isinstance(struct_, _SupportsSize): + raise TypeError(f"{type(struct_).__name__} does not support size calculation!") + return struct_.__size__(context) diff --git a/src/caterpillar/model/_struct.pyi b/src/caterpillar/model/_struct.pyi new file mode 100755 index 00000000..e64d9d82 --- /dev/null +++ b/src/caterpillar/model/_struct.pyi @@ -0,0 +1,229 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+import types +from typing import ( + Any, + Callable, + Iterable, + Optional, + TypeVar, + Union, + Type, + Generic, + overload, +) + +from caterpillar import registry +from caterpillar.abc import ( + _ContainsStruct, + _OT, + _IT, + _SupportsPack, + _StreamType, + _SupportsUnpack, + _ContextLike, + _StructLike, + _SupportsSize, +) +from caterpillar.byteorder import Arch, ByteOrder +from caterpillar.options import Flag +from caterpillar.fields._base import Field +from caterpillar.model._base import Sequence + +_ModelT = TypeVar("_ModelT") + +class Struct(Sequence[_ModelT]): + kw_only: bool + model: Type[_ModelT] + def __init__( + self, + model: Type[_ModelT], + order: Optional[ByteOrder] = None, + arch: Optional[Arch] = None, + options: Optional[Iterable[Flag]] = None, + field_options: Optional[Iterable[Flag]] = None, + kw_only: bool = False, + hook_cls: Optional[Type[_UnionHookLike[_ModelT]]] = None, + ) -> None: ... + def __type__(self) -> Type[_ModelT]: ... + def unpack_one(self, context: _ContextLike) -> _ModelT: ... + def get_value(self, obj: Any, name: str, field: Field) -> Any | None: ... + +class _StructTypeConverter(registry.TypeConverter): + def __init__(self) -> None: ... + def matches(self, annotation: Any) -> bool: ... + def convert(self, annotation: Any, kwargs: dict) -> Struct: ... + +class _UnionHookLike(Generic[_ModelT]): + def __model_init__(self, obj: _ModelT, *args, **kwargs) -> None: ... + def __model_setattr__(self, obj: _ModelT, key: str, new_value: Any) -> None: ... + +class UnionHook(Generic[_ModelT]): + struct: Struct[_ModelT] + max_size: int + def __init__(self, struct_: Struct[_ModelT]) -> None: ... + def __enter__(self) -> None: ... + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_value: BaseException | None, + traceback: types.TracebackType | None, + ) -> None: ... + def __model_init__(self, obj: Any, *args, **kwargs) -> None: ... + def __model_setattr__(self, obj: Any, key: str, new_value: Any) -> None: ...
+ def refresh( + self, obj: Any, key: str, new_value: Any, members: dict[str, Field] + ) -> None: ... + +@overload +def struct( + cls: Type[_ModelT], + /, + *, + options: Optional[Iterable[Flag]] = None, + order: Optional[ByteOrder] = None, + arch: Optional[Arch] = None, + field_options: Optional[Iterable[Flag]] = None, + kw_only: bool = False, +) -> Type[_ModelT]: ... +@overload +def struct( + cls: None = None, + /, + *, + options: Optional[Iterable[Flag]] = None, + order: Optional[ByteOrder] = None, + arch: Optional[Arch] = None, + field_options: Optional[Iterable[Flag]] = None, + kw_only: bool = False, +) -> Callable[[Type[_ModelT]], Type[_ModelT]]: ... +@overload +def union( + cls: Type[_ModelT], + /, + *, + options: Optional[Iterable[Flag]] = None, + order: Optional[ByteOrder] = None, + arch: Optional[Arch] = None, + field_options: Optional[Iterable[Flag]] = None, + kw_only: bool = False, + hook_cls: Optional[Type[_UnionHookLike[_ModelT]]] = None, +) -> Type[_ModelT]: ... +@overload +def union( + cls: None = None, + /, + *, + options: Optional[Iterable[Flag]] = None, + order: Optional[ByteOrder] = None, + arch: Optional[Arch] = None, + field_options: Optional[Iterable[Flag]] = None, + kw_only: bool = False, + hook_cls: Optional[Type[_UnionHookLike[_ModelT]]] = None, +) -> Callable[[Type[_ModelT]], Type[_ModelT]]: ... +@overload +def pack( + obj: _ContainsStruct[_ModelT, _ModelT], + struct: None = None, + /, + *, + use_tempfile: bool = ..., + as_field: bool = ..., + **kwds, +) -> bytes: ... +@overload +def pack( + obj: _IT, + struct: Union[_ModelT, _ContainsStruct[_IT, _OT], _SupportsPack[_IT]] = None, + /, + *, + use_tempfile: bool = ..., + as_field: bool = ..., + **kwds, +) -> bytes: ... +@overload +def pack_into( + obj: _ContainsStruct[_ModelT, _ModelT], + buffer: _StreamType, + struct: None = None, + /, + *, + use_tempfile: bool = ..., + as_field: bool = ..., + **kwds, +) -> None: ...
+@overload +def pack_into( + obj: _IT, + buffer: _StreamType, + struct: Union[_SupportsPack[_IT], _ContainsStruct[_IT, _OT]], + /, + *, + use_tempfile: bool = ..., + as_field: bool = ..., + **kwds, +) -> None: ... +@overload +def pack_file( + obj: _ContainsStruct[_ModelT, _ModelT], + filename: str, + struct: None = None, + /, + *, + use_tempfile: bool = ..., + as_field: bool = ..., + **kwds, +) -> None: ... +@overload +def pack_file( + obj: _IT, + filename: str, + struct: Union[_SupportsPack[_IT], _ContainsStruct[_IT, _OT]], + /, + *, + use_tempfile: bool = ..., + as_field: bool = ..., + **kwds, +) -> None: ... +@overload +def unpack( + struct: Union[_SupportsUnpack[_OT], _ContainsStruct[_IT, _OT]], + buffer: bytes | _StreamType, + /, + *, + as_field: bool = ..., + **kwds, +) -> _OT: ... +@overload +def unpack( + struct: Type[_ModelT], + buffer: bytes | _StreamType, + /, + *, + as_field: bool = ..., + **kwds, +) -> _ModelT: ... +def unpack_file( + struct: Union[_SupportsUnpack[_OT], _ContainsStruct[_IT, _OT]], + filename: str, + /, + *, + as_field: bool = ..., + **kwds, +) -> _OT: ... +def sizeof( + obj: Union[_SupportsSize, _ContainsStruct[Any, Any], _StructLike[Any, Any], type], + **kwds, +) -> int: ... diff --git a/src/caterpillar/model/_template.py b/src/caterpillar/model/_template.py old mode 100644 new mode 100755 index 9d818a3b..9de8c45b --- a/src/caterpillar/model/_template.py +++ b/src/caterpillar/model/_template.py @@ -13,25 +13,16 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see .
-from __future__ import annotations - import sys import inspect import types import dataclasses -from typing import Dict, Any -from typing import Union, Self -from typing import Optional -from typing import Callable, TypeVar - -from caterpillar.byteorder import ByteOrder, Arch -from caterpillar.abc import _GreedyType, _PrefixedType -from caterpillar.abc import _ContextLambda, _Switch -from caterpillar.abc import _StructLike +from caterpillar.abc import _GreedyType from caterpillar.fields import Field, INVALID_DEFAULT from caterpillar.model import Struct from caterpillar.options import S_UNION +from caterpillar.shared import ATTR_TEMPLATE class TemplateTypeVar: @@ -55,7 +46,7 @@ class TemplateTypeVar: name: str """The bound name of this type variable""" - field_kwds: Dict[str, Any] + field_kwds: dict """Arguments that will be passed to the created field instance.""" def __init__(self, name: str, **field_kwds) -> None: @@ -73,31 +64,29 @@ def __repr__(self) -> str: return f"~{self.name}[{count}]" # Now we have to implement all special operators defined in FieldMixin - def __getitem__( - self, amount: Union[int, _GreedyType, _PrefixedType, _ContextLambda] - ) -> TemplateTypeVar: + def __getitem__(self, amount): return TemplateTypeVar(self.name, amount=amount, **self.field_kwds) - def __rshift__(self, switch: Union[dict, _Switch]) -> Self: + def __rshift__(self, switch): return TemplateTypeVar(self.name, options=switch, **self.field_kwds) - def __matmul__(self, offset: Union[int, _ContextLambda]) -> Self: + def __matmul__(self, offset): return TemplateTypeVar(self.name, offset=offset, **self.field_kwds) - def __set_byteorder__(self, order: ByteOrder) -> Self: + def __set_byteorder__(self, order): return TemplateTypeVar(self.name, order=order, **self.field_kwds) - def __rsub__(self, bits: Union[int, _ContextLambda]) -> Self: + def __rsub__(self, bits): return TemplateTypeVar(self.name, bits=bits, **self.field_kwds) # @scheduled_for_removal - def __floordiv__(self, 
condition: Union[_ContextLambda, bool]) -> Self: + def __floordiv__(self, condition): return TemplateTypeVar(self.name, condition=condition, **self.field_kwds) def to_field( self, - struct: Union[_StructLike, _ContextLambda], - arch: Optional[Arch] = None, + struct, + arch=None, default=INVALID_DEFAULT, ) -> Field: # REVISIT: what about flags? @@ -122,13 +111,10 @@ def get_caller_module(frame: int = 1) -> str: raise ModuleNotFoundError("Could not load module from caller!") from e -TEMPLATE_ATTR = "__template__" - - @dataclasses.dataclass class TemplateInfo: - required_tys: Dict[str, _StructLike] - positional_tys: Dict[str, _StructLike] + required_tys: dict + positional_tys: dict def is_defined(self, name: str) -> bool: return name in list(self.required_tys) + list(self.positional_tys) @@ -144,12 +130,12 @@ def add_positional(self, name: str, default=None) -> None: self.positional_tys[name] = default -def istemplate(obj: Any) -> bool: +def istemplate(obj) -> bool: """Return true if the object is a template.""" - return hasattr(obj, TEMPLATE_ATTR) + return hasattr(obj, ATTR_TEMPLATE) -def template(*args: Union[str, TemplateTypeVar], **kwargs) -> Callable[[type], type]: +def template(*args, **kwargs): """ Defines required template type variables if necessary and prepares template class definition.
@@ -197,13 +183,13 @@ def create_template_class(cls) -> type: for name in disposable: # Only temporary template vars will be removed delattr(module, name) - setattr(cls, TEMPLATE_ATTR, info) + setattr(cls, ATTR_TEMPLATE, info) return cls return create_template_class -def get_mangled_name(model_ty: type, annotations: Dict[str, Any]) -> str: +def get_mangled_name(model_ty: type, annotations: dict) -> str: ty_name = model_ty.__name__ parts = [] for name, value in annotations.items(): @@ -214,7 +200,7 @@ def get_mangled_name(model_ty: type, annotations: Dict[str, Any]) -> str: def derive( - template_ty: type, *tys_args, partial=False, name=None, union=False, **tys_kwargs + template_ty, *tys_args, partial=False, name=None, union=False, **tys_kwargs ) -> type: """Creates a new struct class based on the given template class. @@ -237,7 +223,7 @@ def derive( if not istemplate(template_ty): raise TypeError(f"{template_ty.__name__} is not a template class!") - info: TemplateInfo = getattr(template_ty, TEMPLATE_ATTR) + info: TemplateInfo = getattr(template_ty, ATTR_TEMPLATE) if len(tys_args) > len(info.required_tys): raise ValueError( f"Expected max. {len(info.required_tys)} positional arguments - got {len(tys_args)}!" 
@@ -321,5 +307,5 @@ def derive( new_info.required_tys[name] = replacement elif name in info.positional_tys: new_info.positional_tys[name] = replacement - setattr(new_ty, TEMPLATE_ATTR, new_info) + setattr(new_ty, ATTR_TEMPLATE, new_info) return new_ty diff --git a/src/caterpillar/model/_template.pyi b/src/caterpillar/model/_template.pyi new file mode 100755 index 00000000..c26cdea4 --- /dev/null +++ b/src/caterpillar/model/_template.pyi @@ -0,0 +1,53 @@ +from typing import Any, Callable, Protocol, Type, TypeVar + +from caterpillar.abc import ( + _LengthT, + _StructLike, + _ContextLambda, + _SwitchLike, +) +from caterpillar.byteorder import ByteOrder, Arch +from caterpillar.fields._base import Field + +_TemplateModelT = TypeVar("_TemplateModelT") + +class _ContainsTemplate(Protocol): + __template__: TemplateInfo + +class TemplateTypeVar: + name: str + field_kwds: dict[str, Any] + def __init__(self, name: str, **field_kwds) -> None: ... + def __getitem__(self, amount: _LengthT) -> TemplateTypeVar: ... + def __rshift__(self, switch: dict | _SwitchLike) -> TemplateTypeVar: ... + def __matmul__(self, offset: int | _ContextLambda) -> TemplateTypeVar: ... + def __set_byteorder__(self, order: ByteOrder) -> TemplateTypeVar: ... + def __rsub__(self, bits: int | _ContextLambda) -> TemplateTypeVar: ... + def __floordiv__(self, condition: _ContextLambda | bool) -> TemplateTypeVar: ... + def to_field( + self, + struct: _StructLike | _ContextLambda, + arch: Arch | None = None, + default=..., + ) -> Field: ... + +class TemplateInfo: + required_tys: dict[str, _StructLike] + positional_tys: dict[str, _StructLike] + def is_defined(self, name: str) -> bool: ... + def add_required(self, name: str) -> None: ... + def add_positional(self, name: str, default: Any = None) -> None: ... + +def istemplate(obj: Any) -> bool: ... +def template( + *args: str | TemplateTypeVar, **kwargs +) -> Callable[[Type[_TemplateModelT]], Type[_TemplateModelT]]: ... 
+def get_mangled_name(model_ty: type, annotations: dict[str, Any]) -> str: ... +def derive( + template_ty: Type[_ContainsTemplate], + *tys_args, + partial: bool = False, + name: str | None = None, + union: bool = False, + **tys_kwargs, +) -> type: ... diff --git a/src/caterpillar/options.py b/src/caterpillar/options.py old mode 100644 new mode 100755 index fd1524e3..322b41db --- a/src/caterpillar/options.py +++ b/src/caterpillar/options.py @@ -14,8 +14,7 @@ # along with this program. If not, see . from dataclasses import dataclass -from typing import Any, Optional, Set, Union - +from typing import Any @dataclass(init=False) class Flag: @@ -24,12 +23,12 @@ class Flag: name: str """The name of this flag""" - value: Optional[Any] = None + value: Any = None """ Optional configuration value. """ - def __init__(self, name: str, value: Optional[Any] = None) -> None: + def __init__(self, name: str, value=None) -> None: self.name = name self.value = value self._hash_ = hash(name) @@ -44,19 +43,19 @@ def __hash__(self) -> int: #: Defaults that will be applied to **all** structs. -GLOBAL_STRUCT_OPTIONS: Set[Flag] = set() +GLOBAL_STRUCT_OPTIONS = set() #: Defaults that will be applied on **all** unions. -GLOBAL_UNION_OPTIONS: Set[Flag] = set() +GLOBAL_UNION_OPTIONS = set() #: Default field flags that will be applied on **all** fields. -GLOBAL_FIELD_FLAGS: Set[Flag] = set() +GLOBAL_FIELD_FLAGS = set() #: Default field flags that will be applied on **all** bit-fields. -GLOBAL_BITFIELD_FLAGS: Set[Flag] = set() +GLOBAL_BITFIELD_FLAGS = set() -def configure(base: Set[Flag], *flags: Flag) -> None: +def configure(base, *flags: Flag) -> None: """ Update the base set of flags with additional flags. @@ -96,7 +95,7 @@ def set_union_flags(*flags: Flag) -> None: configure(GLOBAL_UNION_OPTIONS, *flags) -def get_flags(obj: Any, attr: Optional[str] = None) -> Optional[Set[Flag]]: +def get_flags(obj, attr=None): """ Get the flags associated with an object. 
@@ -107,7 +106,7 @@ def get_flags(obj: Any, attr: Optional[str] = None) -> Optional[Set[Flag]]: return getattr(obj, attr or "flags", None) -def has_flag(flag: Union[str, Flag], obj: Any, attr: Optional[str] = None) -> bool: +def has_flag(flag, obj, attr=None) -> bool: """ Check if an object has a specific flag. @@ -125,7 +124,7 @@ def has_flag(flag: Union[str, Flag], obj: Any, attr: Optional[str] = None) -> bo return flag in flags -def get_flag(name: str, obj: Any, attr: Optional[str] = None) -> Optional[Flag]: +def get_flag(name: str, obj, attr=None): """ Get a specific flag associated with an object. @@ -145,19 +144,26 @@ def get_flag(name: str, obj: Any, attr: Optional[str] = None) -> Optional[Flag]: ############################################################################### # for structs and unions -S_DISCARD_UNNAMED = Flag("discard_unnamed") -S_DISCARD_CONST = Flag("discard_const") -S_UNION = Flag("union") -S_REPLACE_TYPES = Flag("replace_types") -S_EVAL_ANNOTATIONS = Flag("eval_annotations") +S_DISCARD_UNNAMED = Flag("struct.discard_unnamed") +S_DISCARD_CONST = Flag("struct.discard_const") +S_UNION = Flag("struct.union") +S_REPLACE_TYPES = Flag("struct.replace_types") +S_EVAL_ANNOTATIONS = Flag("struct.eval_annotations") S_ADD_BYTES = Flag("struct.bytes_method") S_SLOTS = Flag("struct.slots") # for fields -F_KEEP_POSITION = Flag("keep_position") -F_DYNAMIC = Flag("dynamic") -F_SEQUENTIAL = Flag("sequential") -F_OFFSET_OVERRIDE = Flag("offset_override") +F_KEEP_POSITION = Flag("field.keep_position") +F_DYNAMIC = Flag("field.dynamic") +F_SEQUENTIAL = Flag("field.sequential") +F_OFFSET_OVERRIDE = Flag("field.offset_override") # value intentionally left blank O_ARRAY_FACTORY = Flag("option.array_factory", value=None) + +# bitfield options +B_OVERWRITE_ALIGNMENT = Flag("bitfield.overwrite_alignment") +B_GROUP_END = Flag("bitfield.group.end") +B_GROUP_NEW = Flag("bitfield.group.new") +B_GROUP_KEEP = Flag("bitfield.group.keep") +B_NO_AUTO_BOOL = 
Flag("bitfield.no_auto_bool") \ No newline at end of file diff --git a/src/caterpillar/options.pyi b/src/caterpillar/options.pyi new file mode 100755 index 00000000..b03a48e5 --- /dev/null +++ b/src/caterpillar/options.pyi @@ -0,0 +1,56 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +from types import NoneType +from typing import Any, Callable, Collection, Generic, Iterable, TypeVar + +_VT = TypeVar("_VT", default=NoneType) + +class Flag(Generic[_VT]): + name: str + value: _VT | None = ... + _hash_: int + def __init__(self, name: str, value: _VT | None = None) -> None: ... + def __hash__(self) -> int: ... + +GLOBAL_STRUCT_OPTIONS: set[Flag] = ... +GLOBAL_UNION_OPTIONS: set[Flag] = ... +GLOBAL_FIELD_FLAGS: set[Flag] = ... +GLOBAL_BITFIELD_FLAGS: set[Flag] = ... + +def configure(base: set[Flag], *flags: Flag) -> None: ... +def set_struct_flags(*flags: Flag, with_union: bool = False) -> None: ... +def set_field_flags(*flags: Flag) -> None: ... +def set_union_flags(*flags: Flag) -> None: ... +def get_flags(obj: Any, attr: str | None = None) -> set[Flag] | None: ... +def has_flag(flag: str | Flag, obj: Any, attr: str | None = None) -> bool: ... +def get_flag(name: str, obj: Any, attr: str | None = None) -> Flag | None: ... 
+ +S_DISCARD_UNNAMED: Flag +S_DISCARD_CONST: Flag +S_UNION: Flag +S_REPLACE_TYPES: Flag +S_EVAL_ANNOTATIONS: Flag +S_ADD_BYTES: Flag +S_SLOTS: Flag +F_KEEP_POSITION: Flag +F_DYNAMIC: Flag +F_SEQUENTIAL: Flag +F_OFFSET_OVERRIDE: Flag +O_ARRAY_FACTORY: Flag[Callable[[Iterable], Collection]] +B_OVERWRITE_ALIGNMENT: Flag +B_GROUP_END: Flag +B_GROUP_NEW: Flag +B_GROUP_KEEP: Flag +B_NO_AUTO_BOOL: Flag \ No newline at end of file diff --git a/src/caterpillar/py.py b/src/caterpillar/py.py index b7426578..b0168cbd 100644 --- a/src/caterpillar/py.py +++ b/src/caterpillar/py.py @@ -1,24 +1,19 @@ -# use this module to import everything Python related +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
-from .abc import ( - _ContextLike, - _StructLike, - _ContextLambda, - _Switch, - _SupportsUnpack, - _SupportsSize, - _SupportsPack, - _ContainsStruct, - getstruct, - hasstruct, - typeof, - _EnumLike, - _GreedyType, - _PrefixedType, - _StreamFactory, - _StreamType, - STRUCT_FIELD, -) +# use this module to import everything Python related from .registry import TypeConverter, annotation_registry, to_struct from .byteorder import ( ByteOrder, @@ -44,7 +39,6 @@ x86_64, AMD, AMD64, - BYTEORDER_FIELD, ) from .context import ( Context, @@ -67,6 +61,7 @@ CTX_PATH, CTX_SEQ, CTX_ARCH, + ExprMixin, ) from .exception import ( StructException, @@ -105,5 +100,123 @@ has_flag, O_ARRAY_FACTORY, ) -from ._common import WithoutContextVar -from .shared import ATTR_ACTION_PACK, ATTR_STRUCT, Action \ No newline at end of file +from ._common import WithoutContextVar, iseof, pack_seq, unpack_seq +from .shared import ( + ATTR_ACTION_PACK, + ATTR_STRUCT, + Action, + ATTR_ACTION_UNPACK, + ATTR_BITS, + ATTR_BYTEORDER, + ATTR_SIGNED, + ATTR_TEMPLATE, + ATTR_TYPE, + getstruct, + hasstruct, + MODE_PACK, + MODE_UNPACK, + typeof, +) + +from .model import __all__ as model_all +from .fields import __all__ as fields_all + +# pyright: reportUnsupportedDunderAll=false +__all__ = model_all + fields_all + [ + "ExprMixin", + "WithoutContextVar", + "AARCH64", + "AMD", + "AMD64", + "ARM", + "ARM64", + "Arch", + "BigEndian", + "ByteOrder", + "LittleEndian", + "MIPS", + "MIPS64", + "Native", + "NetEndian", + "PowerPC", + "PowerPC64", + "RISC_V", + "RISC_V64", + "SPARC", + "SPARC64", + "SysNative", + "system_arch", + "x86", + "x86_64", + "BinaryExpression", + "CTX_ARCH", + "CTX_FIELD", + "CTX_INDEX", + "CTX_OBJECT", + "CTX_OFFSETS", + "CTX_PARENT", + "CTX_PATH", + "CTX_POS", + "CTX_SEQ", + "CTX_STREAM", + "CTX_VALUE", + "ConditionContext", + "Context", + "ContextLength", + "ContextPath", + "UnaryExpression", + "ctx", + "parent", + "this", + "DelegationError", + "DynamicSizeError", + "InvalidValueError", + 
"OptionError", + "Stop", + "StreamError", + "StructException", + "ValidationError", + "F_DYNAMIC", + "F_KEEP_POSITION", + "F_OFFSET_OVERRIDE", + "F_SEQUENTIAL", + "Flag", + "GLOBAL_BITFIELD_FLAGS", + "GLOBAL_FIELD_FLAGS", + "GLOBAL_STRUCT_OPTIONS", + "GLOBAL_UNION_OPTIONS", + "O_ARRAY_FACTORY", + "S_ADD_BYTES", + "S_DISCARD_CONST", + "S_DISCARD_UNNAMED", + "S_EVAL_ANNOTATIONS", + "S_REPLACE_TYPES", + "S_SLOTS", + "S_UNION", + "get_flag", + "get_flags", + "has_flag", + "set_field_flags", + "set_struct_flags", + "set_union_flags", + "TypeConverter", + "annotation_registry", + "to_struct", + "ATTR_ACTION_PACK", + "ATTR_STRUCT", + "Action", + "iseof", + "pack_seq", + "unpack_seq", + "ATTR_ACTION_UNPACK", + "ATTR_BITS", + "ATTR_BYTEORDER", + "ATTR_SIGNED", + "ATTR_TEMPLATE", + "ATTR_TYPE", + "getstruct", + "hasstruct", + "MODE_PACK", + "MODE_UNPACK", + "typeof", +] diff --git a/src/caterpillar/py.typed b/src/caterpillar/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/src/caterpillar/registry.pyi b/src/caterpillar/registry.pyi new file mode 100755 index 00000000..c4117657 --- /dev/null +++ b/src/caterpillar/registry.pyi @@ -0,0 +1,32 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+from caterpillar.abc import _StructLike +from typing import Any, Callable, Self + +class TypeConverter: + target: type + delegate: Callable[[Any, dict], _StructLike] + def __init__( + self, + target: type | None = None, + delegate: Callable[[Any, dict], _StructLike] | None = None, + ) -> None: ... + def matches(self, annotation: Any) -> bool: ... + def convert(self, annotation: Any, kwargs: dict) -> _StructLike: ... + def __call__(self, delegate: Callable[[Any, dict], _StructLike]) -> Self: ... + +annotation_registry: list[TypeConverter] + +def to_struct(obj: Any, **kwargs) -> _StructLike: ... diff --git a/src/caterpillar/shared.py b/src/caterpillar/shared.py index 0dcb3b17..c21e4df7 100644 --- a/src/caterpillar/shared.py +++ b/src/caterpillar/shared.py @@ -12,7 +12,7 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. -from typing import Any, Callable +from typing import Any from caterpillar.abc import _ContextLambda # --- Shared Concepts --- @@ -30,6 +30,11 @@ #: struct, bitfield, or sequence definition. The type of the stored value #: must be conforming to the _StructLike protocol.
ATTR_STRUCT = "__struct__" +ATTR_BYTEORDER = "__byteorder__" +ATTR_TYPE = "__type__" +ATTR_BITS = "__bits__" +ATTR_SIGNED = "__signed__" +ATTR_TEMPLATE = "__template__" # TODO: add to reference # NEW CONCEPT: Actions @@ -94,12 +99,7 @@ class Action: __slots__ = (ATTR_ACTION_PACK, ATTR_ACTION_UNPACK) - def __init__( - self, - pack: _ContextLambda | None = None, - unpack: _ContextLambda | None = None, - both: _ContextLambda | None = None, - ) -> None: + def __init__(self, pack=None, unpack=None, both=None) -> None: self.__action_pack__ = pack self.__action_unpack__ = unpack if both is not None: @@ -147,3 +147,41 @@ def is_action(obj: Any) -> bool: return any( getattr(obj, attr, None) for attr in (ATTR_ACTION_PACK, ATTR_ACTION_UNPACK) ) + + +def hasstruct(obj) -> bool: + """ + Check if the given object has a structure attribute. + + :param obj: The object to check. + :return: True if the object has a structure attribute, else False. + """ + cls_dict = getattr(obj.__class__ if not isinstance(obj, type) else obj, "__dict__") + return ATTR_STRUCT in cls_dict + + +def getstruct(obj, /, __default=None): + """ + Get the structure attribute of the given object. + + :param obj: The object (or class) to get the structure attribute from. + :param __default: The value to return if no structure attribute is present. + :return: The structure attribute of the object, or ``__default`` if it is missing.
+ """ + obj = obj.__class__ if not isinstance(obj, type) else obj + cls_dict = getattr(obj, "__dict__", None) + if cls_dict is None: + return getattr(obj, ATTR_STRUCT, __default) + + return cls_dict.get(ATTR_STRUCT, __default) + + +def typeof(struct): + if hasstruct(struct): + struct = getstruct(struct) + + __type__ = getattr(struct, ATTR_TYPE, None) + if not __type__: + return Any + # this function must return a type + return __type__() or Any diff --git a/src/caterpillar/shared.pyi b/src/caterpillar/shared.pyi new file mode 100755 index 00000000..c871645d --- /dev/null +++ b/src/caterpillar/shared.pyi @@ -0,0 +1,45 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +from caterpillar.abc import _ContextLambda, _StructLike, _ContainsStruct +from typing import Any, Optional, Union + +MODE_PACK: int = ... +MODE_UNPACK: int = ... +ATTR_STRUCT: str = ... +ATTR_TYPE: str = ... +ATTR_BYTEORDER: str = ... +ATTR_BITS: str = ... +ATTR_SIGNED: str = ... +ATTR_TEMPLATE: str = ... +ATTR_ACTION_PACK: str = ... +ATTR_ACTION_UNPACK: str = ... + +class Action: + __action_pack__: _ContextLambda + __action_unpack__: _ContextLambda + def __init__( + self, + pack: _ContextLambda | None = None, + unpack: _ContextLambda | None = None, + both: _ContextLambda | None = None, + ) -> None: ... + @staticmethod + def is_action(obj: Any) -> bool: ... + +def hasstruct(obj: Any) -> bool: ...
+def getstruct( + obj: Any, /, __default: Optional[_StructLike] = None +) -> Optional[_StructLike]: ... +def typeof(struct: Union[_StructLike, _ContainsStruct]) -> type: ... diff --git a/src/caterpillar/shortcuts.py b/src/caterpillar/shortcuts.py index b004bd46..332e6d8c 100644 --- a/src/caterpillar/shortcuts.py +++ b/src/caterpillar/shortcuts.py @@ -12,15 +12,72 @@ # # You should have received a copy of the GNU General Public License # along with this program. If not, see <https://www.gnu.org/licenses/>. -from .model import pack, pack_file, pack_into -from .model import unpack, unpack_file -from .model import struct, union -from .model import bitfield, Sequence as Seq - -from .context import ContextPath, this, ctx, parent, ContextLength as lenof -from .byteorder import LittleEndian, BigEndian -from .byteorder import x86, x86_64, ARM, ARM64, AMD, AMD64, AARCH64 -from .byteorder import PowerPC, PowerPC64, RISC_V, RISC_V64 - -from .fields import Field as F +from .byteorder import ( + AARCH64, + AMD, + AMD64, + ARM, + ARM64, + BigEndian, + LittleEndian, + PowerPC, + PowerPC64, + RISC_V, + RISC_V64, + x86, + x86_64, +) +from .context import ContextPath, ctx, parent, this, ContextLength as lenof +from .model import ( + bitfield, + pack, + pack_file, + pack_into, + struct, + union, + unpack, + unpack_file, + sizeof, + Sequence as Seq, +) +from .shared import typeof, getstruct, hasstruct +from .registry import to_struct from .
import options as opt +from .fields import Field as F + +__all__ = [ + "AARCH64", + "AMD", + "AMD64", + "ARM", + "ARM64", + "BigEndian", + "LittleEndian", + "PowerPC", + "PowerPC64", + "RISC_V", + "RISC_V64", + "x86", + "x86_64", + "bitfield", + "pack", + "pack_file", + "pack_into", + "struct", + "union", + "unpack", + "unpack_file", + "sizeof", + "Seq", + "typeof", + "getstruct", + "hasstruct", + "ContextPath", + "ctx", + "parent", + "this", + "lenof", + "to_struct", + "F", + "opt", +] diff --git a/src/caterpillar/shortcuts.pyi b/src/caterpillar/shortcuts.pyi new file mode 100755 index 00000000..332e6d8c --- /dev/null +++ b/src/caterpillar/shortcuts.pyi @@ -0,0 +1,83 @@ +# Copyright (C) MatrixEditor 2023-2025 +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +from .byteorder import ( + AARCH64, + AMD, + AMD64, + ARM, + ARM64, + BigEndian, + LittleEndian, + PowerPC, + PowerPC64, + RISC_V, + RISC_V64, + x86, + x86_64, +) +from .context import ContextPath, ctx, parent, this, ContextLength as lenof +from .model import ( + bitfield, + pack, + pack_file, + pack_into, + struct, + union, + unpack, + unpack_file, + sizeof, + Sequence as Seq, +) +from .shared import typeof, getstruct, hasstruct +from .registry import to_struct +from .
import options as opt +from .fields import Field as F + +__all__ = [ + "AARCH64", + "AMD", + "AMD64", + "ARM", + "ARM64", + "BigEndian", + "LittleEndian", + "PowerPC", + "PowerPC64", + "RISC_V", + "RISC_V64", + "x86", + "x86_64", + "bitfield", + "pack", + "pack_file", + "pack_into", + "struct", + "union", + "unpack", + "unpack_file", + "sizeof", + "Seq", + "typeof", + "getstruct", + "hasstruct", + "ContextPath", + "ctx", + "parent", + "this", + "lenof", + "to_struct", + "F", + "opt", +] diff --git a/src/ccaterpillar/default.c b/src/ccaterpillar/default.c index 442f3f26..d4e83295 100644 --- a/src/ccaterpillar/default.c +++ b/src/ccaterpillar/default.c @@ -2,7 +2,7 @@ #include "caterpillar/caterpillar.h" -#if PY_3_13_PLUS +#if PY_3_14_PLUS #define _Py_IMMORTAL_REFCNT _Py_IMMORTAL_INITIAL_REFCNT #endif diff --git a/src/ccaterpillar/pyproject.toml b/src/ccaterpillar/pyproject.toml index 4b3983db..21f4a5dd 100644 --- a/src/ccaterpillar/pyproject.toml +++ b/src/ccaterpillar/pyproject.toml @@ -18,19 +18,22 @@ CP_ENABLE_NATIVE = "1" [project] name = "caterpillar" -version = "2.4.5" +version = "2.5.0" -description="Library to pack and unpack structurized binary data." -authors = [ - { name="MatrixEditor", email="not@supported.com" }, -] +description = "Library to pack and unpack structurized binary data." 
readme = "../../README.md" +authors = [{ name = "MatrixEditor" }] +maintainers = [{ name = "MatrixEditor" }] classifiers = [ - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: MIT License', + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", - 'Programming Language :: Python :: 3.12', - ] + 'Programming Language :: Python :: 3.12', + 'Programming Language :: Python :: 3.13', +] [project.urls] "Homepage" = "https://github.com/MatrixEditor/caterpillar" @@ -38,13 +41,6 @@ classifiers = [ [project.optional-dependencies] # compression -lzo = [ - "lzallright" -] -crypt = [ - "cryptography" -] -all = [ - "lzallright", - "cryptography" -] \ No newline at end of file +lzo = ["lzallright"] +crypt = ["cryptography"] +all = ["lzallright", "cryptography"] diff --git a/test/_C/test_context.py b/test/_C/test_context.py index ad3b9e64..ab5e4555 100644 --- a/test/_C/test_context.py +++ b/test/_C/test_context.py @@ -27,7 +27,7 @@ def test_context_getattr(): assert getattr(c, "foo") == 1 assert getattr(c, "bar") == 2 # This call will be transferred to the __context_getattr__ function - assert getattr(c, "foo.__class__") == int + assert getattr(c, "foo.__class__") is int with pytest.raises(AttributeError): # this class is strict when it comes to undefined diff --git a/test/_C/test_parsing.py b/test/_C/test_parsing.py index c143fff7..5d0a8068 100644 --- a/test/_C/test_parsing.py +++ b/test/_C/test_parsing.py @@ -4,7 +4,7 @@ import caterpillar # TODO: raise issue for fixes -if False: #caterpillar.native_support(): +if False:# caterpillar.native_support(): from caterpillar._C import atom, typeof, sizeof, patom, repeated from caterpillar._C import switch diff --git a/test/_Py/fields/test_py_digest.py b/test/_Py/fields/test_py_digest.py index a679b91d..49cf91c7 100644 --- a/test/_Py/fields/test_py_digest.py +++ 
b/test/_Py/fields/test_py_digest.py @@ -9,7 +9,6 @@ Md5, Md5_Field, Sha2_256, - _DigestValue, Md5_Algo, Sha2_256_Algo, Sha2_256_Field, diff --git a/test/_Py/fields/test_py_memory.py b/test/_Py/fields/test_py_memory.py index c8a8913a..f6dcb523 100644 --- a/test/_Py/fields/test_py_memory.py +++ b/test/_Py/fields/test_py_memory.py @@ -18,7 +18,7 @@ def test_memory_length(): with pytest.raises(ValidationError): pack(b"12345678901", memory, as_field=True) - dyn_memory = Memory(lambda ctx: ctx._root.length) + dyn_memory = Memory(lambda context: context._root.length) assert len(unpack(dyn_memory, b"123456789011", as_field=True, length=10)) == 10 greedy_memory = Memory(...) diff --git a/test/_Py/model/test_bitfield.py b/test/_Py/model/test_bitfield.py new file mode 100644 index 00000000..503a4ebe --- /dev/null +++ b/test/_Py/model/test_bitfield.py @@ -0,0 +1,175 @@ +# pyright: reportInvalidTypeForm=false, reportGeneralTypeIssues=false +import pytest +import enum + +from caterpillar.model import ( + Bitfield, + bitfield, + NewGroup, + EndGroup, + SetAlignment, + sizeof, + unpack, + CharFactory, + pack, +) +from caterpillar.options import ( + B_GROUP_END, + B_GROUP_KEEP, + B_GROUP_NEW, + B_NO_AUTO_BOOL, + B_OVERWRITE_ALIGNMENT, + S_REPLACE_TYPES, +) +from caterpillar.fields import uint16, uint24, uint32, Bytes, uint8 +from caterpillar.shared import getstruct + + +def test_bitfield_syntax__standard(): + # Syntax no. 1 + @bitfield + class FormatA: + a: 3 - uint16 + + groups = FormatA.__struct__.groups + assert len(groups) == 1 + assert not groups[0].is_empty() + # default alignment is 0x08 unless B_OVERWRITE_ALIGNMENT is set + assert groups[0].bit_count == 0x08 + assert groups[0].entries[0].width == 3 + + +def test_bitfield_syntax__align(): + # Syntax no. 1 + no. 
2 + @bitfield + class FormatA: + a: 3 + _: 0 + b: 4 + + groups = FormatA.__struct__.groups + assert len(groups) == 2 + + # The alignment syntax finalizes the first group and start a new one + a = groups[0] + b = groups[1] + assert a.bit_count == 0x08 and b.bit_count == 0x08 + assert a.entries[0].width == 3 + assert b.entries[0].width == 4 + + +def test_bitfield_syntax__struct(): + # syntax no. 3 (generic struct) + @bitfield + class FormatA: + a: uint16 + b: uint32 + c: uint24 + + # just like a @struct definition + assert sizeof(FormatA) == 2 + 4 + 3 + + +def test_bitfield_syntax__field_factory(): + # syntax no. 4 + @bitfield + class FormatA: + a: (uint16, int) # this won't work + b: (5 - uint8, str) + + struct = getstruct(FormatA) + assert struct is not None + + groups = struct.groups + assert len(groups) == 2 + # The first definition will revert to a simple field, because + # no bits are given + assert groups[0].is_field() is True + # conversion to string + assert groups[1].entries[0].factory.target is str + + +def test_bitfield_syntax__extended(): + class SimpleEnum(enum.IntEnum): + A = 0 + B = 1 + C = 2 + + # syntax no. 
5 + @bitfield + class FormatA: + # Explanation: + # 4bits converted to string + a1: (4, str) + # 2bits converted to Enum + a2: (2, SimpleEnum) + # alignment is 8 bits, finalize group and set alignment + # to 16bits for next group + _: (0, SetAlignment(16)) + # 10bits entry for current group, then finalize group + b1: (10, EndGroup) + # 12bits in new group + c1: 12 + + # We should see exactly three groups here + groups = FormatA.__struct__.groups + assert len(groups) == 3 + assert groups[0].bit_count == 8 + assert groups[1].bit_count == 16 + assert groups[2].bit_count == 16 + + assert len(groups[0].entries) == 2 + assert groups[0].entries[1].factory.target is SimpleEnum + + +def test_bitfield__replace_types(): + class SimpleEnum(enum.IntEnum): + A = 0 + B = 1 + C = 2 + + @bitfield(options={S_REPLACE_TYPES}) + class FormatA: + a1: (4, str) # a1: str + a2: (2, SimpleEnum) # a2: SimpleEnum + _: 0 + b1: Bytes(6) # b1: bytes + + annotations = FormatA.__annotations__ + assert annotations["a1"] is str + assert annotations["a2"] is SimpleEnum + assert annotations["b1"] is bytes + + +def test_bitfield__unpack(): + # same as before + class SimpleEnum(enum.IntEnum): + A = 0 + B = 1 + C = 2 + + @bitfield + class FormatA: + a1: (4, CharFactory) # a1: str + a2: (2, SimpleEnum) # a2: SimpleEnum + _: 0 + b1: Bytes(6) # b1: bytes + + data = 0b00110100.to_bytes() + b"12" * 3 + obj = unpack(FormatA, data) + assert obj.a1 == "\x03" + assert obj.a2 == SimpleEnum.B + assert obj.b1 == b"12" * 3 + + +def test_bitfield__pack(): + @bitfield + class FormatA: + a1: 1 + a2: 2 + a3: 3 + _: 0 + b1: uint16 + + obj = FormatA(a1=True, a2=3, a3=5, b1=0xFF00) + assert pack(obj) == 0b1_11_101_00.to_bytes() + b"\x00\xff"