fix(toml): resolve merge conflict with upstream main (#568)

ricaskew · ricaskew · commit 7739deac4c9b · 2026-05-03T16:07:04.000-05:00
diff --git a/README.md b/README.md
@@ -24,6 +24,7 @@ python-benedict is a dict subclass with **keylist/keypath/keyattr** support, **I
 -   **Keypath** support using **keypath-separator** *(dot syntax by default)*.
 -   Keypath **list-index** support  *(also negative)* using the standard `[n]` suffix.
 -   Normalized **I/O operations** with most common formats: `base64`, `cli`, `csv`, `html`, `ini`, `json`, `pickle`, `plist`, `query-string`, `toml`, `xls`, `xml`, `yaml`.
+-   `NEW` Optional **Pydantic v2 schema** validation and type coercion on all `from_*` / `to_*` I/O methods via the `schema` kwarg *(requires `python-benedict[schema]`)*.
 -   Multiple **I/O operations** backends: `file-system` *(read/write)*, `url` *(read-only)*, `s3` *(read/write)*.
 -   Many **utility** and **parse methods** to retrieve data as needed *(check the [API](#api) section)*.
 -   Well **tested**. ;)
@@ -68,6 +69,7 @@ Here the hierarchy of possible installation targets available when running `pip
         - `[yaml]`
     - `[parse]`
     - `[s3]`
+    - `[schema]`
 
 ## Usage
 
@@ -614,6 +616,29 @@ d.unique()
 
 These methods are available for input/output operations.
 
+All `from_*` and `to_*` methods accept an optional `schema` keyword argument. When a [Pydantic v2](https://docs.pydantic.dev/) model class is passed, the data is validated and type-coerced through the model before being returned (on decode) or serialized (on encode). This requires the `python-benedict[schema]` extra.
+
+```
+pip install "python-benedict[schema]"
+```
+
+```python
+from benedict import benedict
+from pydantic import BaseModel
+
+class User(BaseModel):
+    name: str
+    age: int
+
+# validate and coerce types on decode
+d = benedict.from_json('{"name": "Alice", "age": "30"}', schema=User)
+assert d["age"] == 30  # coerced from str to int
+
+# validate and coerce types on encode
+d = benedict({"name": "Bob", "age": "25"})
+s = d.to_json(schema=User)  # age is coerced to int before serialization
+```
+
 #### `from_base64`
 
 ```python
@@ -666,6 +691,7 @@ d = benedict.from_html(s, **kwargs)
 # Accept as first argument: url, filepath or data-string.
 # It's possible to pass decoder specific options using kwargs:
 # https://docs.python.org/3/library/configparser.html
+# It's possible to pass a Pydantic v2 model class as schema= to validate and coerce data.
 # A ValueError is raised in case of failure.
 d = benedict.from_ini(s, **kwargs)
 ```
@@ -677,6 +703,7 @@ d = benedict.from_ini(s, **kwargs)
 # Accept as first argument: url, filepath or data-string.
 # It's possible to pass decoder specific options using kwargs:
 # https://docs.python.org/3/library/json.html
+# It's possible to pass a Pydantic v2 model class as schema= to validate and coerce data.
 # A ValueError is raised in case of failure.
 d = benedict.from_json(s, **kwargs)
 ```
@@ -753,6 +780,7 @@ d = benedict.from_xml(s, **kwargs)
 # Accept as first argument: url, filepath or data-string.
 # It's possible to pass decoder specific options using kwargs:
 # https://pyyaml.org/wiki/PyYAMLDocumentation
+# It's possible to pass a Pydantic v2 model class as schema= to validate and coerce data.
 # A ValueError is raised in case of failure.
 d = benedict.from_yaml(s, **kwargs)
 ```
@@ -795,6 +823,7 @@ s = d.to_ini(**kwargs)
 # Return the dict instance encoded in json format and optionally save it at the specified filepath.
 # It's possible to pass encoder specific options using kwargs:
 # https://docs.python.org/3/library/json.html
+# It's possible to pass a Pydantic v2 model class as schema= to validate and coerce data before encoding.
 # A ValueError is raised in case of failure.
 s = d.to_json(**kwargs)
 ```
diff --git a/benedict/core/items_sorted.py b/benedict/core/items_sorted.py
@@ -1,25 +1,24 @@
 from __future__ import annotations
 
 from collections.abc import Mapping
-
-from useful_types import SupportsRichComparisonT
+from typing import Any
 
 
 def _items_sorted_by_item_at_index(
-    d: Mapping[SupportsRichComparisonT, SupportsRichComparisonT],
+    d: Mapping[Any, Any],
     index: int,
     reverse: bool,
-) -> list[tuple[SupportsRichComparisonT, SupportsRichComparisonT]]:
+) -> list[tuple[Any, Any]]:
     return sorted(d.items(), key=lambda item: item[index], reverse=reverse)
 
 
 def items_sorted_by_keys(
-    d: Mapping[SupportsRichComparisonT, SupportsRichComparisonT], reverse: bool = False
-) -> list[tuple[SupportsRichComparisonT, SupportsRichComparisonT]]:
+    d: Mapping[Any, Any], reverse: bool = False
+) -> list[tuple[Any, Any]]:
     return _items_sorted_by_item_at_index(d, 0, reverse)
 
 
 def items_sorted_by_values(
-    d: Mapping[SupportsRichComparisonT, SupportsRichComparisonT], reverse: bool = False
-) -> list[tuple[SupportsRichComparisonT, SupportsRichComparisonT]]:
+    d: Mapping[Any, Any], reverse: bool = False
+) -> list[tuple[Any, Any]]:
     return _items_sorted_by_item_at_index(d, 1, reverse)
diff --git a/benedict/dicts/io/io_util.py b/benedict/dicts/io/io_util.py
@@ -12,17 +12,22 @@
     import boto3
 
     s3_installed = True
-except ModuleNotFoundError:
+except ModuleNotFoundError:  # pragma: no cover
     s3_installed = False
 
-import fsutil
+try:
+    import fsutil
+
+    fsutil_installed = True
+except ModuleNotFoundError:  # pragma: no cover
+    fsutil_installed = False
 
-from benedict.extras import require_s3
+from benedict.extras import require_fsutil, require_s3
 from benedict.serializers import (
     get_format_by_path,
     get_serializer_by_format,
 )
-from benedict.utils import type_util
+from benedict.utils import schema_util, type_util
 
 
 def autodetect_format(s: Any) -> str | None:
@@ -56,10 +61,13 @@ def decode(s: Any, format: str, **kwargs: Any) -> Any:
     if not serializer:
         raise ValueError(f"Invalid format: {format}.")
     options = kwargs.copy()
+    schema = options.pop("schema", None)
     if format in ["b64", "base64"]:
         options.setdefault("subformat", "json")
     content = read_content(s, format, options)
     data = serializer.decode(content, **options)
+    if schema is not None:
+        data = schema_util.apply_schema(data, schema)
     return data
 
 
@@ -68,6 +76,9 @@ def encode(d: Any, format: str, filepath: str | None = None, **kwargs: Any) -> A
     if not serializer:
         raise ValueError(f"Invalid format: {format}.")
     options = kwargs.copy()
+    schema = options.pop("schema", None)
+    if schema is not None:
+        d = schema_util.apply_schema(d, schema)
     content = serializer.encode(d, **options)
     if filepath:
         filepath = str(filepath)
@@ -88,7 +99,7 @@ def is_data(s: str | bytes) -> bool:
 
 
 def is_filepath(s: Path | str) -> bool:
-    if fsutil.is_file(s):
+    if fsutil_installed and fsutil.is_file(s):
         return True
     return bool(
         get_format_by_path(s)
@@ -147,15 +158,18 @@ def read_content(
 
 
 def read_content_from_file(filepath: str, format: str | None = None) -> str:
+    require_fsutil(installed=fsutil_installed)
     binary_format = is_binary_format(format)
     if binary_format:
         return filepath
-    return fsutil.read_file(filepath)  # type: ignore[no-any-return]
+    content = fsutil.read_file(filepath)
+    return str(content)
 
 
 def read_content_from_s3(
     url: str, s3_options: Mapping[str, Any], format: str | None = None
 ) -> str:
+    require_fsutil(installed=fsutil_installed)
     require_s3(installed=s3_installed)
     s3_url = parse_s3_url(url)
     dirpath = tempfile.gettempdir()
@@ -171,12 +185,14 @@ def read_content_from_s3(
 def read_content_from_url(
     url: str, requests_options: Mapping[str, Any], format: str | None = None
 ) -> str:
+    require_fsutil(installed=fsutil_installed)
     binary_format = is_binary_format(format)
     if binary_format:
         dirpath = tempfile.gettempdir()
         filepath = fsutil.download_file(url, dirpath=dirpath, **requests_options)
-        return filepath  # type: ignore[no-any-return]
-    return fsutil.read_file_from_url(url, **requests_options)  # type: ignore[no-any-return]
+        return str(filepath)
+    content = fsutil.read_file_from_url(url, **requests_options)
+    return str(content)
 
 
 def write_content(filepath: str, content: str, **options: Any) -> None:
@@ -187,12 +203,14 @@ def write_content(filepath: str, content: str, **options: Any) -> None:
 
 
 def write_content_to_file(filepath: str, content: str, **options: Any) -> None:
+    require_fsutil(installed=fsutil_installed)
     fsutil.write_file(filepath, content)
 
 
 def write_content_to_s3(
     url: str, content: str, s3_options: Mapping[str, Any], **options: Any
 ) -> None:
+    require_fsutil(installed=fsutil_installed)
     require_s3(installed=s3_installed)
     s3_url = parse_s3_url(url)
     dirpath = tempfile.gettempdir()
diff --git a/benedict/dicts/parse/parse_util.py b/benedict/dicts/parse/parse_util.py
@@ -12,7 +12,7 @@
     from phonenumbers import PhoneNumberFormat, phonenumberutil
 
     parse_installed = True
-except ModuleNotFoundError:
+except ModuleNotFoundError:  # pragma: no cover
     parse_installed = False
 
 
diff --git a/benedict/extras.py b/benedict/extras.py
@@ -1,9 +1,11 @@
 from benedict.exceptions import ExtrasRequireModuleNotFoundError
 
 __all__ = [
+    "require_fsutil",
     "require_html",
     "require_parse",
     "require_s3",
+    "require_schema",
     "require_toml",
     "require_xls",
     "require_xml",
@@ -20,6 +22,10 @@ def require_html(*, installed: bool) -> None:
     _require_optional_dependencies(target="html", installed=installed)
 
 
+def require_fsutil(*, installed: bool) -> None:
+    _require_optional_dependencies(target="io", installed=installed)
+
+
 def require_parse(*, installed: bool) -> None:
     _require_optional_dependencies(target="parse", installed=installed)
 
@@ -28,6 +34,10 @@ def require_s3(*, installed: bool) -> None:
     _require_optional_dependencies(target="s3", installed=installed)
 
 
+def require_schema(*, installed: bool) -> None:
+    _require_optional_dependencies(target="schema", installed=installed)
+
+
 def require_toml(*, installed: bool) -> None:
     _require_optional_dependencies(target="toml", installed=installed)
 
diff --git a/benedict/serializers/html.py b/benedict/serializers/html.py
@@ -4,7 +4,7 @@
     from bs4 import BeautifulSoup
 
     html_installed = True
-except ModuleNotFoundError:
+except ModuleNotFoundError:  # pragma: no cover
     html_installed = False
 
 from typing import Any, NoReturn
diff --git a/benedict/serializers/xls.py b/benedict/serializers/xls.py
@@ -1,21 +1,26 @@
 from __future__ import annotations
 
-import fsutil
+try:
+    import fsutil
+
+    fsutil_installed = True
+except ModuleNotFoundError:  # pragma: no cover
+    fsutil_installed = False
 
 try:
     from openpyxl import load_workbook
     from xlrd import open_workbook
 
     xls_installed = True
-except ModuleNotFoundError:
+except ModuleNotFoundError:  # pragma: no cover
     xls_installed = False
 
 from collections.abc import Sequence
 from typing import Any, NoReturn
 
 from slugify import slugify
 
-from benedict.extras import require_xls
+from benedict.extras import require_fsutil, require_xls
 from benedict.serializers.abstract import AbstractSerializer
 
 
@@ -175,6 +180,7 @@ def _decode(self, s: str, **kwargs: Any) -> list[dict[str, Any]]:
 
     def decode(self, s: str, **kwargs: Any) -> list[dict[str, Any]]:
         require_xls(installed=xls_installed)
+        require_fsutil(installed=fsutil_installed)
         extension = fsutil.get_file_extension(s)
         if extension in ["xlsx", "xlsm"]:
             return self._decode(s, **kwargs)
diff --git a/benedict/serializers/xml.py b/benedict/serializers/xml.py
@@ -4,7 +4,7 @@
     import xmltodict
 
     xml_installed = True
-except ModuleNotFoundError:
+except ModuleNotFoundError:  # pragma: no cover
     xml_installed = False
 
 
diff --git a/benedict/serializers/yaml.py b/benedict/serializers/yaml.py
@@ -6,7 +6,7 @@
     from yaml.representer import SafeRepresenter
 
     yaml_installed = True
-except ModuleNotFoundError:
+except ModuleNotFoundError:  # pragma: no cover
     yaml_installed = False
 
 
diff --git a/benedict/utils/schema_util.py b/benedict/utils/schema_util.py
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+from typing import Any
+
+try:
+    import pydantic
+
+    pydantic_installed = True
+except ImportError:  # pragma: no cover
+    pydantic_installed = False
+
+
+def apply_schema(data: Any, schema: Any) -> Any:
+    """
+    Validate data with a Pydantic model class; return it as a plain dict.
+    Raises ExtrasRequireModuleNotFoundError if pydantic is not installed.
+    Raises TypeError if schema is not a pydantic BaseModel subclass.
+    Errors raised by model_validate for invalid data are not caught here.
+    """
+    from benedict.extras import require_schema
+
+    require_schema(installed=pydantic_installed)
+    if isinstance(schema, type) and issubclass(schema, pydantic.BaseModel):
+        schema_cls: type[pydantic.BaseModel] = schema
+    else:
+        raise TypeError(
+            f"schema must be a pydantic BaseModel subclass, got {type(schema)!r}"
+        )
+    instance = schema_cls.model_validate(data)
+    return instance.model_dump()
diff --git a/pyproject.toml b/pyproject.toml
diff --git a/requirements.txt b/requirements.txt
diff --git a/tests/dicts/io/test_io_dict_xls.py b/tests/dicts/io/test_io_dict_xls.py
diff --git a/tests/dicts/io/test_io_util.py b/tests/dicts/io/test_io_util.py
diff --git a/tests/dicts/io/test_schema.py b/tests/dicts/io/test_schema.py