fabiocaccamo · fabiocaccamo · Apr 27, 2026 · Apr 17, 2026 · Apr 17, 2026 · Apr 17, 2026
diff --git a/README.md b/README.md
@@ -24,6 +24,7 @@ python-benedict is a dict subclass with **keylist/keypath/keyattr** support, **I
 -   **Keypath** support using **keypath-separator** *(dot syntax by default)*.
 -   Keypath **list-index** support  *(also negative)* using the standard `[n]` suffix.
 -   Normalized **I/O operations** with most common formats: `base64`, `cli`, `csv`, `html`, `ini`, `json`, `pickle`, `plist`, `query-string`, `toml`, `xls`, `xml`, `yaml`.
+-   `NEW` Optional **Pydantic v2 schema** validation and type coercion on all `from_*` / `to_*` I/O methods via the `schema` kwarg *(requires `python-benedict[schema]`)*.
 -   Multiple **I/O operations** backends: `file-system` *(read/write)*, `url` *(read-only)*, `s3` *(read/write)*.
 -   Many **utility** and **parse methods** to retrieve data as needed *(check the [API](#api) section)*.
 -   Well **tested**. ;)
@@ -67,6 +68,7 @@ Here the hierarchy of possible installation targets available when running `pip
         - `[yaml]`
     - `[parse]`
     - `[s3]`
+    - `[schema]`
 
 ## Usage
 
@@ -613,6 +615,29 @@ d.unique()
 
 These methods are available for input/output operations.
 
+All `from_*` and `to_*` methods accept an optional `schema` keyword argument. When a [Pydantic v2](https://docs.pydantic.dev/) model class is passed, the data is validated and type-coerced through the model before being returned (on decode) or serialized (on encode). This requires the `python-benedict[schema]` extra.
+
+```
+pip install "python-benedict[schema]"
+```
+
+```python
+from benedict import benedict
+from pydantic import BaseModel
+
+class User(BaseModel):
+    name: str
+    age: int
+
+# validate and coerce types on decode
+d = benedict.from_json('{"name": "Alice", "age": "30"}', schema=User)
+assert d["age"] == 30  # coerced from str to int
+
+# validate and coerce types on encode
+d = benedict({"name": "Bob", "age": "25"})
+s = d.to_json(schema=User)  # age is coerced to int before serialization
+```
+
 #### `from_base64`
 
 ```python
@@ -665,6 +690,7 @@ d = benedict.from_html(s, **kwargs)
 # Accept as first argument: url, filepath or data-string.
 # It's possible to pass decoder specific options using kwargs:
 # https://docs.python.org/3/library/configparser.html
+# It's possible to pass a Pydantic v2 model class as schema= to validate and coerce data.
 # A ValueError is raised in case of failure.
 d = benedict.from_ini(s, **kwargs)
 ```
@@ -676,6 +702,7 @@ d = benedict.from_ini(s, **kwargs)
 # Accept as first argument: url, filepath or data-string.
 # It's possible to pass decoder specific options using kwargs:
 # https://docs.python.org/3/library/json.html
+# It's possible to pass a Pydantic v2 model class as schema= to validate and coerce data.
 # A ValueError is raised in case of failure.
 d = benedict.from_json(s, **kwargs)
 ```
@@ -752,6 +779,7 @@ d = benedict.from_xml(s, **kwargs)
 # Accept as first argument: url, filepath or data-string.
 # It's possible to pass decoder specific options using kwargs:
 # https://pyyaml.org/wiki/PyYAMLDocumentation
+# It's possible to pass a Pydantic v2 model class as schema= to validate and coerce data.
 # A ValueError is raised in case of failure.
 d = benedict.from_yaml(s, **kwargs)
 ```
@@ -794,6 +822,7 @@ s = d.to_ini(**kwargs)
 # Return the dict instance encoded in json format and optionally save it at the specified filepath.
 # It's possible to pass encoder specific options using kwargs:
 # https://docs.python.org/3/library/json.html
+# It's possible to pass a Pydantic v2 model class as schema= to validate and coerce data before encoding.
 # A ValueError is raised in case of failure.
 s = d.to_json(**kwargs)
 ```

diff --git a/benedict/dicts/io/io_util.py b/benedict/dicts/io/io_util.py
@@ -22,7 +22,7 @@
     get_format_by_path,
     get_serializer_by_format,
 )
-from benedict.utils import type_util
+from benedict.utils import schema_util, type_util
 
 
 def autodetect_format(s: Any) -> str | None:
@@ -56,10 +56,13 @@ def decode(s: Any, format: str, **kwargs: Any) -> Any:
     if not serializer:
         raise ValueError(f"Invalid format: {format}.")
     options = kwargs.copy()
+    schema = options.pop("schema", None)
     if format in ["b64", "base64"]:
         options.setdefault("subformat", "json")
     content = read_content(s, format, options)
     data = serializer.decode(content, **options)
+    if schema is not None:
+        data = schema_util.apply_schema(data, schema)
     return data
 
 
@@ -68,6 +71,9 @@ def encode(d: Any, format: str, filepath: str | None = None, **kwargs: Any) -> A
     if not serializer:
         raise ValueError(f"Invalid format: {format}.")
     options = kwargs.copy()
+    schema = options.pop("schema", None)
+    if schema is not None:
+        d = schema_util.apply_schema(d, schema)
     content = serializer.encode(d, **options)
     if filepath:
         filepath = str(filepath)

diff --git a/benedict/extras.py b/benedict/extras.py
@@ -4,6 +4,7 @@
     "require_html",
     "require_parse",
     "require_s3",
+    "require_schema",
     "require_toml",
     "require_xls",
     "require_xml",
@@ -28,6 +29,10 @@ def require_s3(*, installed: bool) -> None:
     _require_optional_dependencies(target="s3", installed=installed)
 
 
+def require_schema(*, installed: bool) -> None:  # check for the "schema" extra
+    _require_optional_dependencies(target="schema", installed=installed)
+
+
 def require_toml(*, installed: bool) -> None:
     _require_optional_dependencies(target="toml", installed=installed)
 

diff --git a/benedict/utils/schema_util.py b/benedict/utils/schema_util.py
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+from typing import Any
+
+try:
+    import pydantic
+
+    pydantic_installed = True
+except ImportError:
+    pydantic_installed = False
+
+
+def apply_schema(data: Any, schema: Any) -> Any:
+    """
+    Validate data against a Pydantic model class via model_validate().
+    Returns the validated instance's model_dump() output (a plain dict).
+    Raises ExtrasRequireModuleNotFoundError (via require_schema) if pydantic is missing.
+    Raises TypeError if schema is not a BaseModel subclass; validation errors propagate.
+    """
+    from benedict.extras import require_schema
+
+    require_schema(installed=pydantic_installed)  # run before touching pydantic.*
+    if isinstance(schema, type) and issubclass(schema, pydantic.BaseModel):
+        schema_cls: type[pydantic.BaseModel] = schema
+    else:
+        raise TypeError(
+            f"schema must be a pydantic BaseModel subclass, got {type(schema)!r}"
+        )
+    instance = schema_cls.model_validate(data)
+    return instance.model_dump()
diff --git a/pyproject.toml b/pyproject.toml
@@ -119,7 +119,7 @@ Twitter = "https://twitter.com/fabiocaccamo"
 
 [project.optional-dependencies]
 all = [
-    "python-benedict[io,parse,s3]",
+    "python-benedict[io,parse,s3,schema]",
 ]
 html = [
     "beautifulsoup4 >= 4.12.0, < 5.0.0",
@@ -137,6 +137,9 @@ parse = [
 s3 = [
     "boto3 >= 1.24.89, < 2.0.0",
 ]
+schema = [
+    "pydantic >= 2.0.0, < 3.0.0",
+]
 toml = [
     "toml >= 0.10.2, < 1.0.0",
 ]

diff --git a/requirements.txt b/requirements.txt
@@ -6,6 +6,7 @@ idna >= 3.7
 mailchecker == 6.0.20
 openpyxl == 3.1.5
 phonenumbers == 9.0.27
+pydantic >= 2.0.0, < 3.0.0
 python-dateutil == 2.9.0.post0
 python-fsutil == 0.16.1
 python-slugify == 8.0.4
@@ -17,3 +18,4 @@ urllib3 >= 2.6.3
 useful-types == 0.2.1
 xlrd == 2.0.2
 xmltodict == 1.0.4
+zipp >= 3.19.1