Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ python-benedict is a dict subclass with **keylist/keypath/keyattr** support, **I
- **Keypath** support using **keypath-separator** *(dot syntax by default)*.
- Keypath **list-index** support *(also negative)* using the standard `[n]` suffix.
- Normalized **I/O operations** with most common formats: `base64`, `cli`, `csv`, `html`, `ini`, `json`, `pickle`, `plist`, `query-string`, `toml`, `xls`, `xml`, `yaml`.
- `NEW` Optional **Pydantic v2 schema** validation and type coercion on all `from_*` / `to_*` I/O methods via the `schema` kwarg *(requires `python-benedict[schema]`)*.
- Multiple **I/O operations** backends: `file-system` *(read/write)*, `url` *(read-only)*, `s3` *(read/write)*.
- Many **utility** and **parse methods** to retrieve data as needed *(check the [API](#api) section)*.
- Well **tested**. ;)
Expand Down Expand Up @@ -67,6 +68,7 @@ Here the hierarchy of possible installation targets available when running `pip
- `[yaml]`
- `[parse]`
- `[s3]`
- `[schema]`

## Usage

Expand Down Expand Up @@ -613,6 +615,29 @@ d.unique()

These methods are available for input/output operations.

All `from_*` and `to_*` methods accept an optional `schema` keyword argument. When a [Pydantic v2](https://docs.pydantic.dev/) model class is passed, the data is validated and type-coerced through the model before being returned (on decode) or serialized (on encode). This requires the `python-benedict[schema]` extra.

```
pip install "python-benedict[schema]"
```

```python
from benedict import benedict
from pydantic import BaseModel

class User(BaseModel):
    name: str
    age: int

# validate and coerce types on decode
d = benedict.from_json('{"name": "Alice", "age": "30"}', schema=User)
assert d["age"] == 30 # coerced from str to int

# validate and coerce types on encode
d = benedict({"name": "Bob", "age": "25"})
s = d.to_json(schema=User) # age is coerced to int before serialization
```

#### `from_base64`

```python
Expand Down Expand Up @@ -665,6 +690,7 @@ d = benedict.from_html(s, **kwargs)
# Accept as first argument: url, filepath or data-string.
# It's possible to pass decoder specific options using kwargs:
# https://docs.python.org/3/library/configparser.html
# It's possible to pass a Pydantic v2 model class as schema= to validate and coerce data.
# A ValueError is raised in case of failure.
d = benedict.from_ini(s, **kwargs)
```
Expand All @@ -676,6 +702,7 @@ d = benedict.from_ini(s, **kwargs)
# Accept as first argument: url, filepath or data-string.
# It's possible to pass decoder specific options using kwargs:
# https://docs.python.org/3/library/json.html
# It's possible to pass a Pydantic v2 model class as schema= to validate and coerce data.
# A ValueError is raised in case of failure.
d = benedict.from_json(s, **kwargs)
```
Expand Down Expand Up @@ -752,6 +779,7 @@ d = benedict.from_xml(s, **kwargs)
# Accept as first argument: url, filepath or data-string.
# It's possible to pass decoder specific options using kwargs:
# https://pyyaml.org/wiki/PyYAMLDocumentation
# It's possible to pass a Pydantic v2 model class as schema= to validate and coerce data.
# A ValueError is raised in case of failure.
d = benedict.from_yaml(s, **kwargs)
```
Expand Down Expand Up @@ -794,6 +822,7 @@ s = d.to_ini(**kwargs)
# Return the dict instance encoded in json format and optionally save it at the specified filepath.
# It's possible to pass encoder specific options using kwargs:
# https://docs.python.org/3/library/json.html
# It's possible to pass a Pydantic v2 model class as schema= to validate and coerce data before encoding.
# A ValueError is raised in case of failure.
s = d.to_json(**kwargs)
```
Expand Down
8 changes: 7 additions & 1 deletion benedict/dicts/io/io_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
get_format_by_path,
get_serializer_by_format,
)
from benedict.utils import type_util
from benedict.utils import schema_util, type_util


def autodetect_format(s: Any) -> str | None:
Expand Down Expand Up @@ -56,10 +56,13 @@ def decode(s: Any, format: str, **kwargs: Any) -> Any:
if not serializer:
raise ValueError(f"Invalid format: {format}.")
options = kwargs.copy()
schema = options.pop("schema", None)
if format in ["b64", "base64"]:
options.setdefault("subformat", "json")
content = read_content(s, format, options)
data = serializer.decode(content, **options)
if schema is not None:
data = schema_util.apply_schema(data, schema)
return data
Comment thread
fabiocaccamo marked this conversation as resolved.


Expand All @@ -68,6 +71,9 @@ def encode(d: Any, format: str, filepath: str | None = None, **kwargs: Any) -> A
if not serializer:
raise ValueError(f"Invalid format: {format}.")
options = kwargs.copy()
schema = options.pop("schema", None)
if schema is not None:
d = schema_util.apply_schema(d, schema)
content = serializer.encode(d, **options)
if filepath:
filepath = str(filepath)
Expand Down
5 changes: 5 additions & 0 deletions benedict/extras.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"require_html",
"require_parse",
"require_s3",
"require_schema",
"require_toml",
"require_xls",
"require_xml",
Expand All @@ -28,6 +29,10 @@ def require_s3(*, installed: bool) -> None:
_require_optional_dependencies(target="s3", installed=installed)


def require_schema(*, installed: bool) -> None:
    """
    Check that the optional ``schema`` extra is available.

    Forwards the ``installed`` flag to ``_require_optional_dependencies``
    with the target name ``"schema"``; how a missing dependency is reported
    is decided by that helper.
    """
    _require_optional_dependencies(installed=installed, target="schema")


def require_toml(*, installed: bool) -> None:
    """
    Check that the optional ``toml`` extra is available.

    Forwards the ``installed`` flag to ``_require_optional_dependencies``
    with the target name ``"toml"``; how a missing dependency is reported
    is decided by that helper.
    """
    _require_optional_dependencies(installed=installed, target="toml")

Expand Down
30 changes: 30 additions & 0 deletions benedict/utils/schema_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
from __future__ import annotations

from typing import Any

try:
import pydantic

pydantic_installed = True
except ImportError:
Comment thread
fabiocaccamo marked this conversation as resolved.
Outdated
pydantic_installed = False


def apply_schema(data: Any, schema: Any) -> Any:
    """
    Run ``data`` through a Pydantic model class and return the dumped result.

    The data is validated with ``schema.model_validate`` and the resulting
    model instance is converted back with ``model_dump``.

    Raises ExtrasRequireModuleNotFoundError (via ``require_schema``) if
    pydantic is not installed.
    Raises TypeError if ``schema`` is not a pydantic BaseModel subclass.
    """
    # Imported here rather than at module level, as in the original code.
    from benedict.extras import require_schema

    # Must run first: ``pydantic`` is only bound when the import succeeded.
    require_schema(installed=pydantic_installed)

    is_model_class = isinstance(schema, type) and issubclass(
        schema, pydantic.BaseModel
    )
    if not is_model_class:
        raise TypeError(
            f"schema must be a pydantic BaseModel subclass, got {type(schema)!r}"
        )

    model_cls: type[pydantic.BaseModel] = schema
    return model_cls.model_validate(data).model_dump()
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ Twitter = "https://twitter.com/fabiocaccamo"

[project.optional-dependencies]
all = [
"python-benedict[io,parse,s3]",
"python-benedict[io,parse,s3,schema]",
]
html = [
"beautifulsoup4 >= 4.12.0, < 5.0.0",
Expand All @@ -137,6 +137,9 @@ parse = [
s3 = [
"boto3 >= 1.24.89, < 2.0.0",
]
schema = [
"pydantic >= 2.0.0, < 3.0.0",
]
Comment thread
fabiocaccamo marked this conversation as resolved.
toml = [
"toml >= 0.10.2, < 1.0.0",
]
Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ idna >= 3.7
mailchecker == 6.0.20
openpyxl == 3.1.5
phonenumbers == 9.0.27
pydantic >= 2.0.0, < 3.0.0
python-dateutil == 2.9.0.post0
python-fsutil == 0.16.1
python-slugify == 8.0.4
Expand All @@ -17,3 +18,4 @@ urllib3 >= 2.6.3
useful-types == 0.2.1
xlrd == 2.0.2
xmltodict == 1.0.4
zipp >= 3.19.1
Comment thread
fabiocaccamo marked this conversation as resolved.
Outdated
Loading
Loading