Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions src/py_avro_schema/_schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,13 +405,16 @@ def handles_type(cls, py_type: Type) -> bool:


@register_schema
class DictAsJSONSchema(Schema):
"""An Avro string schema representing a Python Dict[str, Any] or List[Dict[str, Any]] assuming JSON serialization"""
class TypeAsJSONSchema(Schema):
"""
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: should we rename the DictAsJSONSchema to something like TypeAsJSONSchema to indicates it can map more?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good suggestion! Done it.

An Avro string schema representing a Python Dict[str, Any], List[Dict[str, Any]] or List[Any] assuming
JSON serialization
"""

@classmethod
def handles_type(cls, py_type: Type) -> bool:
"""Whether this schema class can represent a given Python class"""
return _is_dict_str_any(py_type) or _is_list_dict_str_any(py_type)
return is_logically_json(py_type)

def data(self, names: NamesType) -> JSONObj:
"""Return the schema data"""
Expand Down Expand Up @@ -1280,6 +1283,17 @@ def _is_list_dict_str_any(py_type: Type) -> bool:
return False


def _is_list_any(py_type: Type) -> bool:
"""Return whether a given type is ``List[Any]``"""
origin = get_origin(py_type)
return inspect.isclass(origin) and issubclass(origin, list) and get_args(py_type) == (Any,)


def is_logically_json(py_type: Type) -> bool:
"""Returns whether a given type is logically a JSON and can be serialized as such"""
return _is_list_any(py_type) or _is_list_dict_str_any(py_type) or _is_dict_str_any(py_type)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: is it worth keeping the 3 different functions as they are? Here, if we're to match the last condition, we will call origin = get_origin(py_type) three times. Would it be worth to call those things only once and pass it to those functions? Or are they used elsewhere?

If they're not used, I could see them being merged into one and we could use recursion on them?

Something like:

def is_logically_json(py_type: Type) -> bool:
    origin = get_origin(py_type)
    if not inspect.isclass(origin):
        return False

    args = get_args(py_type)
    if issubclass(origin, list):
        if not args:
            return False
        elif args == (Any,):
            return True
        return is_logically_json(args[0])

    elif issubclass(origin, dict):
        return args == (str, Any)

Or something like that? I'm not sure it's really clearer, with some comments it would be better but at least we're calling the methods only once every time and we can add more case more easily, and the recursion would handle all cases of "logically JSON"? it might be the case already though

I feel like this would match the Dict[str, List[Any]] for example which I'm not sure currently it would work



def _is_class(py_type: Any, of_types: Union[Type, Tuple[Type, ...]], include_subclasses: bool = True) -> bool:
"""Return whether the given type is a (sub) class of a type or types"""
py_type = _type_from_annotated(py_type)
Expand Down
6 changes: 6 additions & 0 deletions tests/test_logicals.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,3 +285,9 @@ def test_list_json_logical_bytes_field():
"logicalType": "json",
}
assert_schema(py_type, expected)


def test_list_json_logical_list_any():
py_type = List[Any]
expected = {"type": "bytes", "logicalType": "json"}
assert_schema(py_type, expected)