From d1e1bd356087bbcf6a32c4404c814449acdc3826 Mon Sep 17 00:00:00 2001
From: Giovanni Grano <me@giograno.com>
Date: Tue, 2 Dec 2025 18:12:23 +0100
Subject: [PATCH 1/4] wip

---
 src/py_avro_schema/_schemas.py | 11 +++++++++++
 tests/test_typed_dict.py       | 21 +++++++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/src/py_avro_schema/_schemas.py b/src/py_avro_schema/_schemas.py
index 8ecd499..f34961c 100644
--- a/src/py_avro_schema/_schemas.py
+++ b/src/py_avro_schema/_schemas.py
@@ -1168,6 +1168,8 @@ def handles_type(cls, py_type: Type) -> bool:
             not dataclasses.is_dataclass(py_type)
             # Pydantic models are handled above
             and not hasattr(py_type, "__pydantic_private__")
+            # TypedDict classes are handled separately
+            and not is_typeddict(py_type)
             # If we are subclassing a string, used the "named string" approach
             and (inspect.isclass(py_type) and not issubclass(py_type, str))
             # and any other class with typed annotations
@@ -1240,12 +1242,21 @@ def __init__(self, py_type: Type, namespace: str | None = None, options: Option
         """
         super().__init__(py_type, namespace=namespace, options=options)
         py_type = _type_from_annotated(py_type)
+        self.is_total = py_type.__dict__.get("__total__", True)
         self.py_fields: dict[str, Type] = get_type_hints(py_type, include_extras=True)
         self.record_fields = [self._record_field(field) for field in self.py_fields.items()]
 
     def _record_field(self, py_field: tuple[str, Type]) -> RecordField:
         """Return an Avro record field object for a given TypedDict field"""
         aliases, actual_type = get_field_aliases_and_actual_type(py_field[1])
+
+        if not self.is_total:
+            # If a TypedDict is marked as total=None, it does not need to contain all the field. However, we need to
+            # be able to distinguish between the fields that are missing from the ones that are present but set to None.
+            # To do that, we extend the original type with str. We will later add a special string (e.g., __td_missing__)
+            # as a marker at deserialization time.
+            actual_type = Union[actual_type, str]
+
         field_obj = RecordField(
             py_type=actual_type,
             name=py_field[0],
diff --git a/tests/test_typed_dict.py b/tests/test_typed_dict.py
index acbbeb7..d90d4f8 100644
--- a/tests/test_typed_dict.py
+++ b/tests/test_typed_dict.py
@@ -85,3 +85,24 @@ class User(TypedDict):
     }
 
     assert_schema(User, expected)
+
+
+def test_non_total_typed_dict():
+
+    class PyType(TypedDict, total=False):
+        name: str
+        age: int | None
+
+    expected = {
+        "type": "record",
+        "name": "PyType",
+        "fields": [
+            {
+                "name": "name",
+                "type":"string",
+            },
+            {"name": "age", "type": ["long", "null", "string"]},
+        ]
+    }
+    assert_schema(PyType, expected)
+

From 41185f03a6b585de4d987de85838e60658b299f0 Mon Sep 17 00:00:00 2001
From: Giovanni Grano <me@giograno.com>
Date: Wed, 3 Dec 2025 09:12:18 +0100
Subject: [PATCH 2/4] handle strenum with invalid strings

---
 src/py_avro_schema/_schemas.py | 19 +++++++++++++++----
 tests/test_typed_dict.py       | 10 +++++++---
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/src/py_avro_schema/_schemas.py b/src/py_avro_schema/_schemas.py
index f34961c..c4866a5 100644
--- a/src/py_avro_schema/_schemas.py
+++ b/src/py_avro_schema/_schemas.py
@@ -804,7 +804,18 @@ def data(self, names: NamesType) -> JSONType:
         schemas = (item_schema.data(names=names) for item_schema in self.item_schemas)
         # We need to deduplicate the schemas **after** rendering. This is because **different** Python types might
         # result in the **same** Avro schema. Preserving order as order may be significant in an Avro schema.
-        unique_schemas = list(more_itertools.unique_everseen(schemas))
+
+        def normalize_string_duplicates(_schema):
+            """We might have cases in which we have a schema both for ``StrSubclassSchema`` (e.g., a ``StrEnum`` with
+            invalid names is represented as a ``StrSubclassSchema``) and a string. These are technically duplicates,
+            but ``unique_everseen`` won't remove them by default."""
+            if _schema == "string":
+                return "string"
+            elif isinstance(_schema, dict) and _schema.get("type") == "string":
+                return "string"
+            return _schema
+
+        unique_schemas = list(more_itertools.unique_everseen(schemas, key=normalize_string_duplicates))
         if len(unique_schemas) > 1:
             return unique_schemas
         else:
@@ -1253,9 +1264,9 @@ def _record_field(self, py_field: tuple[str, Type]) -> RecordField:
         if not self.is_total:
             # If a TypedDict is marked as total=None, it does not need to contain all the field. However, we need to
             # be able to distinguish between the fields that are missing from the ones that are present but set to None.
-            # To do that, we extend the original type with str. We will later add a special string (e.g., __td_missing__)
-            # as a marker at deserialization time.
-            actual_type = Union[actual_type, str]
+            # To do that, we extend the original type with str. We will later add a special string
+            # (e.g., __td_missing__) as a marker at deserialization time.
+            actual_type = Union[actual_type, str]  # type: ignore
 
         field_obj = RecordField(
             py_type=actual_type,
diff --git a/tests/test_typed_dict.py b/tests/test_typed_dict.py
index d90d4f8..195c61a 100644
--- a/tests/test_typed_dict.py
+++ b/tests/test_typed_dict.py
@@ -1,3 +1,4 @@
+from enum import StrEnum
 from typing import Annotated, TypedDict
 
 from py_avro_schema._alias import Alias, register_type_alias
@@ -88,10 +89,13 @@ class User(TypedDict):
 
 
 def test_non_total_typed_dict():
+    class Opt(StrEnum):
+        val = "invalid-val"
 
     class PyType(TypedDict, total=False):
         name: str
         age: int | None
+        opt: Opt | None
 
     expected = {
         "type": "record",
@@ -99,10 +103,10 @@ class PyType(TypedDict, total=False):
         "fields": [
             {
                 "name": "name",
-                "type":"string",
+                "type": "string",
             },
             {"name": "age", "type": ["long", "null", "string"]},
-        ]
+            {"name": "opt", "type": [{"namedString": "Opt", "type": "string"}, "null"]},
+        ],
     }
     assert_schema(PyType, expected)
-

From 77b523b4c387a8fea19c974fc64301dd82b6b7da Mon Sep 17 00:00:00 2001
From: Giovanni Grano <me@giograno.com>
Date: Wed, 3 Dec 2025 09:30:03 +0100
Subject: [PATCH 3/4] adding option

---
 src/py_avro_schema/_schemas.py | 8 +++++++-
 tests/test_typed_dict.py       | 3 ++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/py_avro_schema/_schemas.py b/src/py_avro_schema/_schemas.py
index c4866a5..eee82bb 100644
--- a/src/py_avro_schema/_schemas.py
+++ b/src/py_avro_schema/_schemas.py
@@ -131,6 +131,12 @@ class Option(enum.Flag):
     #: See https://docs.pydantic.dev/dev/api/fields/#pydantic.fields.Field
     USE_FIELD_ALIAS = enum.auto()
 
+    #: TypedDicts marked with ``total=False`` remain valid when a field is missing. When one of the fields is also
+    #: optional, we need a way to distinguish between a ``None`` value and an unset field. With this option,
+    #: the type of each field is extended with ``string``. This way, clients can add markers
+    #: (e.g., ``__td_missing__``) to discern the two cases.
+    MARK_NON_TOTAL_TYPED_DICTS = enum.auto()
+
 
 JSON_OPTIONS = [opt for opt in Option if opt.name and opt.name.startswith("JSON_")]
 
@@ -1261,7 +1267,7 @@ def _record_field(self, py_field: tuple[str, Type]) -> RecordField:
         """Return an Avro record field object for a given TypedDict field"""
         aliases, actual_type = get_field_aliases_and_actual_type(py_field[1])
 
-        if not self.is_total:
+        if Option.MARK_NON_TOTAL_TYPED_DICTS in self.options and not self.is_total:
             # If a TypedDict is marked as total=None, it does not need to contain all the field. However, we need to
             # be able to distinguish between the fields that are missing from the ones that are present but set to None.
             # To do that, we extend the original type with str. We will later add a special string
diff --git a/tests/test_typed_dict.py b/tests/test_typed_dict.py
index 195c61a..3b73413 100644
--- a/tests/test_typed_dict.py
+++ b/tests/test_typed_dict.py
@@ -1,6 +1,7 @@
 from enum import StrEnum
 from typing import Annotated, TypedDict
 
+import py_avro_schema as pas
 from py_avro_schema._alias import Alias, register_type_alias
 from py_avro_schema._testing import assert_schema
 
@@ -109,4 +110,4 @@ class PyType(TypedDict, total=False):
             {"name": "opt", "type": [{"namedString": "Opt", "type": "string"}, "null"]},
         ],
     }
-    assert_schema(PyType, expected)
+    assert_schema(PyType, expected, options=pas.Option.MARK_NON_TOTAL_TYPED_DICTS)

From c514ae055c20b12e72bbcfe5fc93f89bef7b3841 Mon Sep 17 00:00:00 2001
From: Giovanni Grano <me@giograno.com>
Date: Fri, 5 Dec 2025 08:01:16 +0100
Subject: [PATCH 4/4] add str | None field in the test

---
 tests/test_typed_dict.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/test_typed_dict.py b/tests/test_typed_dict.py
index 3b73413..3e1248b 100644
--- a/tests/test_typed_dict.py
+++ b/tests/test_typed_dict.py
@@ -95,6 +95,7 @@ class Opt(StrEnum):
 
     class PyType(TypedDict, total=False):
         name: str
+        nickname: str | None
         age: int | None
         opt: Opt | None
 
@@ -106,6 +107,7 @@ class PyType(TypedDict, total=False):
                 "name": "name",
                 "type": "string",
             },
+            {"name": "nickname", "type": ["string", "null"]},
             {"name": "age", "type": ["long", "null", "string"]},
             {"name": "opt", "type": [{"namedString": "Opt", "type": "string"}, "null"]},
         ],