@@ -147,6 +147,10 @@ class Option(enum.Flag):
147147 #: Add an __id field to records to track the id of mutable objects
148148 ADD_REFERENCE_ID = enum .auto ()
149149
150+ # Use deterministic default values for volatile types like datetime or strings like UUIDs. Non-deterministic
151+ # factories might be a problem when comparing schemas, as by definition they change every time a schema is generated.
152+ DETERMINISTIC_DEFAULTS = enum .auto ()
153+
150154
# Every named Option member whose name starts with "JSON_"
151155JSON_OPTIONS = [opt for opt in Option if opt .name and opt .name .startswith ("JSON_" )]
152156
@@ -221,6 +225,7 @@ def _schema_obj(py_type: Type, namespace: Optional[str] = None, options: Option
221225
222226# See https://avro.apache.org/docs/1.11.1/specification/#names
223227_AVRO_NAME_PATTERN = re .compile (r"^[A-Za-z]([A-Za-z0-9_])*$" )
# Case-insensitive pattern for a hyphenated 8-4-4-4-12 hexadecimal UUID string. The four trailing groups are optional
# as a unit, so a string of exactly 8 hexadecimal digits matches too.
# NOTE(review): plain 8-hex-digit strings such as "deadbeef" are therefore treated as UUID-like -- confirm intended.
228+ _UUID_PATTERN = re .compile (r"^[0-9a-f]{8}(?:-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})?$" , re .IGNORECASE )
224229
225230
226231def validate_name (value : str ) -> str :
@@ -487,6 +492,8 @@ def data(self, names: NamesType) -> JSONObj:
487492
def make_default(self, py_default: datetime.date) -> int:
    """Convert a Python date default into the integer stored in the Avro schema

    When ``Option.DETERMINISTIC_DEFAULTS`` is among ``self.options`` the result is always ``0``. Otherwise it is
    the number of days between 1970-01-01 and ``py_default``.
    """
    if Option.DETERMINISTIC_DEFAULTS not in self.options:
        unix_epoch = datetime.date(1970, 1, 1)
        elapsed = py_default - unix_epoch
        return elapsed.days
    # Deterministic mode: ignore the actual default and use a fixed value instead
    return 0
491498
492499
@@ -510,6 +517,8 @@ def data(self, names: NamesType) -> JSONObj:
510517
511518 def make_default (self , py_default : datetime .time ) -> int :
512519 """Return an Avro schema compliant default value for a given Python value"""
520+ if Option .DETERMINISTIC_DEFAULTS in self .options :
521+ return 0
513522 # Force UTC as we're concerned only about time diffs
514523 dt1 = datetime .datetime (1 , 1 , 1 , tzinfo = datetime .timezone .utc )
515524 dt2 = datetime .datetime .combine (datetime .datetime (1 , 1 , 1 ), py_default , tzinfo = datetime .timezone .utc )
@@ -532,6 +541,8 @@ def data(self, names: NamesType) -> JSONObj:
532541
def make_default(self, py_default: datetime.datetime) -> int:
    """Return an Avro schema compliant default value for a given Python value

    :param py_default: The Python default value. Must be timezone-aware unless ``Option.DETERMINISTIC_DEFAULTS`` is
                       among ``self.options``, in which case the value is not inspected at all.
    :return:           ``0`` in deterministic mode, otherwise the exact number of microseconds between
                       1970-01-01T00:00:00 UTC and ``py_default``.
    :raises TypeError: If ``py_default`` has no ``tzinfo`` (and deterministic mode is off).
    """
    if Option.DETERMINISTIC_DEFAULTS in self.options:
        return 0
    if not py_default.tzinfo:
        raise TypeError(f"Default {py_default!r} must be timezone-aware")
    epoch = datetime.datetime.fromtimestamp(0, tz=datetime.timezone.utc)
    # Divide by a one-microsecond timedelta instead of using ``int(total_seconds() * 1e6)``: the float round trip
    # followed by truncation can yield a value that is one microsecond off, whereas timedelta floor division is
    # pure integer arithmetic and therefore exact.
    return (py_default - epoch) // datetime.timedelta(microseconds=1)
@@ -1079,7 +1090,18 @@ def data(self, names: NamesType) -> JSONObj:
10791090 if self .aliases :
10801091 field_data ["aliases" ] = sorted (self .aliases )
10811092 if self .default != dataclasses .MISSING :
1082- field_data ["default" ] = self .schema .make_default (self .default )
1093+ default_value = self .schema .make_default (self .default )
1094+
1095+ # When a field is a string but its default value looks like a UUID, we do not pass through the UUID
1096+ # schema (which already sets an empty default). Here we need to catch the strings that look like a UUID
1097+ # and set a deterministic default.
1098+ if (
1099+ Option .DETERMINISTIC_DEFAULTS in self .options
1100+ and isinstance (default_value , str )
1101+ and _UUID_PATTERN .match (default_value )
1102+ ):
1103+ default_value = ""
1104+ field_data ["default" ] = default_value
10831105 if self .docs and Option .NO_DOC not in self .options :
10841106 field_data ["doc" ] = self .docs
10851107 return field_data
0 commit comments