Skip to content

Commit 33164c6

Browse files
authored
Change bias initialization from 'embed' to 'heads'
Fix the bias sharding axis; it should be the output axis instead of the input one.
1 parent 6de9d57 commit 33164c6

1 file changed

Lines changed: 4 additions & 4 deletions

File tree

src/maxdiffusion/models/attention_flax.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -979,7 +979,7 @@ def __init__(
979979
precision=precision,
980980
bias_init=nnx.with_partitioning(
981981
nnx.initializers.zeros,
982-
("embed",),
982+
("heads",),
983983
),
984984
)
985985

@@ -993,7 +993,7 @@ def __init__(
993993
precision=precision,
994994
bias_init=nnx.with_partitioning(
995995
nnx.initializers.zeros,
996-
("embed",),
996+
("heads",),
997997
),
998998
)
999999

@@ -1007,7 +1007,7 @@ def __init__(
10071007
precision=precision,
10081008
bias_init=nnx.with_partitioning(
10091009
nnx.initializers.zeros,
1010-
("embed",),
1010+
("heads",),
10111011
),
10121012
)
10131013

@@ -1021,7 +1021,7 @@ def __init__(
10211021
precision=precision,
10221022
bias_init=nnx.with_partitioning(
10231023
nnx.initializers.zeros,
1024-
("heads",),
1024+
("embed",),
10251025
),
10261026
)
10271027

0 commit comments

Comments (0)