SqueezeBits
diff --git a/‎max/python/max/dtype/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎max/python/max/dtype/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎max/python/max/dtype/dtype_extension.py‎
Lines changed: 56 additions & 0 deletions b/‎max/python/max/dtype/dtype_extension.py‎
Lines changed: 56 additions & 0 deletions
diff --git a/‎max/python/max/nn/norm/group_norm.py‎
Lines changed: 3 additions & 2 deletions b/‎max/python/max/nn/norm/group_norm.py‎
Lines changed: 3 additions & 2 deletions
diff --git a/‎max/python/max/nn/norm/layer_norm.py‎
Lines changed: 32 additions & 13 deletions b/‎max/python/max/nn/norm/layer_norm.py‎
Lines changed: 32 additions & 13 deletions
diff --git a/‎max/python/max/pipelines/architectures/__init__.py‎
Lines changed: 2 additions & 0 deletions b/‎max/python/max/pipelines/architectures/__init__.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎max/python/max/pipelines/architectures/autoencoder_kl/__init__.py‎
Lines changed: 14 additions & 0 deletions b/‎max/python/max/pipelines/architectures/autoencoder_kl/__init__.py‎
Lines changed: 14 additions & 0 deletions
@@ -11,4 +11,5 @@
 # limitations under the License.
 # ===----------------------------------------------------------------------=== #
 
+from . import dtype_extension
 from .dtype import DType
@@ -0,0 +1,56 @@
+# ===----------------------------------------------------------------------=== #
+# Copyright (c) 2025, Modular Inc. All rights reserved.
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions:
+# https://llvm.org/LICENSE.txt
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===----------------------------------------------------------------------=== #
+
+"""Extension for max.dtype to support additional attributes."""
+
+from numpy import finfo as np_finfo
+
+from .dtype import DType
+
+
+class finfo:
+    """Numerical limits of a floating-point max.dtype.DType.
+
+    Mimics torch.finfo without a torch dependency. bfloat16 is special-cased
+    with exact limits; every other dtype is delegated to numpy.finfo.
+
+    NOTE: Currently this is applied by patching DType.
+    It would be better implemented in the dtype library itself.
+    """
+
+    def __init__(self, dtype: DType):
+        """Initialize finfo for a given max.dtype.DType.
+
+        Args:
+            dtype: The data type to get limits for.
+        """
+        if dtype == DType.bfloat16:
+            self.max = (2.0 - 2.0**-7) * 2.0**127  # exact largest finite value
+            self.min = -self.max
+            self.bits = 16
+            self.eps = 0.0078125  # 2**-7, exact
+            self.resolution = 0.01
+            self.tiny = 2.0**-126  # exact smallest positive normal value
+            self.dtype = "bfloat16"
+        else:
+            limits = np_finfo(dtype.to_numpy())
+            self.min = float(limits.min)
+            self.max = float(limits.max)
+            self.bits = limits.bits
+            self.eps = float(limits.eps)
+            self.resolution = float(limits.resolution)
+            self.tiny = float(limits.tiny)
+            self.dtype = str(limits.dtype)
+
+
+DType.finfo = finfo  # Monkey-patch: expose the class as DType.finfo.
@@ -45,6 +45,7 @@ def __init__(
         eps: float = 1e-5,
         affine: bool = True,
         device: DeviceRef = DeviceRef.GPU(),
+        dtype: DType = DType.float32,
     ) -> None:
         super().__init__()
         self.num_groups = num_groups
@@ -65,13 +66,13 @@ def __init__(
             self.weight = Weight(
                 name="weight",
                 shape=(self.num_channels,),
-                dtype=DType.float32,
+                dtype=dtype,
                 device=device,
             )
             self.bias = Weight(
                 name="bias",
                 shape=(self.num_channels,),
-                dtype=DType.float32,
+                dtype=dtype,
                 device=device,
             )
 
 
@@ -36,37 +36,56 @@ def __init__(
         dtype: DType,
         eps: float = 1e-5,
         use_bias: bool = True,
+        keep_dtype: bool = False,
+        elementwise_affine: bool = True,
     ) -> None:
         super().__init__()
         self.devices = devices
-        self.weight = Weight("weight", dtype, (dims,), device=self.devices[0])
-        self.bias = (
-            Weight("bias", dtype, (dims,), device=self.devices[0])
-            if use_bias
-            else None
-        )
+        if elementwise_affine:
+            self.weight = Weight(
+                "weight", dtype, (dims,), device=self.devices[0]
+            )
+            self.bias = (
+                Weight("bias", dtype, (dims,), device=self.devices[0])
+                if use_bias
+                else None
+            )
+        else:
+            self.weight = None
+            self.bias = None
         self.eps = eps
         self.dim = dims
         self.dtype = dtype
+        self.keep_dtype = keep_dtype
         self._sharding_strategy: ShardingStrategy | None = None
 
     def __call__(self, input: TensorValue):
         # TODO: AIPIPE-95 Replace with a broadcasting rmo.layer_norm
         bias = (
-            ops.cast(self.bias, DType.float32)
-            if self.bias
+            self.bias
+            if self.bias is not None
             # If bias wasn't passed then use bias-less layer norm (beta = 0).
             else ops.broadcast_to(
-                ops.constant(0.0, DType.float32, self.weight.device),
+                ops.constant(0.0, self.dtype, input.device),
+                shape=(input.shape[-1],),
+            )
+        )
+        gamma = (
+            self.weight
+            if self.weight is not None
+            else ops.broadcast_to(  # no weight: identity scale (gamma = 1)
+                ops.constant(1.0, self.dtype, input.device),
                 shape=(input.shape[-1],),
             )
         )
-        return ops.layer_norm(
-            input.cast(DType.float32),
-            gamma=ops.cast(self.weight, DType.float32),
-            beta=bias,
+        # keep_dtype=False normalizes in float32 and casts back to input.dtype.
+        output = ops.layer_norm(
+            input=input if self.keep_dtype else input.cast(DType.float32),
+            gamma=gamma if self.keep_dtype else ops.cast(gamma, DType.float32),
+            beta=bias if self.keep_dtype else ops.cast(bias, DType.float32),
             epsilon=self.eps,
-        ).cast(input.dtype)
+        )
+        return output if self.keep_dtype else output.cast(input.dtype)
 
     @property
     def sharding_strategy(self) -> ShardingStrategy | None:
 
@@ -28,6 +28,7 @@ def register_all_models() -> None:
     from .deepseekV3 import deepseekV3_arch
     from .eagle_llama3 import eagle_llama_arch
     from .exaone import exaone_arch
+    from .flux1 import flux1_arch
     from .gemma3 import gemma3_arch
     from .gemma3multimodal import gemma3_multimodal_arch
     from .gpt_oss import gpt_oss_arch
@@ -54,6 +55,7 @@ def register_all_models() -> None:
         deepseekV2_arch,
         deepseekV3_arch,
         eagle_llama_arch,
+        flux1_arch,
         gemma3_arch,
         gemma3_multimodal_arch,
         granite_arch,
 
@@ -0,0 +1,14 @@
+# ===----------------------------------------------------------------------=== #
+# Copyright (c) 2025, Modular Inc. All rights reserved.
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions:
+# https://llvm.org/LICENSE.txt
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ===----------------------------------------------------------------------=== #
+
+from .model import AutoencoderKLModel