@@ -77,6 +77,7 @@ def forward(
         calc_aux_loss = None,
         deterministic = None,
         return_indices = False,
+        reduce_aux_kl_loss = True
     ):
         deterministic = default(deterministic, self.deterministic_on_eval and not self.training)

@@ -112,7 +113,14 @@ def forward(
         # calculate negative entropy

         kl_div = self.bits * NAT - binary_entropy(logits)
-        aux_kl_loss = F.relu(kl_div - self.kl_loss_threshold).mean()
+        aux_kl_loss = F.relu(kl_div - self.kl_loss_threshold)
+
+        # able to return unreduced kl loss, for use in another project (metacontroller)
+
+        if reduce_aux_kl_loss:
+            aux_kl_loss = aux_kl_loss.mean()
+        else:
+            aux_kl_loss = inverse_pack_lead_dims(aux_kl_loss, '*')

         # maybe straight through

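With this change the auxiliary KL term stays per-position unless `reduce_aux_kl_loss = True`. Below is a minimal standalone sketch of that computation, not the repo's implementation: it assumes `NAT` is ln 2 and that `binary_entropy` sums Bernoulli entropies over the bit dimension, and the `kl_loss_threshold` value is purely illustrative.

```python
import torch
import torch.nn.functional as F

# natural log of 2 - one bit of entropy expressed in nats (assumption)
NAT = torch.log(torch.tensor(2.)).item()

def binary_entropy(logits):
    # entropy of independent Bernoulli(sigmoid(logit)) bits, summed over the last dim (assumption)
    prob = logits.sigmoid()
    eps = 1e-20
    per_bit = -(prob * (prob + eps).log() + (1. - prob) * (1. - prob + eps).log())
    return per_bit.sum(dim = -1)

bits = 8
kl_loss_threshold = 0.5   # hypothetical threshold value

logits = torch.randn(3, 4, bits)

# divergence from the maximum-entropy binary code, one value per position
kl_div = bits * NAT - binary_entropy(logits)

# hinge - only penalize the amount above the threshold
aux_kl_loss = F.relu(kl_div - kl_loss_threshold)

print(aux_kl_loss.shape)   # torch.Size([3, 4]) when left unreduced
print(aux_kl_loss.mean())  # scalar, the default reduced loss
```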
@@ -150,11 +158,11 @@ def forward(

    logits = torch.randn(3, 4, 8)

-    sparse_one_hot, indices, aux_loss = binary_mapper(logits, return_indices = True)
+    sparse_one_hot, indices, aux_loss = binary_mapper(logits, return_indices = True, reduce_aux_kl_loss = False)

    assert sparse_one_hot.shape == (3, 4, 2 ** 8)
    assert indices.shape == (3, 4)
-    assert aux_loss.numel() == 1
+    assert aux_loss.shape == (3, 4)

    binary_mapper.eval()
    sparse_one_hot1, _ = binary_mapper(logits, deterministic = True)
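Continuing from the `aux_loss` of shape `(3, 4)` returned in the test above, a hypothetical downstream consumer (not part of this commit) could weight the per-position penalty before reducing it; the weights here are illustrative.

```python
# hypothetical: weight the per-position kl penalty before reducing,
# e.g. by a metacontroller's per-token importance scores

per_token_weight = torch.rand(3, 4)   # illustrative weights, one per (batch, seq) position

weighted_aux_kl_loss = (aux_loss * per_token_weight).mean()
```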