Commit 6ada564

Finish switching to offset and shape

1 parent 799c88f · commit 6ada564
5 files changed · 111 additions & 84 deletions

src/torchjd/sparse/_aten_function_overrides/backward.py

Lines changed: 3 additions & 3 deletions
@@ -11,7 +11,7 @@ def threshold_backward_default(
     new_physical = aten.threshold_backward.default(grad_output.physical, self, threshold)
 
     return SparseLatticedTensor(
-        new_physical, grad_output.basis, grad_output.offset, grad_output.size
+        new_physical, grad_output.basis, grad_output.offset, grad_output.shape_t
     )
 
 
@@ -27,7 +27,7 @@ def hardtanh_backward_default(
 
     new_physical = aten.hardtanh_backward.default(grad_output.physical, self, min_val, max_val)
     return SparseLatticedTensor(
-        new_physical, grad_output.basis, grad_output.offset, grad_output.size
+        new_physical, grad_output.basis, grad_output.offset, grad_output.shape_t
     )
 
 
@@ -38,5 +38,5 @@ def hardswish_backward_default(grad_output: SparseLatticedTensor, self: Tensor):
 
     new_physical = aten.hardswish_backward.default(grad_output.physical, self)
     return SparseLatticedTensor(
-        new_physical, grad_output.basis, grad_output.offset, grad_output.size
+        new_physical, grad_output.basis, grad_output.offset, grad_output.shape_t
    )
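
All three overrides share one pattern: an elementwise backward map that sends 0 to 0 only needs to touch the stored physical values, so the lattice metadata (basis, offset, and the renamed shape_t) is reused unchanged. A minimal sketch of the pattern, assuming SparseLatticedTensor is importable from torchjd.sparse._sparse_latticed_tensor as in this tree, with torch.relu standing in for the aten backward kernel and illustrative values:

import torch

from torchjd.sparse._sparse_latticed_tensor import SparseLatticedTensor

# physical [1., -2., 3.] with basis [[2]] represents the dense vector [1, 0, -2, 0, 3]
grad_output = SparseLatticedTensor(
    torch.tensor([1.0, -2.0, 3.0]),
    torch.tensor([[2]]),
    torch.zeros(1, dtype=torch.int64),  # offset
    torch.tensor([5]),                  # shape
)
new_physical = torch.relu(grad_output.physical)  # stand-in for the aten kernel
result = SparseLatticedTensor(
    new_physical, grad_output.basis, grad_output.offset, grad_output.shape_t
)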

src/torchjd/sparse/_aten_function_overrides/einsum.py

Lines changed: 4 additions & 4 deletions
@@ -165,7 +165,7 @@ def mul_Tensor(t1: Tensor | int | float, t2: Tensor | int | float) -> Tensor:
 @impl(aten.div.Tensor)
 def div_Tensor(t1: Tensor | int | float, t2: Tensor | int | float) -> Tensor:
     t1_, t2_ = prepare_for_elementwise_op(t1, t2)
-    t2_ = SparseLatticedTensor(1.0 / t2_.physical, t2_.basis, t2_.offset, t2_.size)
+    t2_ = SparseLatticedTensor(1.0 / t2_.physical, t2_.basis, t2_.offset, t2_.shape_t)
     all_dims = list(range(t1_.ndim))
     return einsum((t1_, all_dims), (t2_, all_dims), output=all_dims)
 
@@ -177,7 +177,7 @@ def mul_Scalar(t: SparseLatticedTensor, scalar) -> SparseLatticedTensor:
 
     assert isinstance(t, SparseLatticedTensor)
     new_physical = aten.mul.Scalar(t.physical, scalar)
-    return SparseLatticedTensor(new_physical, t.basis, t.offset, t.size)
+    return SparseLatticedTensor(new_physical, t.basis, t.offset, t.shape_t)
 
 
 @impl(aten.add.Tensor)
@@ -189,10 +189,10 @@ def add_Tensor(
     if (
         torch.equal(t1_.basis, t2_.basis)
         and torch.equal(t1_.offset, t2_.offset)
-        and torch.equal(t1_.size, t2_.size)
+        and torch.equal(t1_.shape_t, t2_.shape_t)
     ):
         new_physical = t1_.physical + t2_.physical * alpha
-        return SparseLatticedTensor(new_physical, t1_.basis, t1_.offset, t1_.size)
+        return SparseLatticedTensor(new_physical, t1_.basis, t1_.offset, t1_.shape_t)
     else:
         raise NotImplementedError()
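
The fast path in add_Tensor only fires when both operands live on exactly the same lattice (equal basis, offset and shape_t); anything else raises NotImplementedError. A minimal sketch of that case, with illustrative values and the same import assumption as above:

import torch

from torchjd.sparse._sparse_latticed_tensor import SparseLatticedTensor

a = SparseLatticedTensor(
    torch.tensor([1.0, 2.0, 3.0]), torch.tensor([[2]]),
    torch.zeros(1, dtype=torch.int64), torch.tensor([5]),
)  # dense view: [1, 0, 2, 0, 3]
b = SparseLatticedTensor(
    torch.tensor([10.0, 20.0, 30.0]), torch.tensor([[2]]),
    torch.zeros(1, dtype=torch.int64), torch.tensor([5]),
)  # dense view: [10, 0, 20, 0, 30]
c = a + b  # physicals add; basis, offset and shape_t are reused
assert torch.equal(c.to_dense(), torch.tensor([11.0, 0.0, 22.0, 0.0, 33.0]))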

src/torchjd/sparse/_aten_function_overrides/shape.py

Lines changed: 13 additions & 55 deletions
@@ -1,7 +1,6 @@
 import operator
 from itertools import accumulate
 from math import prod
-from typing import cast
 
 import torch
 from torch import Tensor, arange, cat, tensor
@@ -41,6 +40,9 @@ def view_default(t: SparseLatticedTensor, shape: list[int]) -> Tensor:
 
     assert isinstance(t, SparseLatticedTensor)
 
+    if not torch.equal(t.padding, torch.zeros_like(t.padding)):
+        raise NotImplementedError()
+
     shape = infer_shape(shape, t.numel())
 
     if prod(shape) != t.numel():
@@ -51,7 +53,9 @@ def view_default(t: SparseLatticedTensor, shape: list[int]) -> Tensor:
     c = _reverse_cumulative_product(vshape)
     c_prime = _reverse_cumulative_product(shape)
     new_basis = ((c @ S).unsqueeze(0) // c_prime.unsqueeze(1)) % tensor(shape).unsqueeze(1)
-    return to_most_efficient_tensor(t.physical, new_basis)
+
+    new_offset = torch.zeros(len(shape), dtype=torch.int64)
+    return to_most_efficient_tensor(t.physical, new_basis, new_offset, shape)
 
 
 def _reverse_cumulative_product(values: list[int]) -> Tensor:
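
_reverse_cumulative_product(shape) returns the row-major strides of shape: for [a, b, c] it is [b*c, c, 1], so the flat index of (i, j, k) is i*b*c + j*c + k. view uses these strides (c for the old shape, c_prime for the new one) to re-derive each basis column through the flat index. A small illustration of the stride arithmetic only, written with plain lists rather than the helper's int tensors:

import operator
from itertools import accumulate

def reverse_cumulative_product(values: list[int]) -> list[int]:
    # [2, 3, 4] -> [12, 4, 1]
    return list(reversed(list(accumulate(reversed(values[1:] + [1]), operator.mul))))

strides = reverse_cumulative_product([2, 3, 4])
assert strides == [12, 4, 1]
# Flat index of element (1, 2, 3) in a row-major [2, 3, 4] tensor:
assert 1 * strides[0] + 2 * strides[1] + 3 * strides[2] == 23
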
@@ -87,7 +91,7 @@ def unsqueeze_default(t: SparseLatticedTensor, dim: int) -> SparseLatticedTensor:
     pdims = t.basis.shape[1]
     new_basis = cat([t.basis[:dim], torch.zeros(1, pdims, dtype=torch.int64), t.basis[dim:]])
     new_offset = cat([t.offset[:dim], torch.zeros(1, dtype=torch.int64), t.offset[dim:]])
-    new_size = cat([t.size[:dim], torch.zeros(1, dtype=torch.int64), t.size[dim:]])
+    new_size = cat([t.shape_t[:dim], torch.ones(1, dtype=torch.int64), t.shape_t[dim:]])
     return SparseLatticedTensor(t.physical, new_basis, new_offset, new_size)
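
This hunk also fixes a bug: the entry inserted into shape_t must be a 1 (torch.ones), not a 0, since the new virtual dimension has size one. A quick check of the intended behaviour, hedged on the constructor and dispatch shown in this commit:

import torch

from torchjd.sparse._sparse_latticed_tensor import SparseLatticedTensor

t = SparseLatticedTensor(
    torch.tensor([1.0, 2.0, 3.0]), torch.tensor([[2]]),
    torch.zeros(1, dtype=torch.int64), torch.tensor([5]),
)  # dense view: [1, 0, 2, 0, 3], shape [5]
u = t.unsqueeze(0)  # dispatches to unsqueeze_default
assert list(u.shape) == [1, 5]  # new dim has size 1, matching the zero basis row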

@@ -106,15 +110,15 @@ def squeeze_dims(t: SparseLatticedTensor, dims: list[int] | int | None) -> Tensor:
     is_row_kept = [i not in excluded for i in range(t.ndim)]
     new_basis = t.basis[is_row_kept]
     new_offset = t.offset[is_row_kept]
-    new_size = t.size[is_row_kept]
+    new_size = t.shape_t[is_row_kept]
     return to_most_efficient_tensor(t.physical, new_basis, new_offset, new_size)
 
 
 @impl(aten.permute.default)
 def permute_default(t: SparseLatticedTensor, dims: list[int]) -> SparseLatticedTensor:
     new_basis = t.basis[dims]
     new_offset = t.offset[dims]
-    new_size = t.size[dims]
+    new_size = t.shape_t[dims]
     return SparseLatticedTensor(t.physical, new_basis, new_offset, new_size)
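
squeeze and permute never touch the physical: they drop or reorder the per-dimension metadata, one basis row, one offset entry and one shape_t entry per virtual dimension. An illustrative sketch under the same assumptions as above:

import torch

from torchjd.sparse._sparse_latticed_tensor import SparseLatticedTensor

t = SparseLatticedTensor(
    torch.ones(2, 3), torch.tensor([[3, 0], [0, 1]]),
    torch.zeros(2, dtype=torch.int64), torch.tensor([4, 3]),
)  # virtual shape [4, 3]; rows 0 and 3 hold data, rows 1 and 2 are zero
p = t.permute(1, 0)  # basis rows, offset and shape_t entries move together
assert list(p.shape) == [3, 4]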

@@ -124,56 +128,10 @@ def cat_default(tensors: list[Tensor], dim: int = 0) -> Tensor:
         print_fallback(aten.cat.default, (tensors, dim), {})
         return aten.cat.default([unwrap_to_dense(t) for t in tensors])
 
-    tensors_ = [cast(SparseLatticedTensor, t) for t in tensors]
-    ref_tensor = tensors_[0]
-    ref_basis = ref_tensor.basis
-    if any(not torch.equal(t.basis, ref_basis) for t in tensors_[1:]):
-        raise NotImplementedError(
-            "Override for aten.cat.default does not support SLTs that do not all have the same "
-            f"basis. Found the following tensors:\n{[repr(t) for t in tensors_]} and the following "
-            f"dim: {dim}."
-        )
-    if any(t.physical.shape != ref_tensor.physical.shape for t in tensors_[1:]):
-        # This can happen in the following example:
-        # t1 = SLT([1 2 3], [[2]])
-        # t2 = SLT([4 5 6 7], [[2]])
-        # The expected result would be 1 0 2 0 3 4 0 5 0 6 0 7, but this is not representable
-        # efficiently as an SLT (because there is no 0 between 3 and 4, and both physicals have a
-        # different shape so we can't just stack them).
-
-        # TODO: Maybe a partial densify is possible rather than a full densify.
-        print_fallback(aten.cat.default, (tensors, dim), {})
-        return aten.cat.default([unwrap_to_dense(t) for t in tensors])
-
-    # We need to try to find the (pretty sure it either does not exist or is unique) physical
-    # dimension that makes us only move on virtual dimension dim. It also needs to be such that
-    # traversing it entirely brings us exactly to the end of virtual dimension dim.
-
-    ref_virtual_dim_size = ref_tensor.shape[dim]
-    indices = torch.argwhere(
-        torch.eq(ref_basis[dim] * tensor(ref_tensor.physical.shape), ref_virtual_dim_size)
-        & torch.eq(ref_basis.sum(dim=0) * tensor(ref_tensor.physical.shape), ref_virtual_dim_size)
-    )
-    assert len(indices) <= 1
-
-    if len(indices) == 0:
-        # Add a physical dimension pdim on which we can concatenate the physicals such that this
-        # translates into a concatenation of the virtuals on virtual dimension dim.
-
-        pdim = ref_tensor.physical.ndim
-        physicals = [t.physical.unsqueeze(-1) for t in tensors_]
-        new_basis_vector = torch.zeros(ref_tensor.ndim, 1, dtype=torch.int64)
-        new_basis_vector[dim, 0] = ref_virtual_dim_size
-        new_basis = torch.concatenate([ref_tensor.basis, new_basis_vector], dim=1)
-    else:
-        # Such a physical dimension already exists. Note that an alternative implementation would be
-        # to simply always add the physical dimension, and squash it if it ends up being not needed.
-        physicals = [t.physical for t in tensors_]
-        pdim = cast(int, indices[0, 0].item())
-        new_basis = ref_tensor.basis
+    print_fallback(aten.cat.default, (tensors, dim), {})
+    return aten.cat.default([unwrap_to_dense(t) for t in tensors])
 
-    new_physical = aten.cat.default(physicals, dim=pdim)
-    return SparseLatticedTensor(new_physical, new_basis)
+    # TODO: add implementation based on adding some margin to tensors and summing them
 
 
 @impl(aten.expand.default)
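
The deleted fast path is replaced by an unconditional densifying fallback; the old inline comment explains why no cheap general answer exists. Reusing its example: [1, 0, 2, 0, 3] concatenated with [4, 0, 5, 0, 6, 0, 7] is [1, 0, 2, 0, 3, 4, 0, 5, 0, 6, 0, 7], and the stride-2 pattern breaks at the seam (no zero between 3 and 4), so the result fits no single lattice. A sketch of what the fallback now computes, assuming the override is registered:

import torch

from torchjd.sparse._sparse_latticed_tensor import SparseLatticedTensor

t1 = SparseLatticedTensor(
    torch.tensor([1.0, 2.0, 3.0]), torch.tensor([[2]]),
    torch.zeros(1, dtype=torch.int64), torch.tensor([5]),
)  # dense view: [1, 0, 2, 0, 3]
t2 = SparseLatticedTensor(
    torch.tensor([4.0, 5.0, 6.0, 7.0]), torch.tensor([[2]]),
    torch.zeros(1, dtype=torch.int64), torch.tensor([7]),
)  # dense view: [4, 0, 5, 0, 6, 0, 7]
out = torch.cat([t1, t2])  # fallback: both operands are densified first
assert torch.equal(out, torch.cat([t1.to_dense(), t2.to_dense()]))
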
@@ -190,7 +148,7 @@ def expand_default(t: SparseLatticedTensor, sizes: list[int]) -> SparseLatticedTensor:
     # Try to expand each dimension to its new size
     new_physical = t.physical
     new_basis = t.basis
-    new_sizes = t.size
+    new_sizes = t.shape_t
     for d, (v, orig_size, new_size) in enumerate(zip(t.basis, t.shape, sizes, strict=True)):
         if v.sum() > 0 and orig_size != new_size and new_size != -1:
             raise ValueError(

src/torchjd/sparse/_sparse_latticed_tensor.py

Lines changed: 76 additions & 10 deletions
@@ -17,7 +17,7 @@ def __new__(
         physical: Tensor,
         basis: Tensor,
         offset: Tensor | None = None,
-        size: list[int] | tuple[int, ...] | torch.Size | Tensor | None = None,
+        shape: list[int] | tuple[int, ...] | torch.Size | Tensor | None = None,
     ):
         assert basis.dtype == torch.int64

@@ -31,20 +31,20 @@ def __new__(
         # (which is bad!)
         assert not physical.requires_grad or not torch.is_grad_enabled()
 
-        if size is None:
+        if shape is None:
             pshape = tensor(physical.shape, dtype=torch.int64)
-            size = basis @ (pshape - 1) + 1
+            shape = basis @ (pshape - 1) + 1
 
         return Tensor._make_wrapper_subclass(
-            cls, list(size), dtype=physical.dtype, device=physical.device
+            cls, list(shape), dtype=physical.dtype, device=physical.device
         )
 
     def __init__(
         self,
         physical: Tensor,
         basis: Tensor,
         offset: Tensor | None,
-        size: list[int] | tuple[int, ...] | torch.Size | Tensor | None,
+        shape: list[int] | tuple[int, ...] | torch.Size | Tensor | None,
     ):
         """
         This constructor is made for specifying physical and basis exactly. It should not modify
@@ -58,15 +58,15 @@ def __init__(
             the linear transformation between an index in the physical tensor and the corresponding
             index in the virtual tensor, i.e. v_index = basis @ p_index + offset.
         :param offset: Offset for the virtual index, i.e. v_index = basis @ p_index + offset.
-        :param size: Size of the sparse tensor. If not provided, the size will be inferred as the
+        :param shape: Shape of the sparse tensor. If not provided, the shape will be inferred as the
             minimum size big enough to hold all non-zero elements.
 
         # TODO: make a nicer interface where it's possible to provide lists or sizes instead of
         always having to provide int tensors
         """
 
         if offset is None:
-            offset = torch.zeros(len(self.shape))
+            offset = torch.zeros(len(self.shape), dtype=torch.int64)
 
         if any(s == 1 for s in physical.shape):
             raise ValueError(
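
A worked example of the mapping in the docstring, v_index = basis @ p_index + offset: with physical shape [3], basis [[2]] and offset [1], the stored values land at virtual indices 1, 3 and 5 (values illustrative):

import torch

basis = torch.tensor([[2]])
offset = torch.tensor([1])
for p in range(3):
    v = basis @ torch.tensor([p]) + offset
    print(p, "->", v.item())  # 0 -> 1, 1 -> 3, 2 -> 5
# With shape [7], physical [a, b, c] therefore encodes [0, a, 0, b, 0, c, 0]:
# the leading zero comes from the offset, i.e. the start padding.
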
@@ -95,7 +95,16 @@ def __init__(
         self.physical = physical
         self.basis = basis
         self.offset = offset
-        self.size = tensor(size, dtype=torch.int64)
+
+        if shape is None:
+            pshape = tensor(physical.shape, dtype=torch.int64)
+            shape = basis @ (pshape - 1) + 1
+        if isinstance(shape, torch.Tensor):
+            self.shape_t = shape
+        else:
+            self.shape_t = tensor(shape, dtype=torch.int64)
+
+        self.pshape_t = tensor(physical.shape, dtype=torch.int64)
 
     def to_dense(
         self, dtype: torch.dtype | None = None, *, masked_grad: bool | None = None
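
When shape is omitted, shape_t defaults to basis @ (pshape - 1) + 1: the virtual position of the largest physical index, plus one. That is the tightest shape, with zero end padding. Numerically:

import torch

basis = torch.tensor([[2]])
pshape = torch.tensor([3])  # the physical holds 3 values
shape = basis @ (pshape - 1) + 1
assert shape.tolist() == [5]  # [a, 0, b, 0, c] needs exactly 5 slots
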
@@ -110,7 +119,9 @@ def to_dense(
         p_indices_grid = stack(meshgrid(*p_index_ranges, indexing="ij"))
 
         # addmm_cuda not implemented for Long tensors => gotta have these tensors on cpu
-        v_indices_grid = tensordot(self.basis, p_indices_grid, dims=1) + self.offset
+        reshaped_offset = self.offset.reshape([-1] + [1] * self.physical.ndim)
+        v_indices_grid = tensordot(self.basis, p_indices_grid, dims=1) + reshaped_offset
+        # v_indices_grid is of shape [n_virtual_dims] + physical_shape
         res = zeros(self.shape, device=self.device, dtype=self.dtype)
         res[tuple(v_indices_grid)] = self.physical
         return res
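
The fix here is about broadcasting: v_indices_grid has shape [n_virtual_dims] + physical_shape while offset has shape [n_virtual_dims], so the offset must be reshaped to [n_virtual_dims, 1, ..., 1] to shift the indices of each virtual dimension, rather than broadcasting along the last physical axis. A standalone illustration:

import torch

# Index grid for a [2, 3] physical, with 2 virtual dims: shape [2, 2, 3].
grid = torch.stack(torch.meshgrid(torch.arange(2), torch.arange(3), indexing="ij"))
offset = torch.tensor([10, 100])
reshaped_offset = offset.reshape([-1] + [1] * 2)  # shape [2, 1, 1]
shifted = grid + reshaped_offset  # dim-0 indices shift by 10, dim-1 by 100
assert shifted[0].min().item() == 10 and shifted[1].min().item() == 100
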
@@ -128,7 +139,7 @@ def __torch_dispatch__(cls, func, types, args=(), kwargs=None):
             return func(*unwrapped_args, **unwrapped_kwargs)
 
     def __repr__(self, *, tensor_contents=None) -> str:
-        return f"SparseLatticedTensor(physical={self.physical}, basis={self.basis}, offset={self.offset}, size={self.size})"
+        return f"SparseLatticedTensor(physical={self.physical}, basis={self.basis}, offset={self.offset}, size={self.shape_t})"
 
     @classmethod
     def implements(cls, torch_function):
@@ -141,6 +152,61 @@ def decorator(func):
 
         return decorator
 
+    @property
+    def start_padding(self) -> Tensor:
+        """
+        Returns the number of zeros of padding at the start of each virtual dimension.
+
+        The result is an int tensor of shape [virtual_ndim].
+        """
+
+        return self.offset
+
+    @property
+    def end_padding(self) -> Tensor:
+        """
+        Returns the number of zeros of padding at the end of each virtual dimension.
+
+        The result is an int tensor of shape [virtual_ndim].
+        """
+
+        return self.shape_t - self.physical_image_size - self.offset
+
+    @property
+    def padding(self) -> Tensor:
+        """
+        Returns the number of zeros of padding at the start and end of each virtual dimension.
+
+        The result is an int tensor of shape [virtual_ndim, 2].
+        """
+
+        return torch.stack([self.start_padding, self.end_padding], dim=1)
+
+    @property
+    def min_natural_virtual_indices(self) -> Tensor:
+        # Basis where each positive element is replaced by 0
+        non_positive_basis = torch.min(self.basis, torch.zeros_like(self.basis))
+        max_physical_index = self.pshape_t - 1
+        return (non_positive_basis * max_physical_index.unsqueeze(0)).sum(dim=1)
+
+    @property
+    def max_natural_virtual_indices(self) -> Tensor:
+        # Basis where each negative element is replaced by 0
+        non_negative = torch.max(self.basis, torch.zeros_like(self.basis))
+        max_physical_index = self.pshape_t - 1
+        return (non_negative * max_physical_index.unsqueeze(0)).sum(dim=1)
+
+    @property
+    def physical_image_size(self) -> Tensor:
+        """
+        Returns the shape of the image of the physical through the basis transform.
+
+        The result is an int tensor of shape [virtual_ndim].
+        """
+
+        one = torch.ones(self.ndim, dtype=torch.int64)
+        return self.max_natural_virtual_indices - self.min_natural_virtual_indices + one
 
 
 impl = SparseLatticedTensor.implements
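
A worked example tying the new properties together, on a 1-D SLT with physical [a, b, c], basis [[2]], offset [1] and shape [8], whose dense view is [0, a, 0, b, 0, c, 0, 0] (values illustrative, same import assumption as earlier):

import torch

from torchjd.sparse._sparse_latticed_tensor import SparseLatticedTensor

t = SparseLatticedTensor(
    torch.tensor([1.0, 2.0, 3.0]), torch.tensor([[2]]),
    torch.tensor([1]), torch.tensor([8]),
)
# Natural virtual indices span 0 to 2 * (3 - 1) = 4, so the image covers 5 slots.
assert t.physical_image_size.tolist() == [5]
assert t.start_padding.tolist() == [1]  # the offset itself
assert t.end_padding.tolist() == [2]    # 8 - 5 - 1
assert t.padding.tolist() == [[1, 2]]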
