Update the input/output tensor shapes in the Hyena operator GPU forward tests

farhadrgh · farhadrgh · commit c14f4336c92e · 2025-04-02T18:29:40.000Z
Signed-off-by: Farhad Ramezanghorbani <farhadr@nvidia.com>
diff --git a/sub-packages/bionemo-evo2/tests/bionemo/evo2/test_hyena_operators.py b/sub-packages/bionemo-evo2/tests/bionemo/evo2/test_hyena_operators.py
@@ -68,14 +68,14 @@ def test_gpu_forward(self, operator: ParallelHyenaOperator):
         g = operator.num_groups
         dg = operator.group_dim
 
-        x1 = torch.ones((batch_size, seq_len, g, dg), device=device)
-        x2 = torch.ones((batch_size, seq_len, g, dg), device=device)
-        v = torch.ones((batch_size, seq_len, g, dg), device=device)
+        x1 = torch.ones((batch_size, (g * dg), seq_len), device=device)
+        x2 = torch.ones((batch_size, (g * dg), seq_len), device=device)
+        v = torch.ones((batch_size, (g * dg), seq_len), device=device)
 
         output = operator(x1, x2, v)
         assert output.shape[0] == batch_size
-        assert output.shape[1] == seq_len
-        assert output.shape[2] == operator.hidden_size
+        assert output.shape[1] == operator.hidden_size
+        assert output.shape[2] == seq_len
 
 
 class TestParallelShortHyenaOperator:
@@ -108,14 +108,14 @@ def test_gpu_forward(self, operator: ParallelShortHyenaOperator):
         g = operator.num_groups
         dg = operator.group_dim
 
-        x1 = torch.ones((batch_size, seq_len, g, dg), device=device)
-        x2 = torch.ones((batch_size, seq_len, g, dg), device=device)
-        v = torch.ones((batch_size, seq_len, g, dg), device=device)
+        x1 = torch.ones((batch_size, (g * dg), seq_len), device=device)
+        x2 = torch.ones((batch_size, (g * dg), seq_len), device=device)
+        v = torch.ones((batch_size, (g * dg), seq_len), device=device)
 
         output = operator(x1, x2, v)
         assert output.shape[0] == batch_size
-        assert output.shape[1] == seq_len
-        assert output.shape[2] == operator.hidden_size
+        assert output.shape[1] == operator.hidden_size
+        assert output.shape[2] == seq_len
 
 
 class TestParallelShortHyenaOperatorWithConvBias:
@@ -148,14 +148,14 @@ def test_gpu_forward(self, operator: ParallelShortHyenaOperator):
         g = operator.num_groups
         dg = operator.group_dim
 
-        x1 = torch.ones((batch_size, seq_len, g, dg), device=device)
-        x2 = torch.ones((batch_size, seq_len, g, dg), device=device)
-        v = torch.ones((batch_size, seq_len, g, dg), device=device)
+        x1 = torch.ones((batch_size, (g * dg), seq_len), device=device)
+        x2 = torch.ones((batch_size, (g * dg), seq_len), device=device)
+        v = torch.ones((batch_size, (g * dg), seq_len), device=device)
 
         output = operator(x1, x2, v)
         assert output.shape[0] == batch_size
-        assert output.shape[1] == seq_len
-        assert output.shape[2] == operator.hidden_size
+        assert output.shape[1] == operator.hidden_size
+        assert output.shape[2] == seq_len
 
 
 class TestParallelCausalDepthwiseConv1d: