Rename rope to rotary_position_embedding

voltjia · voltjia · commit c8ae164cf33d · 2025-05-19T13:57:38.000+08:00
diff --git a/compare_code_metrics.py b/compare_code_metrics.py
@@ -131,7 +131,7 @@ def _key_from_kernel_name(path, kernel_name):
         return str(path / f"{kernel_name}.py").removeprefix(str(_PARENT_PATH))[1:]
 
     data = {
-        f"{_BACKSLASH_CHAR}texttt{{{kernel_name.replace('scaled_dot_product_attention', 'sdpa').replace('_', f'{_BACKSLASH_CHAR}_')}}}": {
+        f"{_BACKSLASH_CHAR}texttt{{{kernel_name.replace('scaled_dot_product_attention', 'sdpa').replace('rotary_position_embedding', 'rope').replace('_', f'{_BACKSLASH_CHAR}_')}}}": {
             "Triton": {
                 metric_name: data[
                     _key_from_kernel_name(_TRITON_KERNELS_PATH, kernel_name)
diff --git a/infer.py b/infer.py
@@ -9,7 +9,7 @@
 from linear import Linear, bmm_backend
 from scaled_dot_product_attention import (
     Attention,
-    rope_backend,
+    rotary_position_embedding_backend,
     scaled_dot_product_attention_backend,
 )
 from silu import SiLU, silu_backend
@@ -94,7 +94,7 @@
     with (
         bmm_backend(backend),
         rms_norm_backend(backend),
-        rope_backend(backend),
+        rotary_position_embedding_backend(backend),
         scaled_dot_product_attention_backend(backend),
         silu_backend(backend),
     ):
diff --git a/ops/ninetoothed/kernels/rotary_position_embedding.py b/ops/ninetoothed/kernels/rotary_position_embedding.py
diff --git a/ops/ninetoothed/torch.py b/ops/ninetoothed/torch.py
@@ -9,7 +9,7 @@
 import ops.ninetoothed.kernels.fused_rms_norm
 import ops.ninetoothed.kernels.mm
 import ops.ninetoothed.kernels.rms_norm
-import ops.ninetoothed.kernels.rope
+import ops.ninetoothed.kernels.rotary_position_embedding
 import ops.ninetoothed.kernels.scaled_dot_product_attention
 import ops.ninetoothed.kernels.silu
 import ops.ninetoothed.kernels.softmax
@@ -92,14 +92,16 @@ def rms_norm(input, eps=None):
     return output
 
 
-def rope(input, sin_table, cos_table, interleaved=True):
+def rotary_position_embedding(input, sin_table, cos_table, interleaved=True):
     batch_size, _, num_heads, _ = input.shape
 
     output = input.clone()
     sin_table = sin_table[None, :, None, :].expand(batch_size, -1, num_heads, -1)
     cos_table = cos_table[None, :, None, :].expand(batch_size, -1, num_heads, -1)
 
-    ops.ninetoothed.kernels.rope.kernel(output, sin_table, cos_table, interleaved)
+    ops.ninetoothed.kernels.rotary_position_embedding.kernel(
+        output, sin_table, cos_table, interleaved
+    )
 
     return output
 
diff --git a/ops/triton/kernels/rotary_position_embedding.py b/ops/triton/kernels/rotary_position_embedding.py
diff --git a/ops/triton/torch.py b/ops/triton/torch.py
@@ -10,7 +10,7 @@
 import ops.triton.kernels.fused_rms_norm
 import ops.triton.kernels.mm
 import ops.triton.kernels.rms_norm
-import ops.triton.kernels.rope
+import ops.triton.kernels.rotary_position_embedding
 import ops.triton.kernels.scaled_dot_product_attention
 import ops.triton.kernels.silu
 import ops.triton.kernels.softmax
@@ -195,14 +195,16 @@ def rms_norm(input, eps=None):
     return output
 
 
-def rope(input, sin_table, cos_table, interleaved=True):
+def rotary_position_embedding(input, sin_table, cos_table, interleaved=True):
     batch_size, seq_len, num_heads, emb_dim = input.shape
 
     BLOCK_SIZE = triton.next_power_of_2(emb_dim // 2)
 
     output = input.clone()
 
-    ops.triton.kernels.rope.kernel[(batch_size, seq_len, num_heads)](
+    ops.triton.kernels.rotary_position_embedding.kernel[
+        (batch_size, seq_len, num_heads)
+    ](
         output,
         sin_table,
         cos_table,
diff --git a/rotary_position_embedding.py b/rotary_position_embedding.py
@@ -5,7 +5,7 @@
 import ops.triton.torch
 
 
-def torch_rope(input, sin_table, cos_table, interleaved=True):
+def torch_rotary_position_embedding(input, sin_table, cos_table, interleaved=True):
     batch_size, seq_len, num_heads, emb_dim = input.shape
 
     assert emb_dim % 2 == 0, "The embedding dimension must be even."
@@ -55,11 +55,15 @@ def _generate_sin_and_cos_tables(
     sin_table, cos_table = _generate_sin_and_cos_tables(seq_len, emb_dim)
     x = torch.randn(batch_size, seq_len, num_heads, emb_dim, dtype=dtype, device=device)
 
-    ninetoothed_output = ops.ninetoothed.torch.rope(
+    ninetoothed_output = ops.ninetoothed.torch.rotary_position_embedding(
+        x, sin_table, cos_table, interleaved=False
+    )
+    torch_output = torch_rotary_position_embedding(
+        x, sin_table, cos_table, interleaved=False
+    )
+    triton_output = ops.triton.torch.rotary_position_embedding(
         x, sin_table, cos_table, interleaved=False
     )
-    torch_output = torch_rope(x, sin_table, cos_table, interleaved=False)
-    triton_output = ops.triton.torch.rope(x, sin_table, cos_table, interleaved=False)
 
     print(ninetoothed_output)
     print(torch_output)
@@ -83,7 +87,7 @@ def _generate_sin_and_cos_tables(
             line_names=["NineToothed", "PyTorch", "Triton"],
             styles=[("blue", "-"), ("green", "-"), ("orange", "-")],
             ylabel="ms",
-            plot_name="rope-performance",
+            plot_name="rotary_position_embedding-performance",
             args={},
         )
     )
@@ -98,13 +102,19 @@ def benchmark(seq_len, provider):
 
         if provider == "ninetoothed":
             ms = triton.testing.do_bench(
-                lambda: ops.ninetoothed.torch.rope(x, sin_table, cos_table)
+                lambda: ops.ninetoothed.torch.rotary_position_embedding(
+                    x, sin_table, cos_table
+                )
             )
         elif provider == "torch":
-            ms = triton.testing.do_bench(lambda: torch_rope(x, sin_table, cos_table))
+            ms = triton.testing.do_bench(
+                lambda: torch_rotary_position_embedding(x, sin_table, cos_table)
+            )
         elif provider == "triton":
             ms = triton.testing.do_bench(
-                lambda: ops.triton.torch.rope(x, sin_table, cos_table)
+                lambda: ops.triton.torch.rotary_position_embedding(
+                    x, sin_table, cos_table
+                )
             )
 
         return ms
diff --git a/scaled_dot_product_attention.py b/scaled_dot_product_attention.py
@@ -8,13 +8,13 @@
 
 import ops.ninetoothed.torch
 import ops.triton.torch
-from rope import torch_rope
+from rotary_position_embedding import torch_rotary_position_embedding
 
 
 class Attention(nn.Module):
     scaled_dot_product_attention = None
 
-    rope = None
+    rotary_position_embedding = None
 
     def __init__(self, other):
         super().__init__()
@@ -41,8 +41,12 @@ def forward(
         sin_table = sin_table[0]
         cos_table = cos_table[0]
 
-        query_states = type(self).rope(query_states, sin_table, cos_table)
-        key_states = type(self).rope(key_states, sin_table, cos_table)
+        query_states = type(self).rotary_position_embedding(
+            query_states, sin_table, cos_table
+        )
+        key_states = type(self).rotary_position_embedding(
+            key_states, sin_table, cos_table
+        )
 
         query_states = query_states.transpose(1, 2)
         key_states = key_states.transpose(1, 2)
@@ -94,24 +98,24 @@ def scaled_dot_product_attention_backend(backend_name):
 
 
 @contextmanager
-def rope_backend(backend_name):
-    _prev_impl = Attention.rope
+def rotary_position_embedding_backend(backend_name):
+    _prev_impl = Attention.rotary_position_embedding
 
     if backend_name == "ninetoothed":
-        impl = ops.ninetoothed.torch.rope
+        impl = ops.ninetoothed.torch.rotary_position_embedding
     elif backend_name == "triton":
-        impl = ops.triton.torch.rope
+        impl = ops.triton.torch.rotary_position_embedding
     elif backend_name == "torch":
-        impl = torch_rope
+        impl = torch_rotary_position_embedding
     else:
         raise ValueError(f"unknown backend: `{backend_name}`")
 
-    Attention.rope = impl
+    Attention.rotary_position_embedding = impl
 
     try:
         yield
     finally:
-        Attention.rope = _prev_impl
+        Attention.rotary_position_embedding = _prev_impl
 
 
 if __name__ == "__main__":