diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
new file mode 100644
index 00000000..115fd96e
--- /dev/null
+++ b/.github/pull_request_template.md
@@ -0,0 +1,59 @@
+## Important: Read before submitting
+
+> **New contributors:** Please open or comment on an issue **before** submitting a
+> PR to discuss the change you'd like to make. This helps us align on approach and
+> avoids wasted effort on changes we may not be able to merge. Please only create
+> a PR once one of the project maintainers agrees on your outlined approach.
+>
+> PRs from contributors who are not vouched for are automatically closed. Regular
+> contributors are added to the vouch list.
+>
+> The kernels-community repository is for:
+>
+> 1. Kernels developed by Hugging Face and partners.
+> 2. Kernels developed by third parties that have not been 'kernelized' yet but
+>    are used by Hugging Face projects such as diffusers and transformers.
+>
+> We cannot accept PRs for the following:
+>
+> - New kernels that are not in categories (1) and (2).
+> - Changes to kernels in category (2); these should be submitted upstream.
+>
+> For LLM-generated changes, we prefer that you write your prompt in an issue
+> rather than open a PR with LLM-generated changes.
+
+## Related issue
+
+<!-- Link the issue this PR addresses. Every PR should have a corresponding issue. -->
+
+Closes #
+
+## What does this PR do?
+
+<!-- A brief description of the changes. -->
+
+## Motivation
+
+<!-- Why is this change needed? What context is important for reviewers? -->
+
+## Changes
+
+<!-- Bulleted list of key changes. -->
+
+-
+
+## Testing
+
+<!-- How were these changes tested? Include commands, test output, or benchmarks. -->
+
+-
+
+## Checklist
+
+- [ ] This PR is linked to an issue that was discussed and approved
+- [ ] I have tested these changes locally
+- [ ] New/changed functionality has test coverage
+- LLM disclosure:
+  - [ ] I did not use an LLM to create this PR.
+  - [ ] I used an LLM for assistance while creating this PR.
+  - [ ] This PR was mostly or completely generated by an LLM.
diff --git a/activation/torch-ext/activation/__init__.py b/activation/torch-ext/activation/__init__.py
index 1a9cd15a..cfce59ba 100644
--- a/activation/torch-ext/activation/__init__.py
+++ b/activation/torch-ext/activation/__init__.py
@@ -5,56 +5,56 @@
 from . import layers
 
 
-def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
+def silu_and_mul(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
     ops.silu_and_mul(out, x)
     return out
 
 
-def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> None:
+def mul_and_silu(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
     ops.mul_and_silu(out, x)
     return out
 
 
-def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
+def gelu_and_mul(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
     ops.gelu_and_mul(out, x)
     return out
 
 
-def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> None:
+def gelu_tanh_and_mul(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
     ops.gelu_tanh_and_mul(out, x)
     return out
 
 
-def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> None:
+def fatrelu_and_mul(out: torch.Tensor, x: torch.Tensor, threshold: float = 0.0) -> torch.Tensor:
     ops.fatrelu_and_mul(out, x, threshold)
     return out
 
 
-def gelu(out: torch.Tensor, x: torch.Tensor) -> None:
+def gelu(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
     ops.gelu(out, x)
     return out
 
-def silu(out: torch.Tensor, x: torch.Tensor) -> None:
+def silu(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
     ops.silu(out, x)
     return out
 
 
-def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> None:
+def gelu_tanh(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
     ops.gelu_tanh(out, x)
     return out
 
 
-def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> None:
+def gelu_fast(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
     ops.gelu_fast(out, x)
     return out
 
 
-def gelu_new(out: torch.Tensor, x: torch.Tensor) -> None:
+def gelu_new(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
     ops.gelu_new(out, x)
     return out
 
 
-def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> None:
+def gelu_quick(out: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
     ops.gelu_quick(out, x)
     return out