Skip to content

Commit 6070b0f

Browse files
committed
Merge branch 'main' into cleanup-huggingface-hub-integration
2 parents dc6ff64 + 0959766 commit 6070b0f

10 files changed

Lines changed: 25 additions & 67 deletions

File tree

flake.nix

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -158,8 +158,12 @@
158158
pytest
159159
pytest-benchmark
160160
pyyaml
161+
tabulate
162+
tomlkit
161163
torch
162164
types-pyyaml
165+
types-requests
166+
types-tabulate
163167
venvShellHook
164168
]);
165169

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -6,16 +6,15 @@
66

77
from kernels.compat import tomllib
88
from kernels.lockfile import KernelLock, get_kernel_locks
9-
from kernels.upload import upload_kernels_dir
9+
from kernels.cli.upload import upload_kernels_dir
1010
from kernels.utils import (
1111
install_kernel,
1212
install_kernel_all_variants,
1313
KNOWN_BACKENDS,
1414
)
15-
from kernels.versions_cli import print_kernel_versions
16-
from kernels.init import run_init, parse_kernel_name
17-
18-
from .doc import generate_readme_for_kernel
15+
from kernels.cli.init import run_init, parse_kernel_name
16+
from kernels.cli.versions import print_kernel_versions
17+
from kernels.cli.doc import generate_readme_for_kernel
1918

2019

2120
def main():
@@ -260,15 +259,15 @@ def check_kernel(
260259
*, macos: str, manylinux: str, python_abi: str, repo_id: str, revision: str
261260
):
262261
try:
263-
import kernels.check
262+
from kernels.cli import check
264263
except ImportError:
265264
print(
266265
"`kernels check` requires the `kernel-abi-check` package: pip install kernel-abi-check",
267266
file=sys.stderr,
268267
)
269268
sys.exit(1)
270269

271-
kernels.check.check_kernel(
270+
check.check_kernel(
272271
macos=macos,
273272
manylinux=manylinux,
274273
python_abi=python_abi,
@@ -278,7 +277,7 @@ def check_kernel(
278277

279278

280279
def run_benchmark(args):
281-
from kernels import benchmark
280+
from kernels.cli import benchmark
282281

283282
benchmark.run_benchmark(
284283
repo_id=args.repo_id,
File renamed without changes.
Lines changed: 8 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414

1515
from huggingface_hub.utils import build_hf_headers, disable_progress_bars, get_session, hf_raise_for_status
1616

17+
from kernels.benchmark import Benchmark
1718
from kernels.utils import _get_hf_api, backend
1819

1920
MISSING_DEPS: list[str] = []
@@ -63,43 +64,6 @@ def _calculate_iqr_and_outliers(
6364
return q1, q3, iqr, outliers
6465

6566

66-
class Benchmark:
67-
"""Base class for kernel benchmarks.
68-
69-
Subclass this to create a benchmark script with automatic timing,
70-
verification, and reproducibility support. The kernel is loaded
71-
automatically from the repo_id specified in the CLI command.
72-
73-
Example:
74-
class MyBenchmark(Benchmark):
75-
seed = 42
76-
77-
def setup(self):
78-
self.x = torch.randn(128, 1024, device=self.device, dtype=torch.float16)
79-
self.out = torch.empty(128, 512, device=self.device, dtype=torch.float16)
80-
81-
def benchmark_silu(self):
82-
self.kernel.silu_and_mul(self.out, self.x)
83-
84-
def verify_silu(self) -> torch.Tensor:
85-
# Return reference tensor; runner compares with self.out
86-
return torch.nn.functional.silu(self.x[..., :512]) * self.x[..., 512:]
87-
88-
Run with: kernels benchmark <repo_id>
89-
"""
90-
91-
seed: int | None = None # Optional: seed for reproducibility
92-
device: str = "cpu" # Set automatically by runner
93-
94-
def __init__(self) -> None:
95-
self.kernel: Any = None
96-
self.out: Any = None # Output tensor, set by setup methods
97-
98-
def setup(self) -> None:
99-
"""Override to set up tensors as instance attributes."""
100-
pass
101-
102-
10367
@dataclass
10468
class TimingResults:
10569
mean_ms: float
@@ -319,9 +283,7 @@ def _get_macos_gpu() -> tuple[str | None, int | None]:
319283
from ctypes import POINTER, byref, c_char_p, c_int, c_int64, c_uint32, c_void_p
320284

321285
iokit = ctypes.CDLL("/System/Library/Frameworks/IOKit.framework/IOKit")
322-
cf = ctypes.CDLL(
323-
"/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation"
324-
)
286+
cf = ctypes.CDLL("/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation")
325287

326288
iokit.IOServiceMatching.restype = c_void_p
327289
iokit.IOServiceMatching.argtypes = [c_char_p]
@@ -382,9 +344,7 @@ def _get_macos_gpu() -> tuple[str | None, int | None]:
382344
cf.CFRelease(key)
383345

384346
# Get GPU core count
385-
key = cf.CFStringCreateWithCString(
386-
None, b"gpu-core-count", kCFStringEncodingUTF8
387-
)
347+
key = cf.CFStringCreateWithCString(None, b"gpu-core-count", kCFStringEncodingUTF8)
388348
if key:
389349
prop = iokit.IORegistryEntryCreateCFProperty(service, key, None, 0)
390350
if prop:
@@ -425,9 +385,7 @@ def collect_machine_info() -> MachineInfo:
425385
if hasattr(torch.version, "hip") and torch.version.hip:
426386
backend_type = f"ROCm {torch.version.hip}"
427387
else:
428-
backend_type = (
429-
f"CUDA {torch.version.cuda}" if torch.version.cuda else "CUDA"
430-
)
388+
backend_type = f"CUDA {torch.version.cuda}" if torch.version.cuda else "CUDA"
431389
elif backend_name == "xpu":
432390
gpu = torch.xpu.get_device_name(0)
433391
backend_type = "XPU"
@@ -479,16 +437,14 @@ def run_benchmark_class(
479437

480438
# Find all benchmark_* methods
481439
benchmark_methods = [
482-
name
483-
for name in dir(benchmark_cls)
484-
if name.startswith("benchmark_") and callable(getattr(benchmark_cls, name))
440+
name for name in dir(benchmark_cls) if name.startswith("benchmark_") and callable(getattr(benchmark_cls, name))
485441
]
486442

487443
if not benchmark_methods:
488444
raise RuntimeError(f"No benchmark_* methods found in {benchmark_cls.__name__}")
489445

490446
# Load kernel once for all workloads
491-
from kernels import get_local_kernel, get_kernel
447+
from kernels import get_kernel, get_local_kernel
492448

493449
if is_local:
494450
kernel = get_local_kernel(Path(repo_id), "activation")
@@ -663,9 +619,7 @@ def run_benchmark_script(
663619
raise RuntimeError(f"No Benchmark subclasses found in {script_path}")
664620

665621
machine_info = collect_machine_info()
666-
gpu_cores_str = (
667-
f" ({machine_info.gpu_cores} cores)" if machine_info.gpu_cores else ""
668-
)
622+
gpu_cores_str = f" ({machine_info.gpu_cores} cores)" if machine_info.gpu_cores else ""
669623
print(file=sys.stderr)
670624
print(f" GPU {machine_info.gpu}{gpu_cores_str}", file=sys.stderr)
671625
print(f" CPU {machine_info.cpu}", file=sys.stderr)
@@ -736,8 +690,7 @@ def run_benchmark(
736690
if is_local:
737691
if repo_id.count("/") == 1 and not repo_id.startswith(("./", "../")):
738692
warnings.warn(
739-
f"'{repo_id}' exists locally but looks like a repo_id. "
740-
f"Use './{repo_id}' to be explicit.",
693+
f"'{repo_id}' exists locally but looks like a repo_id. Use './{repo_id}' to be explicit.",
741694
stacklevel=2,
742695
)
743696
branch = "local"
@@ -765,7 +718,6 @@ def run_benchmark(
765718
assert revision is not None # Guaranteed by parsing logic above
766719

767720
print(f"Downloading {repo_id}@{revision}...", file=sys.stderr)
768-
769721
if is_local:
770722
repo_path = repo_id_path.resolve()
771723
else:
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
import yaml
77

88
from ._vendored.convert_rst_to_mdx import convert_rst_docstring_to_mdx
9-
from .utils import get_kernel
9+
from kernels.utils import get_kernel
1010

1111
_RE_PARAMETERS = re.compile(
1212
r"<parameters>(((?!<parameters>).)*)</parameters>", re.DOTALL

kernels/tests/test_init.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,10 @@
33
import argparse
44
import os
55

6-
from kernels.init import run_init, parse_kernel_name
6+
from kernels.cli.init import run_init, parse_kernel_name
77
from kernels.utils import KNOWN_BACKENDS
88

9+
910
def e2e_init(backends: list[str]) -> None:
1011
kernel_name = "testuser/test-kernel"
1112
template_repo = "drbh/template"
@@ -16,7 +17,9 @@ def e2e_init(backends: list[str]) -> None:
1617
overwrite=False,
1718
)
1819
expected_normalized_name = "test_kernel"
19-
expected_backend_dirs = {Path(f"{expected_normalized_name}_{backend}") for backend in args.backends}
20+
expected_backend_dirs = {
21+
Path(f"{expected_normalized_name}_{backend}") for backend in args.backends
22+
}
2023

2124
# Replacement logic
2225
# special case for "rocm" backend since it uses "cuda" source

0 commit comments

Comments
 (0)