|
398 | 398 | "id": "test_slow_resnet", |
399 | 399 | "metadata": {}, |
400 | 400 | "outputs": [], |
401 | | - "source": [ |
402 | | - "#| hide\n", |
403 | | - "#| slow\n", |
404 | | - "from torchvision.models import resnet18\n", |
405 | | - "\n", |
406 | | - "_model = resnet18()\n", |
407 | | - "_sample = torch.randn(1, 3, 64, 64)\n", |
408 | | - "_synth = HardwarePeaks(\n", |
409 | | - " peak_flops=1e13, peak_bandwidth=5e11, ridge_point=20.0,\n", |
410 | | - " device=\"cpu\", dtype=\"torch.float32\",\n", |
411 | | - " tf32_enabled=False, cudnn_benchmark=False,\n", |
412 | | - ")\n", |
413 | | - "_ra = RooflineAnalyzer(_model, _sample, peaks=_synth)\n", |
414 | | - "_results = _ra.profile(device=\"cpu\", warmup=2, steps=3)\n", |
415 | | - "assert len(_results) > 0\n", |
416 | | - "assert all(r.bound in {\"memory\", \"compute\", \"undefined\"} for r in _results)\n", |
417 | | - "_ra.summary(top=5)\n", |
418 | | - "_fig = _ra.plot()\n", |
419 | | - "assert isinstance(_fig, go.Figure)" |
420 | | - ] |
| 401 | + "source": "#| hide\n#| notest\nfrom torchvision.models import resnet18\n\n_model = resnet18()\n_sample = torch.randn(1, 3, 64, 64)\n_synth = HardwarePeaks(\n peak_flops=1e13, peak_bandwidth=5e11, ridge_point=20.0,\n device=\"cpu\", dtype=\"torch.float32\",\n tf32_enabled=False, cudnn_benchmark=False,\n)\n_ra = RooflineAnalyzer(_model, _sample, peaks=_synth)\n_results = _ra.profile(device=\"cpu\", warmup=2, steps=3)\nassert len(_results) > 0\nassert all(r.bound in {\"memory\", \"compute\", \"undefined\"} for r in _results)\n_ra.summary(top=5)\n_fig = _ra.plot()\nassert isinstance(_fig, go.Figure)" |
421 | 402 | }, |
422 | 403 | { |
423 | 404 | "cell_type": "code", |
424 | 405 | "execution_count": null, |
425 | 406 | "id": "test_slow_cuda", |
426 | 407 | "metadata": {}, |
427 | 408 | "outputs": [], |
428 | | - "source": [ |
429 | | - "#| hide\n", |
430 | | - "#| slow\n", |
431 | | - "if torch.cuda.is_available():\n", |
432 | | - " _p = measure_peaks(device=\"cuda\", matmul_size=512, bandwidth_mb=64, steps=3, warmup=1, cache=False)\n", |
433 | | - " assert _p.peak_flops > 0\n", |
434 | | - " assert _p.peak_bandwidth > 0\n", |
435 | | - " assert _p.ridge_point > 0" |
436 | | - ] |
| 409 | + "source": "#| hide\n#| notest\nif torch.cuda.is_available():\n _p = measure_peaks(device=\"cuda\", matmul_size=512, bandwidth_mb=64, steps=3, warmup=1, cache=False)\n assert _p.peak_flops > 0\n assert _p.peak_bandwidth > 0\n assert _p.ridge_point > 0" |
437 | 410 | }, |
438 | 411 | { |
439 | 412 | "cell_type": "markdown", |
|
0 commit comments