Skip to content

Commit 225f2f6

Browse files
committed
Add smart_chunking flag and config
1 parent f0ac0db commit 225f2f6

5 files changed

Lines changed: 425 additions & 46 deletions

File tree

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
benchmarks:
2+
- benchmark_name: host_device
3+
num_runs: 20
4+
benchmark_sweep_params:
5+
# Single Chip (1 Chip, 2 Devices)
6+
- {mesh_shape: "1x2", data_size_mb_list: [1, 16, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768], smart_chunking: true}
7+
8+
csv_path: "../microbenchmarks/host_device/single_chip_smart"
9+
trace_dir: "../microbenchmarks/host_device/single_chip_smart/trace"

Ironwood/guides/host_device/host_device.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@ To run the microbenchmarks, apply the following Kubernetes configuration:
1616
kubectl apply -f tpu7x-host-device-benchmark.yaml
1717
```
1818

19+
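To run the benchmark with "Smart Chunking" enabled (this passes `--smart-chunking` to the benchmark script, which then runs only the `*smart_chunking.yaml` configs):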
20+
```bash
21+
kubectl apply -f tpu7x-host-device-benchmark-smart-chunking.yaml
22+
```
23+
1924
To extract the log of the microbenchmark, use `kubectl logs`:
2025
```bash
2126
kubectl logs tpu7x-host-device-benchmark
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
apiVersion: v1
2+
kind: Pod
3+
metadata:
4+
name: tpu7x-host-device-benchmark-smart-chunking
5+
spec:
6+
restartPolicy: Never
7+
nodeSelector:
8+
cloud.google.com/gke-tpu-accelerator: tpu7x
9+
cloud.google.com/gke-tpu-topology: 2x2x1
10+
containers:
11+
- name: tpu-job
12+
image: python:3.12
13+
ports:
14+
- containerPort: 8431
15+
securityContext:
16+
privileged: false
17+
command:
18+
- bash
19+
- -c
20+
- |
21+
set -ex
22+
23+
git clone https://github.com/AI-Hypercomputer/accelerator-microbenchmarks.git
24+
cd accelerator-microbenchmarks
25+
pip install -r requirements.txt
26+
27+
echo "Running Smart Chunking Benchmark..."
28+
bash Ironwood/scripts/run_host_device_benchmark.sh --smart-chunking
29+
30+
resources:
31+
requests:
32+
google.com/tpu: 4
33+
limits:
34+
google.com/tpu: 4

Ironwood/scripts/run_host_device_benchmark.sh

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,15 @@
44
CONFIG_DIR="Ironwood/configs/host_device"
55
SPECIFIC_CONFIG=""
66
INTERLEAVED=false
7+
SMART_CHUNKING=false
78

89
# Helper function for usage
910
usage() {
1011
echo "Usage: $0 [OPTIONS]"
1112
echo "Options:"
1213
echo " --config <path> Path to specific config file (optional)"
1314
echo " --interleaved Run with numactl --interleave=all"
15+
echo " --smart-chunking Run with smart chunking enabled (uses *_smart_chunking.yaml configs)"
1416
echo " --help Show this help message"
1517
exit 1
1618
}
@@ -20,6 +22,7 @@ while [[ "$#" -gt 0 ]]; do
2022
case $1 in
2123
--config) SPECIFIC_CONFIG="$2"; shift ;;
2224
--interleaved) INTERLEAVED=true ;;
25+
--smart-chunking) SMART_CHUNKING=true ;;
2326
--help) usage ;;
2427
*) echo "Unknown parameter passed: $1"; usage ;;
2528
esac
@@ -33,14 +36,26 @@ echo "********************************************************"
3336
echo ""
3437
echo "Configuration:"
3538
echo " Interleaved: $INTERLEAVED"
39+
echo " Smart Chunking: $SMART_CHUNKING"
3640
echo ""
3741

3842
if [ -n "$SPECIFIC_CONFIG" ]; then
3943
CONFIGS=("$SPECIFIC_CONFIG")
4044
else
4145
# Enable nullglob so a pattern with no matching files expands to an empty list instead of the literal pattern string
4246
shopt -s nullglob
43-
CONFIGS=("$CONFIG_DIR"/*.yaml)
47+
if [ "$SMART_CHUNKING" = true ]; then
48+
CONFIGS=("$CONFIG_DIR"/*smart_chunking.yaml)
49+
else
50+
# Exclude smart_chunking configs by default
51+
ALL_CONFIGS=("$CONFIG_DIR"/*.yaml)
52+
CONFIGS=()
53+
for cfg in "${ALL_CONFIGS[@]}"; do
54+
if [[ "$cfg" != *"smart_chunking.yaml" ]]; then
55+
CONFIGS+=("$cfg")
56+
fi
57+
done
58+
fi
4459
shopt -u nullglob
4560
fi
4661

0 commit comments

Comments
 (0)