Skip to content

Commit c352cad

Browse files
Merge pull request #17 from stackav-oss/feature/jmanning/more-vllm-0.8.5-cleanup-v3
More vLLM 0.8.5 cleanup
2 parents f575c64 + 999d786 commit c352cad

46 files changed

Lines changed: 247 additions & 453 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

benchmarks/bnb_dequantize_blockwise_benchmark.py

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -69,47 +69,35 @@ def _to_torch_dtype(dtype_str: str) -> torch.dtype:
6969
)
7070
@click.option(
7171
"--compress-statistics",
72-
required=False,
73-
type=bool,
7472
is_flag=True,
75-
default=False,
7673
help="Flag for double-quantization",
7774
)
7875
@click.option(
7976
"--enable-bnb",
80-
required=False,
81-
type=bool,
8277
is_flag=True,
8378
default=envs.CONCH_BENCH_ENABLE_ALL_REF,
8479
help="Flag to enable BNB reference impl",
8580
)
8681
@click.option(
87-
"-i",
8882
"--num-iterations",
8983
required=False,
9084
type=int,
9185
default=100,
9286
help="Number of iterations",
9387
)
9488
@click.option(
95-
"-w",
9689
"--num-warmup-iterations",
9790
required=False,
9891
type=int,
9992
default=10,
10093
help="Number of warmup iterations",
10194
)
10295
@click.option(
103-
"-v",
10496
"--verbose",
105-
required=False,
106-
type=bool,
10797
is_flag=True,
108-
default=False,
10998
help="Flag for printing verbose output",
11099
)
111100
@click.option(
112-
"-g",
113101
"--gpu",
114102
required=False,
115103
type=str,
@@ -118,10 +106,7 @@ def _to_torch_dtype(dtype_str: str) -> torch.dtype:
118106
)
119107
@click.option(
120108
"--csv",
121-
required=False,
122-
type=bool,
123109
is_flag=True,
124-
default=False,
125110
help="Flag for printing results in CSV format",
126111
)
127112
def main( # noqa: PLR0913
@@ -202,7 +187,9 @@ def main( # noqa: PLR0913
202187
error_msg = "bitsandbytes must be installed and enabled via CONCH_ENABLE_BNB=1"
203188
raise NotImplementedError(error_msg)
204189

205-
from bitsandbytes.functional import dequantize_4bit as bnb_dequantize_4bit # type: ignore[import-untyped]
190+
from bitsandbytes.functional import ( # type: ignore[import-not-found, import-untyped, unused-ignore] # isort:skip
191+
dequantize_4bit as bnb_dequantize_4bit,
192+
)
206193
from bitsandbytes.functional import quantize_4bit as bnb_quantize_4bit
207194

208195
bnb_quantized, bnb_state = bnb_quantize_4bit(

benchmarks/bnb_quantize_blockwise_benchmark.py

Lines changed: 3 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -68,47 +68,35 @@ def _to_torch_dtype(dtype_str: str) -> torch.dtype:
6868
)
6969
@click.option(
7070
"--compress-statistics",
71-
required=False,
72-
type=bool,
7371
is_flag=True,
74-
default=False,
7572
help="Flag for double-quantization",
7673
)
7774
@click.option(
7875
"--enable-bnb",
79-
required=False,
80-
type=bool,
8176
is_flag=True,
8277
default=envs.CONCH_BENCH_ENABLE_ALL_REF,
8378
help="Flag to enable BNB reference impl",
8479
)
8580
@click.option(
86-
"-i",
8781
"--num-iterations",
8882
required=False,
8983
type=int,
9084
default=100,
9185
help="Number of iterations",
9286
)
9387
@click.option(
94-
"-w",
9588
"--num-warmup-iterations",
9689
required=False,
9790
type=int,
9891
default=10,
9992
help="Number of warmup iterations",
10093
)
10194
@click.option(
102-
"-v",
10395
"--verbose",
104-
required=False,
105-
type=bool,
10696
is_flag=True,
107-
default=False,
10897
help="Flag for printing verbose output",
10998
)
11099
@click.option(
111-
"-g",
112100
"--gpu",
113101
required=False,
114102
type=str,
@@ -117,10 +105,7 @@ def _to_torch_dtype(dtype_str: str) -> torch.dtype:
117105
)
118106
@click.option(
119107
"--csv",
120-
required=False,
121-
type=bool,
122108
is_flag=True,
123-
default=False,
124109
help="Flag for printing results in CSV format",
125110
)
126111
def main( # noqa: PLR0913
@@ -192,7 +177,9 @@ def main( # noqa: PLR0913
192177
error_msg = "bitsandbytes must be installed and enabled via CONCH_ENABLE_BNB=1"
193178
raise NotImplementedError(error_msg)
194179

195-
from bitsandbytes.functional import quantize_4bit as bnb_quantize_4bit # type: ignore[import-untyped]
180+
from bitsandbytes.functional import ( # type: ignore[import-not-found, import-untyped, unused-ignore] # isort:skip
181+
quantize_4bit as bnb_quantize_4bit,
182+
)
196183

197184
bnb_output, bnb_state = bnb_quantize_4bit(
198185
x,

benchmarks/copy_blocks_benchmark.py

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,88 +18,74 @@
1818

1919
@click.command()
2020
@click.option(
21-
"-h",
2221
"--head-dim",
2322
required=True,
2423
type=int,
2524
default=256,
2625
help="Head dimension",
2726
)
2827
@click.option(
29-
"-l",
3028
"--num-layers",
3129
required=True,
3230
type=int,
3331
default=8,
3432
help="Number of layers",
3533
)
3634
@click.option(
37-
"-c",
3835
"--cache-block-size",
3936
required=True,
4037
type=int,
4138
default=32,
4239
help="Number of KV vectors in each cache block",
4340
)
4441
@click.option(
45-
"-k",
4642
"--num-kv-heads",
4743
required=False,
4844
type=int,
4945
default=8,
5046
help="Number of kv heads",
5147
)
5248
@click.option(
53-
"-b",
5449
"--num-blocks",
5550
required=False,
5651
type=int,
5752
default=2000,
5853
help="Number of blocks in cache",
5954
)
6055
@click.option(
61-
"-m",
6256
"--num-mappings",
6357
required=False,
6458
type=int,
6559
default=512,
6660
help="Number of mappings to copy",
6761
)
6862
@click.option(
69-
"-i",
7063
"--num-iterations",
7164
required=False,
7265
type=int,
7366
default=100,
7467
help="Number of iterations",
7568
)
7669
@click.option(
77-
"-w",
7870
"--num-warmup-iterations",
7971
required=False,
8072
type=int,
8173
default=10,
8274
help="Number of warmup iterations",
8375
)
8476
@click.option(
85-
"-a",
8677
"--absolute-tolerance",
8778
required=False,
8879
type=float,
8980
default=1e-3,
9081
help="Absolute tolerance to match with",
9182
)
9283
@click.option(
93-
"-v",
9484
"--verbose",
95-
required=False,
96-
type=bool,
9785
is_flag=True,
98-
default=False,
9986
help="Flag for printing verbose output",
10087
)
10188
@click.option(
102-
"-g",
10389
"--gpu",
10490
required=False,
10591
type=str,
@@ -108,10 +94,7 @@
10894
)
10995
@click.option(
11096
"--csv",
111-
required=False,
112-
type=bool,
11397
is_flag=True,
114-
default=False,
11598
help="Flag for printing results in CSV format",
11699
)
117100
def main(

benchmarks/fused_add_rms_norm_benchmark.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,48 +17,39 @@
1717

1818
@click.command()
1919
@click.option(
20-
"-d",
2120
"--hidden-size",
2221
required=True,
2322
type=int,
2423
default=2048,
2524
help="Dimension",
2625
)
2726
@click.option(
28-
"-t",
2927
"--num-tokens",
3028
required=True,
3129
type=int,
3230
default=4096,
3331
help="Number of tokens",
3432
)
3533
@click.option(
36-
"-i",
3734
"--num-iterations",
3835
required=False,
3936
type=int,
4037
default=100,
4138
help="Number of iterations",
4239
)
4340
@click.option(
44-
"-w",
4541
"--num-warmup-iterations",
4642
required=False,
4743
type=int,
4844
default=10,
4945
help="Number of warmup iterations",
5046
)
5147
@click.option(
52-
"-v",
5348
"--verbose",
54-
required=False,
55-
type=bool,
5649
is_flag=True,
57-
default=False,
5850
help="Flag for printing verbose output",
5951
)
6052
@click.option(
61-
"-g",
6253
"--gpu",
6354
required=False,
6455
type=str,
@@ -67,10 +58,7 @@
6758
)
6859
@click.option(
6960
"--csv",
70-
required=False,
71-
type=bool,
7261
is_flag=True,
73-
default=False,
7462
help="Flag for printing results in CSV format",
7563
)
7664
def main( # noqa: PLR0913

benchmarks/gelu_tanh_and_mul_benchmark.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,56 +17,46 @@
1717

1818
@click.command()
1919
@click.option(
20-
"-s",
2120
"--hidden-size",
2221
required=False,
2322
type=int,
2423
default=13824,
2524
help="Feedforward hidden size",
2625
)
2726
@click.option(
28-
"-t",
2927
"--num-tokens",
3028
required=False,
3129
type=int,
3230
default=8192,
3331
help="Number of tokens",
3432
)
3533
@click.option(
36-
"-i",
3734
"--num-iterations",
3835
required=False,
3936
type=int,
4037
default=100,
4138
help="Number of iterations",
4239
)
4340
@click.option(
44-
"-w",
4541
"--num-warmup-iterations",
4642
required=False,
4743
type=int,
4844
default=10,
4945
help="Number of warmup iterations",
5046
)
5147
@click.option(
52-
"-a",
5348
"--absolute-tolerance",
5449
required=False,
5550
type=float,
5651
default=1e-3,
5752
help="Absolute tolerance to match with",
5853
)
5954
@click.option(
60-
"-v",
6155
"--verbose",
62-
required=False,
63-
type=bool,
6456
is_flag=True,
65-
default=False,
6657
help="Flag for printing verbose output",
6758
)
6859
@click.option(
69-
"-g",
7060
"--gpu",
7161
required=False,
7262
type=str,
@@ -75,10 +65,7 @@
7565
)
7666
@click.option(
7767
"--csv",
78-
required=False,
79-
type=bool,
8068
is_flag=True,
81-
default=False,
8269
help="Flag for printing results in CSV format",
8370
)
8471
def main(

0 commit comments

Comments
 (0)