Skip to content

Commit 30d299f

Browse files
committed
ref: reuse load patches
Signed-off-by: Alexander Droste <alexander.droste@protonmail.com>
1 parent e3f0aee commit 30d299f

5 files changed

Lines changed: 106 additions & 153 deletions

File tree

vortex-cuda/benches/dynamic_dispatch_cuda.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ struct BenchRunner {
123123
}
124124

125125
impl BenchRunner {
126-
fn new(array: &vortex::array::ArrayRef, len: usize, cuda_ctx: &CudaExecutionCtx) -> Self {
126+
fn new(array: &vortex::array::ArrayRef, len: usize, cuda_ctx: &mut CudaExecutionCtx) -> Self {
127127
let plan = match DispatchPlan::new(array).vortex_expect("build_dyn_dispatch_plan") {
128128
DispatchPlan::Fused(plan) => plan,
129129
_ => unreachable!("encoding not fusable"),
@@ -201,7 +201,7 @@ fn bench_for_bitpacked(c: &mut Criterion) {
201201
let mut cuda_ctx =
202202
CudaSession::create_execution_ctx(&VortexSession::empty()).vortex_expect("ctx");
203203

204-
let bench_runner = BenchRunner::new(&array, n, &cuda_ctx);
204+
let bench_runner = BenchRunner::new(&array, n, &mut cuda_ctx);
205205

206206
b.iter_custom(|iters| {
207207
let mut total_time = Duration::ZERO;
@@ -246,7 +246,7 @@ fn bench_dict_bp_codes(c: &mut Criterion) {
246246
let mut cuda_ctx =
247247
CudaSession::create_execution_ctx(&VortexSession::empty()).vortex_expect("ctx");
248248

249-
let bench_runner = BenchRunner::new(&array, n, &cuda_ctx);
249+
let bench_runner = BenchRunner::new(&array, n, &mut cuda_ctx);
250250

251251
b.iter_custom(|iters| {
252252
let mut total_time = Duration::ZERO;
@@ -290,7 +290,7 @@ fn bench_runend(c: &mut Criterion) {
290290
let mut cuda_ctx =
291291
CudaSession::create_execution_ctx(&VortexSession::empty()).vortex_expect("ctx");
292292

293-
let bench_runner = BenchRunner::new(&array, n, &cuda_ctx);
293+
let bench_runner = BenchRunner::new(&array, n, &mut cuda_ctx);
294294

295295
b.iter_custom(|iters| {
296296
let mut total_time = Duration::ZERO;
@@ -344,7 +344,7 @@ fn bench_dict_bp_codes_bp_for_values(c: &mut Criterion) {
344344
let mut cuda_ctx =
345345
CudaSession::create_execution_ctx(&VortexSession::empty()).vortex_expect("ctx");
346346

347-
let bench_runner = BenchRunner::new(&array, n, &cuda_ctx);
347+
let bench_runner = BenchRunner::new(&array, n, &mut cuda_ctx);
348348

349349
b.iter_custom(|iters| {
350350
let mut total_time = Duration::ZERO;
@@ -409,7 +409,7 @@ fn bench_alp_for_bitpacked(c: &mut Criterion) {
409409
let mut cuda_ctx =
410410
CudaSession::create_execution_ctx(&VortexSession::empty()).vortex_expect("ctx");
411411

412-
let bench_runner = BenchRunner::new(&array, n, &cuda_ctx);
412+
let bench_runner = BenchRunner::new(&array, n, &mut cuda_ctx);
413413

414414
b.iter_custom(|iters| {
415415
let mut total_time = Duration::ZERO;

vortex-cuda/src/dynamic_dispatch/mod.rs

Lines changed: 53 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -557,7 +557,7 @@ mod tests {
557557

558558
fn dispatch_plan(
559559
array: &vortex::array::ArrayRef,
560-
ctx: &CudaExecutionCtx,
560+
ctx: &mut CudaExecutionCtx,
561561
) -> VortexResult<MaterializedPlan> {
562562
match DispatchPlan::new(array)? {
563563
DispatchPlan::Fused(plan) => plan.materialize(ctx),
@@ -578,7 +578,7 @@ mod tests {
578578
.collect();
579579

580580
let bitpacked = bitpacked_array_u32(bit_width, len);
581-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
581+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
582582
let packed = bitpacked.packed().clone();
583583
let device_input = futures::executor::block_on(cuda_ctx.ensure_on_device(packed))?;
584584
let input_ptr = device_input.cuda_device_ptr()?;
@@ -689,7 +689,7 @@ mod tests {
689689
})
690690
.collect();
691691

692-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
692+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
693693
let (input_ptr, _di) = copy_raw_to_device(&cuda_ctx, &data)?;
694694

695695
let plan = CudaDispatchPlan::new(
@@ -785,8 +785,8 @@ mod tests {
785785
.collect();
786786

787787
let bp = bitpacked_array_u32(bit_width, len);
788-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
789-
let plan = dispatch_plan(&bp.into_array(), &cuda_ctx)?;
788+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
789+
let plan = dispatch_plan(&bp.into_array(), &mut cuda_ctx)?;
790790

791791
let actual =
792792
run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?;
@@ -810,8 +810,8 @@ mod tests {
810810
let bp = bitpacked_array_u32(bit_width, len);
811811
let for_arr = FoR::try_new(bp.into_array(), Scalar::from(reference))?;
812812

813-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
814-
let plan = dispatch_plan(&for_arr.into_array(), &cuda_ctx)?;
813+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
814+
let plan = dispatch_plan(&for_arr.into_array(), &mut cuda_ctx)?;
815815

816816
let actual =
817817
run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?;
@@ -837,7 +837,7 @@ mod tests {
837837
let values_arr = PrimitiveArray::new(Buffer::from(values), NonNullable).into_array();
838838
let re = RunEnd::new(ends_arr, values_arr, cuda_ctx.execution_ctx());
839839

840-
let plan = dispatch_plan(&re.into_array(), &cuda_ctx)?;
840+
let plan = dispatch_plan(&re.into_array(), &mut cuda_ctx)?;
841841

842842
let actual =
843843
run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?;
@@ -877,8 +877,8 @@ mod tests {
877877

878878
let dict = DictArray::try_new(codes_bp.into_array(), dict_for.into_array())?;
879879

880-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
881-
let plan = dispatch_plan(&dict.into_array(), &cuda_ctx)?;
880+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
881+
let plan = dispatch_plan(&dict.into_array(), &mut cuda_ctx)?;
882882

883883
let actual =
884884
run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?;
@@ -914,8 +914,8 @@ mod tests {
914914
None,
915915
);
916916

917-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
918-
let plan = dispatch_plan(&tree.into_array(), &cuda_ctx)?;
917+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
918+
let plan = dispatch_plan(&tree.into_array(), &mut cuda_ctx)?;
919919

920920
let actual =
921921
run_dispatch_plan_f32(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?;
@@ -947,8 +947,8 @@ mod tests {
947947
)?;
948948
let zz = ZigZag::try_new(bp.into_array())?;
949949

950-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
951-
let plan = dispatch_plan(&zz.into_array(), &cuda_ctx)?;
950+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
951+
let plan = dispatch_plan(&zz.into_array(), &mut cuda_ctx)?;
952952

953953
let actual =
954954
run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?;
@@ -977,7 +977,7 @@ mod tests {
977977
let re = RunEnd::new(ends_arr, values_arr, cuda_ctx.execution_ctx());
978978
let for_arr = FoR::try_new(re.into_array(), Scalar::from(reference))?;
979979

980-
let plan = dispatch_plan(&for_arr.into_array(), &cuda_ctx)?;
980+
let plan = dispatch_plan(&for_arr.into_array(), &mut cuda_ctx)?;
981981

982982
let actual =
983983
run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?;
@@ -1005,8 +1005,8 @@ mod tests {
10051005
let dict = DictArray::try_new(codes_prim.into_array(), values_prim.into_array())?;
10061006
let for_arr = FoR::try_new(dict.into_array(), Scalar::from(reference))?;
10071007

1008-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1009-
let plan = dispatch_plan(&for_arr.into_array(), &cuda_ctx)?;
1008+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1009+
let plan = dispatch_plan(&for_arr.into_array(), &mut cuda_ctx)?;
10101010

10111011
let actual =
10121012
run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?;
@@ -1037,8 +1037,8 @@ mod tests {
10371037
let values_prim = PrimitiveArray::new(Buffer::from(dict_values), NonNullable);
10381038
let dict = DictArray::try_new(codes_for.into_array(), values_prim.into_array())?;
10391039

1040-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1041-
let plan = dispatch_plan(&dict.into_array(), &cuda_ctx)?;
1040+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1041+
let plan = dispatch_plan(&dict.into_array(), &mut cuda_ctx)?;
10421042

10431043
let actual =
10441044
run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?;
@@ -1066,8 +1066,8 @@ mod tests {
10661066

10671067
let dict = DictArray::try_new(codes_bp.into_array(), values_prim.into_array())?;
10681068

1069-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1070-
let plan = dispatch_plan(&dict.into_array(), &cuda_ctx)?;
1069+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1070+
let plan = dispatch_plan(&dict.into_array(), &mut cuda_ctx)?;
10711071

10721072
let actual =
10731073
run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?;
@@ -1210,8 +1210,8 @@ mod tests {
12101210

12111211
let expected: Vec<u32> = data[slice_start..slice_end].to_vec();
12121212

1213-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1214-
let plan = dispatch_plan(&sliced, &cuda_ctx)?;
1213+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1214+
let plan = dispatch_plan(&sliced, &mut cuda_ctx)?;
12151215

12161216
let actual = run_dynamic_dispatch_plan(
12171217
&cuda_ctx,
@@ -1265,8 +1265,8 @@ mod tests {
12651265
let sliced = zz.into_array().slice(slice_start..slice_end)?;
12661266
let expected: Vec<u32> = all_decoded[slice_start..slice_end].to_vec();
12671267

1268-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1269-
let plan = dispatch_plan(&sliced, &cuda_ctx)?;
1268+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1269+
let plan = dispatch_plan(&sliced, &mut cuda_ctx)?;
12701270

12711271
let actual = run_dynamic_dispatch_plan(
12721272
&cuda_ctx,
@@ -1315,8 +1315,8 @@ mod tests {
13151315
.map(|&c| dict_values[c as usize])
13161316
.collect();
13171317

1318-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1319-
let plan = dispatch_plan(&sliced, &cuda_ctx)?;
1318+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1319+
let plan = dispatch_plan(&sliced, &mut cuda_ctx)?;
13201320

13211321
let actual = run_dynamic_dispatch_plan(
13221322
&cuda_ctx,
@@ -1364,8 +1364,8 @@ mod tests {
13641364
let sliced = bp.into_array().slice(slice_start..slice_end)?;
13651365
let expected: Vec<u32> = data[slice_start..slice_end].to_vec();
13661366

1367-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1368-
let plan = dispatch_plan(&sliced, &cuda_ctx)?;
1367+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1368+
let plan = dispatch_plan(&sliced, &mut cuda_ctx)?;
13691369

13701370
let actual = run_dynamic_dispatch_plan(
13711371
&cuda_ctx,
@@ -1417,8 +1417,8 @@ mod tests {
14171417
let sliced = for_arr.into_array().slice(slice_start..slice_end)?;
14181418
let expected: Vec<u32> = all_decoded[slice_start..slice_end].to_vec();
14191419

1420-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1421-
let plan = dispatch_plan(&sliced, &cuda_ctx)?;
1420+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1421+
let plan = dispatch_plan(&sliced, &mut cuda_ctx)?;
14221422

14231423
let actual = run_dynamic_dispatch_plan(
14241424
&cuda_ctx,
@@ -1482,8 +1482,8 @@ mod tests {
14821482
let sliced = dict.into_array().slice(slice_start..slice_end)?;
14831483
let expected: Vec<u32> = all_decoded[slice_start..slice_end].to_vec();
14841484

1485-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1486-
let plan = dispatch_plan(&sliced, &cuda_ctx)?;
1485+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1486+
let plan = dispatch_plan(&sliced, &mut cuda_ctx)?;
14871487

14881488
let actual = run_dynamic_dispatch_plan(
14891489
&cuda_ctx,
@@ -1514,8 +1514,8 @@ mod tests {
15141514

15151515
let seq = Sequence::try_new_typed(base, multiplier, Nullability::NonNullable, len)?;
15161516

1517-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1518-
let plan = dispatch_plan(&seq.into_array(), &cuda_ctx)?;
1517+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1518+
let plan = dispatch_plan(&seq.into_array(), &mut cuda_ctx)?;
15191519

15201520
let actual = run_dynamic_dispatch_plan(
15211521
&cuda_ctx,
@@ -1547,8 +1547,8 @@ mod tests {
15471547

15481548
let seq = Sequence::try_new_typed(base, multiplier, Nullability::NonNullable, len)?;
15491549

1550-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1551-
let plan = dispatch_plan(&seq.into_array(), &cuda_ctx)?;
1550+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1551+
let plan = dispatch_plan(&seq.into_array(), &mut cuda_ctx)?;
15521552

15531553
let actual_u32 = run_dynamic_dispatch_plan(
15541554
&cuda_ctx,
@@ -1865,7 +1865,7 @@ mod tests {
18651865
/// (the bit-pattern for i32(-1)), not u32(0x000000FF) = 255.
18661866
#[crate::test]
18671867
fn test_load_element_sign_extends_i8_to_u32() -> VortexResult<()> {
1868-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1868+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
18691869

18701870
let i8_values: Vec<i8> = vec![-1, -2, -3, 127, -128, 0, 1, 42];
18711871
let len = i8_values.len();
@@ -1898,7 +1898,7 @@ mod tests {
18981898
/// Same as above but for i16 → u32 widening.
18991899
#[crate::test]
19001900
fn test_load_element_sign_extends_i16_to_u32() -> VortexResult<()> {
1901-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
1901+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
19021902

19031903
let i16_values: Vec<i16> = vec![-1, -256, -32768, 32767, 0, 1, -100, 12345];
19041904
let len = i16_values.len();
@@ -2176,8 +2176,8 @@ mod tests {
21762176
(bp.into_array(), values)
21772177
};
21782178

2179-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
2180-
let plan = dispatch_plan(&array, &cuda_ctx)?;
2179+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
2180+
let plan = dispatch_plan(&array, &mut cuda_ctx)?;
21812181
let actual = run_dynamic_dispatch_plan(
21822182
&cuda_ctx,
21832183
expected.len(),
@@ -2226,8 +2226,8 @@ mod tests {
22262226
(for_arr.into_array(), all_values)
22272227
};
22282228

2229-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
2230-
let plan = dispatch_plan(&array, &cuda_ctx)?;
2229+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
2230+
let plan = dispatch_plan(&array, &mut cuda_ctx)?;
22312231
let actual = run_dynamic_dispatch_plan(
22322232
&cuda_ctx,
22332233
expected.len(),
@@ -2279,8 +2279,8 @@ mod tests {
22792279
.execute::<PrimitiveArray>(&mut LEGACY_SESSION.create_execution_ctx())?;
22802280
let expected: Vec<f32> = cpu_decoded.as_slice::<f32>().to_vec();
22812281

2282-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
2283-
let plan = dispatch_plan(&array, &cuda_ctx)?;
2282+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
2283+
let plan = dispatch_plan(&array, &mut cuda_ctx)?;
22842284
let actual = run_dispatch_plan_f32(
22852285
&cuda_ctx,
22862286
expected.len(),
@@ -2432,8 +2432,8 @@ mod tests {
24322432
let values_prim = PrimitiveArray::new(Buffer::from(dict_values), NonNullable);
24332433
let dict = DictArray::try_new(codes_bp.into_array(), values_prim.into_array())?;
24342434

2435-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
2436-
let plan = dispatch_plan(&dict.into_array(), &cuda_ctx)?;
2435+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
2436+
let plan = dispatch_plan(&dict.into_array(), &mut cuda_ctx)?;
24372437
let actual =
24382438
run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?;
24392439
assert_eq!(actual, expected);
@@ -2461,8 +2461,8 @@ mod tests {
24612461
)?;
24622462
assert!(bp.patches().is_some(), "expected patches");
24632463

2464-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
2465-
let plan = dispatch_plan(&bp.into_array(), &cuda_ctx)?;
2464+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
2465+
let plan = dispatch_plan(&bp.into_array(), &mut cuda_ctx)?;
24662466
let actual =
24672467
run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?;
24682468
assert_eq!(actual, values);
@@ -2493,8 +2493,8 @@ mod tests {
24932493
)?;
24942494
assert!(bp.patches().is_some(), "expected patches");
24952495

2496-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
2497-
let plan = dispatch_plan(&bp.into_array(), &cuda_ctx)?;
2496+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
2497+
let plan = dispatch_plan(&bp.into_array(), &mut cuda_ctx)?;
24982498
let actual =
24992499
run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?;
25002500
assert_eq!(actual, values);
@@ -2561,8 +2561,8 @@ mod tests {
25612561
)?;
25622562
assert!(bp.patches().is_some(), "expected patches");
25632563

2564-
let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
2565-
let plan = dispatch_plan(&bp.into_array(), &cuda_ctx)?;
2564+
let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
2565+
let plan = dispatch_plan(&bp.into_array(), &mut cuda_ctx)?;
25662566
let actual =
25672567
run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?;
25682568
assert_eq!(actual, values);

0 commit comments

Comments
 (0)