Skip to content

Commit 0952b2b

Browse files
committed
added credit; added readme to bench; tried to clean whitespace
1 parent 4afd0ce commit 0952b2b

4 files changed

Lines changed: 115 additions & 28 deletions

File tree

eidos/eidos_functions_math.cpp

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ EidosValue_SP Eidos_ExecuteFunction_abs(const std::vector<EidosValue_SP> &p_argu
8888
EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count);
8989
double *float_result_data = float_result->data_mutable();
9090
result_SP = EidosValue_SP(float_result);
91-
91+
9292
#ifdef _OPENMP
9393
EIDOS_THREAD_COUNT(gEidos_OMP_threads_ABS_FLOAT);
9494
#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_ABS_FLOAT) num_threads(thread_count)
@@ -98,9 +98,9 @@ EidosValue_SP Eidos_ExecuteFunction_abs(const std::vector<EidosValue_SP> &p_argu
9898
Eidos_SIMD::abs_float64(float_data, float_result_data, x_count);
9999
#endif
100100
}
101-
101+
102102
result_SP->CopyDimensionsFromValue(x_value);
103-
103+
104104
return result_SP;
105105
}
106106

@@ -195,14 +195,14 @@ EidosValue_SP Eidos_ExecuteFunction_atan2(const std::vector<EidosValue_SP> &p_ar
195195
EidosValue_SP Eidos_ExecuteFunction_ceil(const std::vector<EidosValue_SP> &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter)
196196
{
197197
EidosValue_SP result_SP(nullptr);
198-
198+
199199
EidosValue *x_value = p_arguments[0].get();
200200
int x_count = x_value->Count();
201201
const double *float_data = x_value->FloatData();
202202
EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count);
203203
double *float_result_data = float_result->data_mutable();
204204
result_SP = EidosValue_SP(float_result);
205-
205+
206206
#ifdef _OPENMP
207207
EIDOS_THREAD_COUNT(gEidos_OMP_threads_CEIL);
208208
#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_CEIL) num_threads(thread_count)
@@ -211,9 +211,9 @@ EidosValue_SP Eidos_ExecuteFunction_ceil(const std::vector<EidosValue_SP> &p_arg
211211
#else
212212
Eidos_SIMD::ceil_float64(float_data, float_result_data, x_count);
213213
#endif
214-
214+
215215
result_SP->CopyDimensionsFromValue(x_value);
216-
216+
217217
return result_SP;
218218
}
219219

@@ -368,14 +368,14 @@ EidosValue_SP Eidos_ExecuteFunction_exp(const std::vector<EidosValue_SP> &p_argu
368368
EidosValue_SP Eidos_ExecuteFunction_floor(const std::vector<EidosValue_SP> &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter)
369369
{
370370
EidosValue_SP result_SP(nullptr);
371-
371+
372372
EidosValue *x_value = p_arguments[0].get();
373373
int x_count = x_value->Count();
374374
const double *float_data = x_value->FloatData();
375375
EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count);
376376
double *float_result_data = float_result->data_mutable();
377377
result_SP = EidosValue_SP(float_result);
378-
378+
379379
#ifdef _OPENMP
380380
EIDOS_THREAD_COUNT(gEidos_OMP_threads_FLOOR);
381381
#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_FLOOR) num_threads(thread_count)
@@ -384,9 +384,9 @@ EidosValue_SP Eidos_ExecuteFunction_floor(const std::vector<EidosValue_SP> &p_ar
384384
#else
385385
Eidos_SIMD::floor_float64(float_data, float_result_data, x_count);
386386
#endif
387-
387+
388388
result_SP->CopyDimensionsFromValue(x_value);
389-
389+
390390
return result_SP;
391391
}
392392

@@ -802,25 +802,25 @@ EidosValue_SP Eidos_ExecuteFunction_product(const std::vector<EidosValue_SP> &p_
802802
{
803803
const double *float_data = x_value->FloatData();
804804
double product = Eidos_SIMD::product_float64(float_data, x_count);
805-
805+
806806
result_SP = EidosValue_SP(new (gEidosValuePool->AllocateChunk()) EidosValue_Float(product));
807807
}
808-
808+
809809
return result_SP;
810810
}
811811

812812
// (float)round(float x)
813813
EidosValue_SP Eidos_ExecuteFunction_round(const std::vector<EidosValue_SP> &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter)
814814
{
815815
EidosValue_SP result_SP(nullptr);
816-
816+
817817
EidosValue *x_value = p_arguments[0].get();
818818
int x_count = x_value->Count();
819819
const double *float_data = x_value->FloatData();
820820
EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count);
821821
double *float_result_data = float_result->data_mutable();
822822
result_SP = EidosValue_SP(float_result);
823-
823+
824824
#ifdef _OPENMP
825825
EIDOS_THREAD_COUNT(gEidos_OMP_threads_ROUND);
826826
#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_ROUND) num_threads(thread_count)
@@ -829,9 +829,9 @@ EidosValue_SP Eidos_ExecuteFunction_round(const std::vector<EidosValue_SP> &p_ar
829829
#else
830830
Eidos_SIMD::round_float64(float_data, float_result_data, x_count);
831831
#endif
832-
832+
833833
result_SP->CopyDimensionsFromValue(x_value);
834-
834+
835835
return result_SP;
836836
}
837837

@@ -2440,7 +2440,7 @@ EidosValue_SP Eidos_ExecuteFunction_sqrt(const std::vector<EidosValue_SP> &p_arg
24402440
EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count);
24412441
double *float_result_data = float_result->data_mutable();
24422442
result_SP = EidosValue_SP(float_result);
2443-
2443+
24442444
#ifdef _OPENMP
24452445
EIDOS_THREAD_COUNT(gEidos_OMP_threads_SQRT_FLOAT);
24462446
#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_SQRT_FLOAT) num_threads(thread_count)
@@ -2450,9 +2450,9 @@ EidosValue_SP Eidos_ExecuteFunction_sqrt(const std::vector<EidosValue_SP> &p_arg
24502450
Eidos_SIMD::sqrt_float64(float_data, float_result_data, x_count);
24512451
#endif
24522452
}
2453-
2453+
24542454
result_SP->CopyDimensionsFromValue(x_value);
2455-
2455+
24562456
return result_SP;
24572457
}
24582458

@@ -2514,12 +2514,12 @@ EidosValue_SP Eidos_ExecuteFunction_sum(const std::vector<EidosValue_SP> &p_argu
25142514
// case across multiple threads seems excessively complex; instead we look for an overflow afterwards
25152515
const int64_t *int_data = x_value->IntData();
25162516
double sum_d = 0;
2517-
2517+
25182518
EIDOS_THREAD_COUNT(gEidos_OMP_threads_SUM_INTEGER);
25192519
#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(int_data) reduction(+: sum_d) if(parallel:x_count >= EIDOS_OMPMIN_SUM_INTEGER) num_threads(thread_count)
25202520
for (int value_index = 0; value_index < x_count; ++value_index)
25212521
sum_d += int_data[value_index];
2522-
2522+
25232523
// 2^53 is the largest integer such that it and all smaller integers can be represented in double losslessly
25242524
int64_t sum = (int64_t)sum_d;
25252525
bool fits_in_integer = (((double)sum == sum_d) && (sum < 9007199254740992L) && (sum > -9007199254740992L));
@@ -2609,14 +2609,14 @@ EidosValue_SP Eidos_ExecuteFunction_tan(const std::vector<EidosValue_SP> &p_argu
26092609
EidosValue_SP Eidos_ExecuteFunction_trunc(const std::vector<EidosValue_SP> &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter)
26102610
{
26112611
EidosValue_SP result_SP(nullptr);
2612-
2612+
26132613
EidosValue *x_value = p_arguments[0].get();
26142614
int x_count = x_value->Count();
26152615
const double *float_data = x_value->FloatData();
26162616
EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count);
26172617
double *float_result_data = float_result->data_mutable();
26182618
result_SP = EidosValue_SP(float_result);
2619-
2619+
26202620
#ifdef _OPENMP
26212621
EIDOS_THREAD_COUNT(gEidos_OMP_threads_TRUNC);
26222622
#pragma omp parallel for simd schedule(simd:static) default(none) shared(x_count) firstprivate(float_data, float_result_data) if(parallel:x_count >= EIDOS_OMPMIN_TRUNC) num_threads(thread_count)
@@ -2625,9 +2625,9 @@ EidosValue_SP Eidos_ExecuteFunction_trunc(const std::vector<EidosValue_SP> &p_ar
26252625
#else
26262626
Eidos_SIMD::trunc_float64(float_data, float_result_data, x_count);
26272627
#endif
2628-
2628+
26292629
result_SP->CopyDimensionsFromValue(x_value);
2630-
2630+
26312631
return result_SP;
26322632
}
26332633

eidos/eidos_simd.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
// eidos_simd.h
33
// Eidos
44
//
5-
// Created by Ben Haller on 11/26/2024.
5+
// Created by Andrew Kern on 11/26/2025.
66
// Copyright (c) 2024-2025 Philipp Messer. All rights reserved.
77
// A product of the Messer Lab, http://messerlab.org/slim/
88
//

eidos/eidos_test_functions_other.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ void _RunFunctionMatrixArrayTests(void)
378378
EidosAssertScriptSuccess_L("x = (rbinom(100, 1, 0.4) == 1); y = matrix(x, nrow=10); identical(rowSums(y), apply(y, 0, 'sum(applyValue);'));", true);
379379
EidosAssertScriptSuccess_L("x = rdunif(100, -1000, 1000); y = matrix(x, nrow=10); identical(rowSums(y), apply(y, 0, 'sum(applyValue);'));", true);
380380
EidosAssertScriptSuccess_L("x = runif(100); y = matrix(x, nrow=10); all(abs(rowSums(y) - apply(y, 0, 'sum(applyValue);')) < 1e-10);", true); // tolerance for SIMD
381-
381+
382382
// colSums()
383383
EidosAssertScriptSuccess_L("x = c(T,T,F,F,T,F,F,T,T,F,F,T); y = matrix(x, nrow=3); identical(colSums(y), c(2, 1, 2, 1));", true);
384384
EidosAssertScriptSuccess_L("x = 1:12; y = matrix(x, nrow=3); identical(colSums(y), c(6, 15, 24, 33));", true);

simd_benchmarks/README.md

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# SIMD Benchmarks
2+
3+
This directory contains benchmark scripts used during the development of SIMD optimizations for SLiM. These files are provided for internal development use and are **not used in the build of SLiM**.
4+
5+
## Contents
6+
7+
- **`run_benchmarks.sh`** - Shell script that builds SLiM with and without SIMD, runs both benchmark scripts, and reports speedup comparisons.
8+
9+
- **`simd_benchmark.eidos`** - Eidos script that benchmarks SIMD-optimized math functions (`sqrt`, `abs`, `floor`, `ceil`, `round`, `trunc`, `sum`, `product`) on large arrays.
10+
11+
- **`slim_benchmark.slim`** - SLiM simulation benchmark (N=5000, 1Mb chromosome, 5000 generations with selection) for measuring overall simulation performance.
12+
13+
## Author
14+
15+
These benchmarks were developed by Andrew Kern as part of SIMD optimization work for SLiM.
16+
17+
## Usage
18+
19+
These files are not part of the SLiM build system. To run the benchmarks:
20+
21+
```bash
22+
cd simd_benchmarks
23+
./run_benchmarks.sh [num_runs]
24+
```
25+
26+
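This will build both SIMD-enabled and scalar versions of SLiM, run the benchmarks, and report the speedup. The optional `num_runs` argument sets the number of runs per benchmark; the example output below, produced without it, used 3 runs.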
27+
28+
## Results
29+
30+
Benchmark results look like the following (example output):
31+
32+
```
33+
$ simd_benchmarks/run_benchmarks.sh
34+
============================================
35+
SIMD Benchmark Runner
36+
============================================
37+
SLiM root: /home/adkern/SLiM
38+
Runs per benchmark: 3
39+
40+
Building with SIMD enabled...
41+
Done.
42+
Building with SIMD disabled...
43+
Done.
44+
45+
============================================
46+
Eidos Math Function Benchmarks
47+
============================================
48+
49+
SIMD Build:
50+
Running Eidos benchmark (SIMD)...
51+
sqrt(): 0.105 sec
52+
abs(): 0.171 sec
53+
floor(): 0.164 sec
54+
ceil(): 0.166 sec
55+
round(): 0.164 sec
56+
trunc(): 0.165 sec
57+
sum(): 0.032 sec
58+
product(): 0.003 sec (1000 elements, 10000 iters)
59+
60+
Scalar Build:
61+
Running Eidos benchmark (Scalar)...
62+
sqrt(): 0.108 sec
63+
abs(): 0.166 sec
64+
floor(): 0.231 sec
65+
ceil(): 0.246 sec
66+
round(): 0.473 sec
67+
trunc(): 0.246 sec
68+
sum(): 0.166 sec
69+
product(): 0.017 sec (1000 elements, 10000 iters)
70+
71+
============================================
72+
SLiM Simulation Benchmark
73+
(N=5000, 5000 generations, selection)
74+
============================================
75+
76+
Running 3 iterations each...
77+
78+
SIMD Build: 12.756s (avg)
79+
Scalar Build: 12.316s (avg)
80+
81+
Speedup: .96x
82+
83+
============================================
84+
Benchmark complete
85+
============================================
86+
```
87+
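The takeaway is that SIMD provided significant speedups for most of the Eidos math functions (`floor`, `ceil`, `round`, `trunc`, `sum`, and `product`), while `sqrt` and `abs` were essentially unchanged. The overall SLiM simulation showed no measurable speedup (0.96x, i.e. within noise or slightly slower) in this specific benchmark scenario.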

0 commit comments

Comments
 (0)