Skip to content

Commit 342bea1

Browse files
Merge pull request #10 from TimGoTheCreator/SoA
Update to SoA
2 parents c17e9ff + 685018a commit 342bea1

16 files changed

Lines changed: 504 additions & 384 deletions

CMakeLists.txt

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,16 @@ add_executable(next ${SRC_FILES} ${ARGPARSE_FILES})
6868
find_package(OpenMP QUIET)
6969
if(OpenMP_CXX_FOUND)
7070
message(STATUS "OpenMP detected — enabling multithreading.")
71-
target_link_libraries(next PRIVATE OpenMP::OpenMP_CXX)
71+
72+
if(MSVC)
73+
# Force MSVC to use the modern LLVM backend (supports OpenMP 3.0+ and size_t)
74+
# We add it as a compiler option because MSVC's default find_package
75+
# often defaults to the legacy /openmp flag.
76+
target_compile_options(next PRIVATE /openmp:llvm)
77+
else()
78+
target_link_libraries(next PRIVATE OpenMP::OpenMP_CXX)
79+
endif()
80+
7281
else()
7382
message(STATUS "OpenMP not found — building in single-threaded mode.")
7483
endif()

docs/install_linux.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ cd ..
4040
```bash
4141
cd examples/TwoBodies
4242
python two_body.py
43-
../../next two_body.txt 8 0.01 0.1 vtu
43+
../../next two_body.txt 8 0.001 0.1 vtu
4444
```
4545

4646
5. **View results**

docs/install_mac.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ cd ..
4040
```bash
4141
cd examples/TwoBodies
4242
python two_body.py
43-
../../next two_body.txt 8 0.01 0.1 vtu
43+
../../next two_body.txt 8 0.001 0.1 vtu
4444
```
4545

4646
5. **View results**

docs/install_mingw.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,14 @@ cd ..
4242
```bash
4343
cd examples/TwoBodies
4444
python two_body.py
45-
../../next.exe two_body.txt 8 0.01 0.1 vtu
4645
```
4746

47+
#### If the copy command worked (copies to project root by default in CMake):
48+
../../next.exe two_body.txt 8 0.001 0.1 vtu
49+
50+
#### If your executable is in build/
51+
../../build/next.exe two_body.txt 8 0.001 0.1 vtu
52+
4853
5. **View results**
4954

5055
Open the `.vtu` output in ParaView.

docs/install_msvc.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
- vcpkg
99
- HDF5 (installed via vcpkg)
1010
- Python 3
11+
- LLVM OpenMP Component (C++ Clang-cl for v143 build tools (x64/x86)
1112

1213
## Steps
1314

@@ -41,9 +42,14 @@ cd ..
4142
```powershell
4243
cd examples/TwoBodies
4344
python two_body.py
44-
..\..\next.exe two_body.txt 8 0.01 0.1 vtu
4545
```
4646

47+
#### If the copy command worked (copies to project root by default in CMake):
48+
../../next.exe two_body.txt 8 0.001 0.1 vtu
49+
50+
#### If your executable is in build/
51+
../../build/Release/next.exe two_body.txt 8 0.001 0.1 vtu
52+
4753
5. **View results**
4854

4955
Open the `.vtu` output in ParaView.

src/begrun.cpp

Lines changed: 36 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -28,76 +28,80 @@ using next::OutputFormat;
2828
int main(int argc, char **argv) {
2929
auto args = next::parse_arguments(argc, argv);
3030

31-
// ASCII banner (raw string literal preserves backslashes and spacing)
3231
static constexpr const char *BANNER = R"NEXTBANNER(
33-
_ _ ________ _________
32+
_ _ ________ _________
3433
| \ | | ____\ \ / /__ __|
3534
| \| | |__ \ V / | |
3635
| . ` | __| > < | |
3736
| |\ | |____ / . \ | |
38-
|_| \_|______/_/ \_\ |_|
37+
|_| \_|______/_/ \_\ |_|
3938
)NEXTBANNER";
4039

41-
// Print banner once at startup
4240
std::cout << BANNER << '\n';
4341

4442
// Set threads and report
4543
omp_set_num_threads(args.threads);
46-
47-
std::cout << " Threads: " << args.threads << "\n";
44+
std::cout << " Threads: " << args.threads << "\n";
4845

4946
#ifdef NEXT_FP64
5047
std::cout << " Precision: FP64\n";
5148
#elif defined(NEXT_FP32)
5249
std::cout << " Precision: FP32\n";
5350
#endif
5451

55-
// Load particles
56-
std::vector<Particle> particles = LoadParticlesFromFile(args.input_file);
52+
// --- SoA UPDATE ---
53+
// LoadParticlesFromFile now returns a single 'Particle' struct
54+
// which internally contains all the parallel vectors.
55+
Particle particles = LoadParticlesFromFile(args.input_file);
56+
57+
std::cout << " Particles: " << particles.size() << "\n";
5758

5859
real simTime = 0;
5960
real nextDump = 0;
6061
int step = 0;
6162
char command;
6263

6364
while (true) {
65+
// Both computeAdaptiveDt and Step now take the SoA object by reference
6466
real dtAdaptive = computeAdaptiveDt(particles, args.dt);
6567
Step(particles, dtAdaptive);
68+
6669
simTime += dtAdaptive;
6770

68-
if (simTime >= nextDump) {
69-
std::string out = "dump_" + std::to_string(step);
71+
if (simTime >= nextDump) {
72+
std::string out = "dump_" + std::to_string(step);
7073

71-
switch (args.format) {
72-
case OutputFormat::VTK:
73-
out += ".vtk";
74-
SaveVTK(particles, out);
75-
break;
74+
switch (args.format) {
75+
case OutputFormat::VTK:
76+
out += ".vtk";
77+
SaveVTK(particles, out);
78+
break;
7679

77-
case OutputFormat::VTU:
78-
out += ".vtu";
79-
SaveVTU(particles, out);
80-
break;
80+
case OutputFormat::VTU:
81+
out += ".vtu";
82+
SaveVTU(particles, out);
83+
break;
8184

82-
case OutputFormat::HDF5:
83-
out += ".hdf5";
84-
SaveHDF5(particles, out);
85-
break;
86-
}
85+
case OutputFormat::HDF5:
86+
out += ".hdf5";
87+
SaveHDF5(particles, out);
88+
break;
89+
}
8790

88-
std::cout << "[Dump " << step << "] t = " << simTime
89-
<< ", file: " << out << "\n";
90-
91-
nextDump += args.dump_interval;
92-
step++;
93-
}
91+
std::cout << "[Dump " << step << "] t = " << simTime
92+
<< ", file: " << out << "\n";
9493

94+
nextDump += args.dump_interval;
95+
step++;
96+
}
9597

98+
// Non-blocking exit check
9699
if (std::cin.rdbuf()->in_avail() > 0) {
97100
std::cin >> command;
98-
if (command == 'q' || command == 'Q')
101+
if (command == 'q' || command == 'Q') {
99102
std::cout << "Exiting...\n";
100-
break;
103+
break;
104+
}
101105
}
102106
}
103107

src/dt/adaptive.h

Lines changed: 33 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -16,27 +16,37 @@
1616
#include <cmath>
1717
#include <vector>
1818

19-
real computeAdaptiveDt(const std::vector<Particle> &p, real base_dt) {
20-
real maxSpeed = 0;
21-
22-
for (const auto &a : p) {
23-
real speed = std::sqrt(a.vx * a.vx + a.vy * a.vy + a.vz * a.vz);
24-
if (speed > maxSpeed)
25-
maxSpeed = speed;
26-
}
27-
28-
maxSpeed = std::min(maxSpeed, real(1e4));
29-
30-
// If everything is basically stationary, use base dt
31-
if (maxSpeed < real(1e-8))
32-
return base_dt;
33-
34-
// Smaller dt when speeds are high
35-
real dt = base_dt / (1 + maxSpeed);
36-
37-
// Clamp dt to a reasonable range
38-
dt = std::max(dt, base_dt * real(0.01)); // never smaller than 1% of base
39-
dt = std::min(dt, base_dt * real(1.0)); // never larger than base
40-
41-
return dt;
19+
/**
20+
* @brief Computes a global adaptive time-step based on the maximum velocity in the system.
21+
* Updated for SoA (Structure of Arrays) for better cache performance.
22+
*/
23+
real computeAdaptiveDt(const Particle &p, real base_dt) {
24+
real maxSpeedSq = 0;
25+
const size_t N = p.size();
26+
27+
// High-speed linear scan through velocity arrays
28+
// The compiler can easily vectorize this with SIMD (AVX/SSE)
29+
for (size_t i = 0; i < N; ++i) {
30+
real speedSq = p.vx[i] * p.vx[i] + p.vy[i] * p.vy[i] + p.vz[i] * p.vz[i];
31+
if (speedSq > maxSpeedSq)
32+
maxSpeedSq = speedSq;
33+
}
34+
35+
real maxSpeed = std::sqrt(maxSpeedSq);
36+
37+
// Safety clamp to prevent dt from exploding or becoming zero
38+
maxSpeed = std::min(maxSpeed, real(1e4));
39+
40+
// If everything is basically stationary, use base dt
41+
if (maxSpeed < real(1e-8))
42+
return base_dt;
43+
44+
// Standard Courant-like condition: smaller dt when speeds are high
45+
real dt = base_dt / (1 + maxSpeed);
46+
47+
// Clamp dt to a reasonable range
48+
dt = std::max(dt, base_dt * real(0.01)); // never smaller than 1% of base
49+
dt = std::min(dt, base_dt * real(1.0)); // never larger than base
50+
51+
return dt;
4252
}

src/dt/softening.h

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -10,43 +10,43 @@
1010
// along with this program. If not, see <http://www.gnu.org/licenses/>.
1111

1212
#pragma once
13+
#include "floatdef.h"
1314
#include <cmath>
15+
#include <algorithm>
1416

15-
/* Softening for BarnesHut */
17+
/**
18+
* @brief Softening for Barnes-Hut node interactions.
19+
* Note: nodeMass and dist are passed as individual reals from the SoA arrays.
20+
*/
1621
inline real nextSoftening(real nodeSize, real nodeMass, real dist) {
17-
// Base softening from node size
18-
real eps_size = nodeSize * real(0.015);
22+
// std::pow(x, 1/3) or std::cbrt is slow.
23+
// In SoA loops, this is often the bottleneck.
24+
real eps_size = nodeSize * real(0.015);
25+
real eps_mass = std::cbrt(nodeMass) * real(0.002);
1926

20-
// Mass-based softening (physical radius proxy)
21-
real eps_mass = std::cbrt(nodeMass) * real(0.002);
27+
// Distance taper: strong at r->0, fades smoothly
28+
real eps_taper = real(1.0) / (real(1.0) + dist * real(10.0));
2229

23-
// Distance taper: strong at r→0, fades smoothly
24-
real eps_taper = real(1.0) / (real(1.0) + dist * real(10.0));
30+
// Combine
31+
real eps = (eps_size + eps_mass) * eps_taper;
2532

26-
// Combine
27-
real eps = (eps_size + eps_mass) * eps_taper;
28-
29-
// Minimum floor
30-
const real eps_min = real(1e-4);
31-
if (eps < eps_min)
32-
eps = eps_min;
33-
34-
return eps;
33+
// Minimum floor - using std::max is cleaner and easier for the compiler to optimize
34+
return std::max(eps, real(1e-4));
3535
}
3636

37-
/* Softening for Gravity kernel */
37+
/**
38+
* @brief Softening for direct particle-particle gravity kernels.
39+
* ma and mb are the masses pulled from the ps.m[i] and ps.m[j] arrays.
40+
*/
3841
inline real pairSoftening(real ma, real mb) {
39-
// Physical radius proxy
40-
real ea = std::cbrt(ma) * real(0.002);
41-
real eb = std::cbrt(mb) * real(0.002);
42-
43-
// Symmetric combination (quadrature)
44-
real eps = std::sqrt(ea * ea + eb * eb);
42+
// Physical radius proxy
43+
real ea = std::cbrt(ma) * real(0.002);
44+
real eb = std::cbrt(mb) * real(0.002);
4545

46-
// Minimum floor
47-
const real eps_min = real(1e-4);
48-
if (eps < eps_min)
49-
eps = eps_min;
46+
// Symmetric combination (quadrature)
47+
real epsSq = ea * ea + eb * eb;
48+
real eps = std::sqrt(epsSq);
5049

51-
return eps; // return epsilon
50+
// Minimum floor
51+
return std::max(eps, real(1e-4));
5252
}

0 commit comments

Comments
 (0)