|
10 | 10 | // along with this program. If not, see <http://www.gnu.org/licenses/>. |
11 | 11 |
|
12 | 12 | #pragma once |
13 | | -#include "../struct/particle.h" |
14 | 13 | #include "floatdef.h" |
15 | 14 | #include "octree.h" |
16 | | -#include <vector> |
| 15 | +#include "particle.h" |
17 | 16 | #include <memory> |
18 | 17 | #include <algorithm> |
19 | | - |
20 | | -inline void Step(std::vector<Particle> &p, real dt) { |
21 | | - if (p.empty()) return; |
22 | | - |
23 | | - real theta = 0.5; |
24 | | - real half = dt * real(0.5); |
25 | | - |
26 | | - // Helper lambda that returns a unique_ptr |
27 | | - auto buildTree = [&]() -> std::unique_ptr<Octree> { |
28 | | - real minx = +1e30, miny = +1e30, minz = +1e30; |
29 | | - real maxx = -1e30, maxy = -1e30, maxz = -1e30; |
30 | | - |
31 | | - for (const auto &a : p) { |
32 | | - minx = std::min(minx, a.x); miny = std::min(miny, a.y); minz = std::min(minz, a.z); |
33 | | - maxx = std::max(maxx, a.x); maxy = std::max(maxy, a.y); maxz = std::max(maxz, a.z); |
| 18 | +#include <omp.h> |
| 19 | + |
| 20 | +/** |
| 21 | + * @brief Performs a complete Leapfrog Step (Kick-Drift-Kick) using SoA data. |
| 22 | + */ |
| 23 | +inline void Step(ParticleSystem &ps, real dt) { |
| 24 | + if (ps.size() == 0) return; |
| 25 | + |
| 26 | + const real theta = 0.5; |
| 27 | + const real half = dt * real(0.5); |
| 28 | + const int N = static_cast<int>(ps.size()); |
| 29 | + |
| 30 | + // Helper lambda to build the tree using the ParticleSystem indices |
| 31 | + auto buildTree = [&]() -> std::unique_ptr<Octree> { |
| 32 | + real minx = 1e30, miny = 1e30, minz = 1e30; |
| 33 | + real maxx = -1e30, maxy = -1e30, maxz = -1e30; |
| 34 | + |
| 35 | + // Bounding box calculation (SoA access is very fast here) |
| 36 | + for (int i = 0; i < N; ++i) { |
| 37 | + minx = std::min(minx, ps.x[i]); miny = std::min(miny, ps.y[i]); minz = std::min(minz, ps.z[i]); |
| 38 | + maxx = std::max(maxx, ps.x[i]); maxy = std::max(maxy, ps.y[i]); maxz = std::max(maxz, ps.z[i]); |
| 39 | + } |
| 40 | + |
| 41 | + real cx = (minx + maxx) * 0.5; |
| 42 | + real cy = (miny + maxy) * 0.5; |
| 43 | + real cz = (minz + maxz) * 0.5; |
| 44 | + real size = std::max({maxx - minx, maxy - miny, maxz - minz}) * 0.5; |
| 45 | + |
| 46 | + if (size <= 0) size = 1.0; |
| 47 | + |
| 48 | + auto root = std::make_unique<Octree>(cx, cy, cz, size); |
| 49 | + |
| 50 | + // Insert particle indices 0 to N-1 |
| 51 | + for (int i = 0; i < N; ++i) { |
| 52 | + root->insert(i, ps); |
| 53 | + } |
| 54 | + |
| 55 | + root->computeMass(ps); |
| 56 | + return root; |
| 57 | + }; |
| 58 | + |
| 59 | + // --- First Kick (dt/2) --- |
| 60 | + { |
| 61 | + std::unique_ptr<Octree> root = buildTree(); |
| 62 | + |
| 63 | + #pragma omp parallel for schedule(dynamic, 64) |
| 64 | + for (int i = 0; i < N; ++i) { |
| 65 | + real ax = 0, ay = 0, az = 0; |
| 66 | + bhAccel(root.get(), i, ps, theta, ax, ay, az); |
| 67 | + |
| 68 | + ps.vx[i] += ax * half; |
| 69 | + ps.vy[i] += ay * half; |
| 70 | + ps.vz[i] += az * half; |
| 71 | + } |
34 | 72 | } |
35 | 73 |
|
36 | | - real cx = (minx + maxx) * 0.5; |
37 | | - real cy = (miny + maxy) * 0.5; |
38 | | - real cz = (minz + maxz) * 0.5; |
39 | | - real size = std::max({maxx - minx, maxy - miny, maxz - minz}) * real(0.5); |
40 | | - |
41 | | - if (size <= 0) size = 1; |
42 | | - |
43 | | - // Create the owned root |
44 | | - auto root = std::make_unique<Octree>(cx, cy, cz, size); |
45 | | - |
46 | | - for (auto &a : p) |
47 | | - root->insert(&a); |
48 | | - |
49 | | - root->computeMass(); |
50 | | - return root; |
51 | | - }; |
| 74 | + // --- Drift (dt) --- |
| 75 | + // Contiguous memory access makes this loop ideal for SIMD |
| 76 | + #pragma omp parallel for schedule(static) |
| 77 | + for (int i = 0; i < N; ++i) { |
| 78 | + ps.x[i] += ps.vx[i] * dt; |
| 79 | + ps.y[i] += ps.vy[i] * dt; |
| 80 | + ps.z[i] += ps.vz[i] * dt; |
| 81 | + } |
52 | 82 |
|
53 | | - // --- First Kick (dt/2) --- |
54 | | - { |
55 | | - std::unique_ptr<Octree> root = buildTree(); |
| 83 | + // --- Second Kick (dt/2) --- |
| 84 | + { |
| 85 | + std::unique_ptr<Octree> root = buildTree(); |
56 | 86 |
|
57 | | -#pragma omp parallel for schedule(static) |
58 | | - for (int i = 0; i < (int)p.size(); i++) { |
59 | | - real ax = 0, ay = 0, az = 0; |
60 | | - // Pass the raw pointer via .get() for the traversal |
61 | | - bhAccel(root.get(), p[i], theta, ax, ay, az); |
| 87 | + #pragma omp parallel for schedule(dynamic, 64) |
| 88 | + for (int i = 0; i < N; ++i) { |
| 89 | + real ax = 0, ay = 0, az = 0; |
| 90 | + bhAccel(root.get(), i, ps, theta, ax, ay, az); |
62 | 91 |
|
63 | | - p[i].vx += ax * half; |
64 | | - p[i].vy += ay * half; |
65 | | - p[i].vz += az * half; |
66 | | - } |
67 | | - // No 'delete root' needed! It happens automatically here. |
68 | | - } |
69 | | - |
70 | | - // --- Drift (dt) --- |
71 | | -#pragma omp parallel for schedule(static) |
72 | | - for (int i = 0; i < (int)p.size(); i++) { |
73 | | - p[i].x += p[i].vx * dt; |
74 | | - p[i].y += p[i].vy * dt; |
75 | | - p[i].z += p[i].vz * dt; |
76 | | - } |
77 | | - |
78 | | - // --- Second Kick (dt/2) --- |
79 | | - { |
80 | | - std::unique_ptr<Octree> root = buildTree(); |
81 | | - |
82 | | -#pragma omp parallel for schedule(static) |
83 | | - for (int i = 0; i < (int)p.size(); i++) { |
84 | | - real ax = 0, ay = 0, az = 0; |
85 | | - bhAccel(root.get(), p[i], theta, ax, ay, az); |
86 | | - |
87 | | - p[i].vx += ax * half; |
88 | | - p[i].vy += ay * half; |
89 | | - p[i].vz += az * half; |
| 92 | + ps.vx[i] += ax * half; |
| 93 | + ps.vy[i] += ay * half; |
| 94 | + ps.vz[i] += az * half; |
| 95 | + } |
90 | 96 | } |
91 | | - } |
92 | 97 | } |
0 commit comments