Skip to content

Commit 5e7aa70

Browse files
authored
Micro optimizations (dftd4#351)
1 parent 7fa3ab8 commit 5e7aa70

2 files changed

Lines changed: 44 additions & 27 deletions

File tree

src/dftd4/damping/atm.f90

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ module dftd4_damping_atm
2424

2525
public :: get_atm_dispersion
2626

27+
real(wp), parameter :: third = 1.0_wp / 3.0_wp
2728

2829
contains
2930

@@ -134,16 +135,17 @@ subroutine get_atm_dispersion_energy(mol, trans, cutoff, s9, a1, a2, alp, r4r2,
134135
integer :: iat, jat, kat, izp, jzp, kzp, jtr, ktr
135136
real(wp) :: vij(3), vjk(3), vik(3), r2ij, r2jk, r2ik, c6ij, c6jk, c6ik, triple
136137
real(wp) :: r0ij, r0jk, r0ik, r0, r1, r2, r3, r5, rr, fdmp, ang
137-
real(wp) :: cutoff2, c9, dE
138+
real(wp) :: cutoff2, c9, dE, alp3
138139

139140
! Thread-private arrays for reduction
140141
! Set to 0 explicitly as the shared variants are potentially non-zero (inout)
141142
real(wp), allocatable :: energy_local(:)
142143

143144
cutoff2 = cutoff*cutoff
145+
alp3 = alp / 3.0_wp
144146

145147
!$omp parallel default(none) &
146-
!$omp shared(mol, trans, c6, s9, a1, a2, alp, r4r2, cutoff2) &
148+
!$omp shared(mol, trans, c6, s9, a1, a2, alp3, r4r2, cutoff2) &
147149
!$omp private(iat, jat, kat, izp, jzp, kzp, jtr, ktr, vij, vjk, vik, &
148150
!$omp& r2ij, r2jk, r2ik, c6ij, c6jk, c6ik, triple, r0ij, r0jk, r0ik, r0, &
149151
!$omp& r1, r2, r3, r5, rr, fdmp, ang, c9, dE) &
@@ -174,25 +176,28 @@ subroutine get_atm_dispersion_energy(mol, trans, cutoff, s9, a1, a2, alp, r4r2,
174176
vik(:) = mol%xyz(:, kat) + trans(:, ktr) - mol%xyz(:, iat)
175177
r2ik = vik(1)*vik(1) + vik(2)*vik(2) + vik(3)*vik(3)
176178
if (r2ik > cutoff2 .or. r2ik < epsilon(1.0_wp)) cycle
177-
vjk(:) = mol%xyz(:, kat) + trans(:, ktr) - mol%xyz(:, jat) &
178-
& - trans(:, jtr)
179+
180+
! vjk(:) = mol%xyz(:, kat) + trans(:, ktr)
181+
! - mol%xyz(:, jat) - trans(:, jtr)
182+
vjk(:) = vik(:) - vij(:)
179183
r2jk = vjk(1)*vjk(1) + vjk(2)*vjk(2) + vjk(3)*vjk(3)
180184
if (r2jk > cutoff2 .or. r2jk < epsilon(1.0_wp)) cycle
185+
181186
r2 = r2ij*r2ik*r2jk
182187
r1 = sqrt(r2)
183188
r3 = r2 * r1
184189
r5 = r3 * r2
185190

186-
fdmp = 1.0_wp / (1.0_wp + 6.0_wp * (r0 / r1)**(alp / 3.0_wp))
187-
ang = 0.375_wp*(r2ij + r2jk - r2ik)*(r2ij - r2jk + r2ik)&
191+
fdmp = 1.0_wp / (1.0_wp + 6.0_wp * (r0 / r1)**alp3)
192+
ang = 0.375_wp*(r2ij + r2jk - r2ik)*(r2ij - r2jk + r2ik) &
188193
& *(-r2ij + r2jk + r2ik) / r5 + 1.0_wp / r3
189194

190195
rr = ang*fdmp
191196

192-
dE = rr * c9 * triple
193-
energy_local(iat) = energy_local(iat) - dE/3
194-
energy_local(jat) = energy_local(jat) - dE/3
195-
energy_local(kat) = energy_local(kat) - dE/3
197+
dE = rr * c9 * triple * third
198+
energy_local(iat) = energy_local(iat) - dE
199+
energy_local(jat) = energy_local(jat) - dE
200+
energy_local(kat) = energy_local(kat) - dE
196201
end do
197202
end do
198203
end do
@@ -263,7 +268,7 @@ subroutine get_atm_dispersion_derivs(mol, trans, cutoff, s9, a1, a2, alp, r4r2,
263268
integer :: iat, jat, kat, izp, jzp, kzp, jtr, ktr
264269
real(wp) :: vij(3), vjk(3), vik(3), r2ij, r2jk, r2ik, c6ij, c6jk, c6ik, triple
265270
real(wp) :: r0ij, r0jk, r0ik, r0, r1, r2, r3, r5, rr, fdmp, dfdmp, ang, dang
266-
real(wp) :: cutoff2, c9, dE, dGij(3), dGjk(3), dGik(3), dS(3, 3)
271+
real(wp) :: cutoff2, alp3, c9, dE, dE_third, dGij(3), dGjk(3), dGik(3), dS(3, 3)
267272

268273
! Thread-private arrays for reduction
269274
! Set to 0 explicitly as the shared variants are potentially non-zero (inout)
@@ -274,13 +279,15 @@ subroutine get_atm_dispersion_derivs(mol, trans, cutoff, s9, a1, a2, alp, r4r2,
274279
real(wp), allocatable :: sigma_local(:, :)
275280

276281
cutoff2 = cutoff*cutoff
282+
alp3 = alp / 3.0_wp
277283

278284
!$omp parallel default(none) &
279-
!$omp shared(mol, trans, c6, s9, a1, a2, alp, r4r2, cutoff2, dc6dcn, dc6dq) &
285+
!$omp shared(mol, trans, c6, s9, a1, a2, alp, alp3, r4r2, cutoff2, &
286+
!$omp& dc6dcn, dc6dq) &
280287
!$omp private(iat, jat, kat, izp, jzp, kzp, jtr, ktr, vij, vjk, vik, &
281288
!$omp& r2ij, r2jk, r2ik, c6ij, c6jk, c6ik, triple, r0ij, r0jk, r0ik, r0, &
282-
!$omp& r1, r2, r3, r5, rr, fdmp, dfdmp, ang, dang, c9, dE, dGij, dGjk, &
283-
!$omp& dGik, dS) &
289+
!$omp& r1, r2, r3, r5, rr, fdmp, dfdmp, ang, dang, c9, dE, dE_third, dGij, &
290+
!$omp& dGjk, dGik, dS) &
284291
!$omp shared(energy, gradient, sigma, dEdcn, dEdq) &
285292
!$omp private(energy_local, gradient_local, sigma_local, dEdcn_local, &
286293
!$omp& dEdq_local)
@@ -313,22 +320,25 @@ subroutine get_atm_dispersion_derivs(mol, trans, cutoff, s9, a1, a2, alp, r4r2,
313320
vik(:) = mol%xyz(:, kat) + trans(:, ktr) - mol%xyz(:, iat)
314321
r2ik = vik(1)*vik(1) + vik(2)*vik(2) + vik(3)*vik(3)
315322
if (r2ik > cutoff2 .or. r2ik < epsilon(1.0_wp)) cycle
316-
vjk(:) = mol%xyz(:, kat) + trans(:, ktr) - mol%xyz(:, jat) &
317-
& - trans(:, jtr)
323+
324+
! vjk(:) = mol%xyz(:, kat) + trans(:, ktr)
325+
! - mol%xyz(:, jat) - trans(:, jtr)
326+
vjk(:) = vik(:) - vij(:)
318327
r2jk = vjk(1)*vjk(1) + vjk(2)*vjk(2) + vjk(3)*vjk(3)
319328
if (r2jk > cutoff2 .or. r2jk < epsilon(1.0_wp)) cycle
329+
320330
r2 = r2ij*r2ik*r2jk
321331
r1 = sqrt(r2)
322332
r3 = r2 * r1
323333
r5 = r3 * r2
324334

325-
fdmp = 1.0_wp / (1.0_wp + 6.0_wp * (r0 / r1)**(alp / 3.0_wp))
335+
fdmp = 1.0_wp / (1.0_wp + 6.0_wp * (r0 / r1)**alp3)
326336
ang = 0.375_wp*(r2ij + r2jk - r2ik)*(r2ij - r2jk + r2ik)&
327337
& *(-r2ij + r2jk + r2ik) / r5 + 1.0_wp / r3
328338

329339
rr = ang*fdmp
330340

331-
dfdmp = -2.0_wp * alp * (r0 / r1)**(alp / 3.0_wp) * fdmp**2
341+
dfdmp = -2.0_wp * alp * (r0 / r1)**alp3 * fdmp**2
332342

333343
! d/drij
334344
dang = -0.375_wp * (r2ij**3 + r2ij**2 * (r2jk + r2ik)&
@@ -352,9 +362,10 @@ subroutine get_atm_dispersion_derivs(mol, trans, cutoff, s9, a1, a2, alp, r4r2,
352362
dGjk(:) = c9 * (-dang * fdmp + ang * dfdmp) / r2jk * vjk
353363

354364
dE = rr * c9 * triple
355-
energy_local(iat) = energy_local(iat) - dE/3
356-
energy_local(jat) = energy_local(jat) - dE/3
357-
energy_local(kat) = energy_local(kat) - dE/3
365+
dE_third = dE * third
366+
energy_local(iat) = energy_local(iat) - dE_third
367+
energy_local(jat) = energy_local(jat) - dE_third
368+
energy_local(kat) = energy_local(kat) - dE_third
358369

359370
gradient_local(:, iat) = gradient_local(:, iat) - dGij - dGik
360371
gradient_local(:, jat) = gradient_local(:, jat) + dGij - dGjk

src/dftd4/damping/rational.f90

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ module dftd4_damping_rational
5151

5252
end type rational_damping_param
5353

54+
real(wp), parameter :: sixth = 1.0_wp / 6.0_wp
55+
5456

5557
contains
5658

@@ -478,17 +480,18 @@ subroutine get_pairwise_dispersion3(self, mol, trans, cutoff, r4r2, c6, energy)
478480
integer :: iat, jat, kat, izp, jzp, kzp, jtr, ktr
479481
real(wp) :: vij(3), vjk(3), vik(3), r2ij, r2jk, r2ik, c6ij, c6jk, c6ik, triple
480482
real(wp) :: r0ij, r0jk, r0ik, r0, r1, r2, r3, r5, rr, fdmp, ang
481-
real(wp) :: cutoff2, c9, dE
483+
real(wp) :: cutoff2, c9, dE, alp3
482484

483485
! Thread-private arrays for reduction
484486
! Set to 0 explicitly as the shared variants are potentially non-zero (inout)
485487
real(wp), allocatable :: energy_local(:, :)
486488

487489
if (abs(self%s9) < epsilon(1.0_wp)) return
488490
cutoff2 = cutoff*cutoff
491+
alp3 = self%alp / 3.0_wp
489492

490493
!$omp parallel default(none) &
491-
!$omp shared(mol, trans, c6, r4r2, cutoff2, self) &
494+
!$omp shared(mol, trans, c6, r4r2, cutoff2, alp3, self) &
492495
!$omp private(iat, jat, kat, izp, jzp, kzp, jtr, ktr, vij, vjk, vik, &
493496
!$omp& r2ij, r2jk, r2ik, c6ij, c6jk, c6ik, triple, r0ij, r0jk, r0ik, r0, &
494497
!$omp& r1, r2, r3, r5, rr, fdmp, ang, c9, dE) &
@@ -519,22 +522,25 @@ subroutine get_pairwise_dispersion3(self, mol, trans, cutoff, r4r2, c6, energy)
519522
vik(:) = mol%xyz(:, kat) + trans(:, ktr) - mol%xyz(:, iat)
520523
r2ik = vik(1)*vik(1) + vik(2)*vik(2) + vik(3)*vik(3)
521524
if (r2ik > cutoff2 .or. r2ik < epsilon(1.0_wp)) cycle
522-
vjk(:) = mol%xyz(:, kat) + trans(:, ktr) - mol%xyz(:, jat) &
523-
& - trans(:, jtr)
525+
526+
! vjk(:) = mol%xyz(:, kat) + trans(:, ktr)
527+
! - mol%xyz(:, jat) - trans(:, jtr)
528+
vjk(:) = vik(:) - vij(:)
524529
r2jk = vjk(1)*vjk(1) + vjk(2)*vjk(2) + vjk(3)*vjk(3)
525530
if (r2jk > cutoff2 .or. r2jk < epsilon(1.0_wp)) cycle
531+
526532
r2 = r2ij*r2ik*r2jk
527533
r1 = sqrt(r2)
528534
r3 = r2 * r1
529535
r5 = r3 * r2
530536

531-
fdmp = 1.0_wp / (1.0_wp + 6.0_wp * (r0 / r1)**(self%alp / 3.0_wp))
537+
fdmp = 1.0_wp / (1.0_wp + 6.0_wp * (r0 / r1)**alp3)
532538
ang = 0.375_wp*(r2ij + r2jk - r2ik)*(r2ij - r2jk + r2ik)&
533539
& *(-r2ij + r2jk + r2ik) / r5 + 1.0_wp / r3
534540

535541
rr = ang*fdmp
536542

537-
dE = rr * c9 * triple/6
543+
dE = rr * c9 * triple * sixth
538544
energy_local(jat, iat) = energy_local(jat, iat) - dE
539545
energy_local(kat, iat) = energy_local(kat, iat) - dE
540546
energy_local(iat, jat) = energy_local(iat, jat) - dE

0 commit comments

Comments
 (0)