Skip to content

Commit 2052275

Browse files
committed
speed up SpatialMap smooth() in 2D without periodicity
1 parent 0b2a61d commit 2052275

2 files changed

Lines changed: 155 additions & 37 deletions

File tree

VERSIONS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ Note that not every commit will be logged here; that is what the Github commit h
66

77

88
development head (in the master branch):
9+
add cppcheck support in our cmake config, for development testing purposes
10+
speed up the SpatialMap smooth() method in 2D without periodicity, since this is important for the Chevy et al. competition algorithm
911

1012

1113
version 5.1 (Eidos version 4.1):

core/spatial_map.cpp

Lines changed: 153 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -879,68 +879,184 @@ void SpatialMap::Convolve_S2(SpatialKernel &kernel)
879879
double *new_values_ptr = new_values;
880880

881881
// FIXME: TO BE PARALLELIZED
882-
for (int64_t b = 0; b < dim_b; ++b)
882+
if (!periodic_a_ && !periodic_b_)
883883
{
884-
double coverage_b = (!periodic_b_ && ((b == 0) || (b == dim_b - 1))) ? 0.5 : 1.0;
885-
886-
for (int64_t a = 0; a < dim_a; ++a)
884+
// special-case for the completely non-periodic case with a non-zero kernel total
885+
// see below for code that checks the optimizations done here, since they're a bit tricky
886+
// FIXME this optimization should be extended to the 1D and 3D cases
887+
for (int64_t b = 0; b < dim_b; ++b)
887888
{
888-
double coverage_a = (!periodic_a_ && ((a == 0) || (a == dim_a - 1))) ? 0.5 : 1.0;
889-
double coverage = coverage_a * coverage_b; // handles partial coverage at the edges of the spatial map
889+
double coverage_b = ((b == 0) || (b == dim_b - 1)) ? 0.5 : 1.0;
890890

891-
// calculate the kernel's effect at point (a,b)
892-
double kernel_total = 0.0;
893-
double conv_total = 0.0;
891+
// we want to loop over just the kernel extent that falls inside [0, dim_b - 1]
892+
int64_t kernel_b_first = std::max((int64_t)0, -(b + kernel_b_offset));
893+
int64_t kernel_b_last = std::min(kernel_dim_b - 1, (kernel_dim_b - 1) + (((dim_b - 1) - b) + kernel_b_offset));
894894

895-
for (int64_t kernel_a = 0; kernel_a < kernel_dim_a; kernel_a++)
895+
for (int64_t a = 0; a < dim_a; ++a)
896896
{
897-
int64_t conv_a = a + kernel_a + kernel_a_offset;
897+
double coverage_a = ((a == 0) || (a == dim_a - 1)) ? 0.5 : 1.0;
898+
double coverage = coverage_a * coverage_b; // handles partial coverage at the edges of the spatial map
899+
900+
// we want to loop over just the kernel extent that falls inside [0, dim_a - 1]
901+
int64_t kernel_a_first = std::max((int64_t)0, 0 - ((a - 0) + kernel_a_offset));
902+
int64_t kernel_a_last = std::min(kernel_dim_a - 1, (kernel_dim_a - 1) + (((dim_a - 1) - a) + kernel_a_offset));
903+
904+
// calculate the kernel's effect at point (a,b)
905+
// BCH 10/2/2025: I tried adding a special case here for the middle portion where the kernel
906+
// is not clipped at all and the kernel_total value is constant; it was not worthwhile,
907+
// testing for whether we are in that case took time, and I think totalling the kernel
908+
// in the loop below pretty much pipelines out so getting rid of it is inconsequential.
909+
double kernel_total = 0.0;
910+
double conv_total = 0.0;
898911

899-
// handle bounds: either clip or wrap
900-
if ((conv_a < 0) || (conv_a >= dim_a))
912+
for (int64_t kernel_a = kernel_a_first; kernel_a <= kernel_a_last; kernel_a++)
901913
{
902-
if (!periodic_a_)
903-
continue;
914+
int64_t conv_a = a + kernel_a + kernel_a_offset;
904915

905-
// periodicity: assume the two edges have identical values, skip over the edge value on the opposite side
906-
while (conv_a < 0)
907-
conv_a += (dim_a - 1); // move -1 to dim - 2
908-
while (conv_a >= dim_a)
909-
conv_a -= (dim_a - 1); // move dim to 1
916+
for (int64_t kernel_b = kernel_b_first; kernel_b <= kernel_b_last; kernel_b++)
917+
{
918+
int64_t conv_b = b + kernel_b + kernel_b_offset;
919+
920+
// this point is within bounds; add it in to the totals
921+
double kernel_value = kernel_values[kernel_a + kernel_b * kernel_dim_a] * coverage;
922+
double pixel_value = values_[conv_a + conv_b * dim_a];
923+
924+
// we keep a total of the kernel values that were within bounds, for this point
925+
kernel_total += kernel_value;
926+
927+
// and we keep a total of the convolution - kernel values times pixel values
928+
conv_total += kernel_value * pixel_value;
929+
}
910930
}
911931

912-
for (int64_t kernel_b = 0; kernel_b < kernel_dim_b; kernel_b++)
932+
*(new_values_ptr++) = ((kernel_total > 0) ? (conv_total / kernel_total) : 0);
933+
}
934+
}
935+
}
936+
else
937+
{
938+
// general case that handles periodicity
939+
for (int64_t b = 0; b < dim_b; ++b)
940+
{
941+
double coverage_b = (!periodic_b_ && ((b == 0) || (b == dim_b - 1))) ? 0.5 : 1.0;
942+
943+
for (int64_t a = 0; a < dim_a; ++a)
944+
{
945+
double coverage_a = (!periodic_a_ && ((a == 0) || (a == dim_a - 1))) ? 0.5 : 1.0;
946+
double coverage = coverage_a * coverage_b; // handles partial coverage at the edges of the spatial map
947+
948+
// calculate the kernel's effect at point (a,b)
949+
double kernel_total = 0.0;
950+
double conv_total = 0.0;
951+
952+
for (int64_t kernel_a = 0; kernel_a < kernel_dim_a; kernel_a++)
913953
{
914-
int64_t conv_b = b + kernel_b + kernel_b_offset;
954+
int64_t conv_a = a + kernel_a + kernel_a_offset;
915955

916956
// handle bounds: either clip or wrap
917-
if ((conv_b < 0) || (conv_b >= dim_b))
957+
if ((conv_a < 0) || (conv_a >= dim_a))
918958
{
919-
if (!periodic_b_)
959+
if (!periodic_a_)
920960
continue;
921961

922962
// periodicity: assume the two edges have identical values, skip over the edge value on the opposite side
923-
while (conv_b < 0)
924-
conv_b += (dim_b - 1); // move -1 to dim - 2
925-
while (conv_b >= dim_b)
926-
conv_b -= (dim_b - 1); // move dim to 1
963+
while (conv_a < 0)
964+
conv_a += (dim_a - 1); // move -1 to dim - 2
965+
while (conv_a >= dim_a)
966+
conv_a -= (dim_a - 1); // move dim to 1
927967
}
928968

929-
// this point is within bounds; add it in to the totals
930-
double kernel_value = kernel_values[kernel_a + kernel_b * kernel_dim_a] * coverage;
931-
double pixel_value = values_[conv_a + conv_b * dim_a];
969+
for (int64_t kernel_b = 0; kernel_b < kernel_dim_b; kernel_b++)
970+
{
971+
int64_t conv_b = b + kernel_b + kernel_b_offset;
972+
973+
// handle bounds: either clip or wrap
974+
if ((conv_b < 0) || (conv_b >= dim_b))
975+
{
976+
if (!periodic_b_)
977+
continue;
978+
979+
// periodicity: assume the two edges have identical values, skip over the edge value on the opposite side
980+
while (conv_b < 0)
981+
conv_b += (dim_b - 1); // move -1 to dim - 2
982+
while (conv_b >= dim_b)
983+
conv_b -= (dim_b - 1); // move dim to 1
984+
}
985+
986+
// this point is within bounds; add it in to the totals
987+
double kernel_value = kernel_values[kernel_a + kernel_b * kernel_dim_a] * coverage;
988+
double pixel_value = values_[conv_a + conv_b * dim_a];
989+
990+
// we keep a total of the kernel values that were within bounds, for this point
991+
kernel_total += kernel_value;
992+
993+
// and we keep a total of the convolution - kernel values times pixel values
994+
conv_total += kernel_value * pixel_value;
995+
}
996+
}
997+
998+
*(new_values_ptr++) = ((kernel_total > 0) ? (conv_total / kernel_total) : 0);
999+
}
1000+
}
1001+
}
1002+
1003+
#if 0
1004+
// Check the values computed above by the optimized algorithm against the simple algorithm
1005+
// test_smooth2D.slim is a test file that can be used with this validation code
1006+
new_values_ptr = new_values;
1007+
1008+
if (!periodic_a_ && !periodic_b_)
1009+
{
1010+
for (int64_t b = 0; b < dim_b; ++b)
1011+
{
1012+
double coverage_b = (!periodic_b_ && ((b == 0) || (b == dim_b - 1))) ? 0.5 : 1.0;
1013+
1014+
for (int64_t a = 0; a < dim_a; ++a)
1015+
{
1016+
double coverage_a = (!periodic_a_ && ((a == 0) || (a == dim_a - 1))) ? 0.5 : 1.0;
1017+
double coverage = coverage_a * coverage_b; // handles partial coverage at the edges of the spatial map
1018+
1019+
// calculate the kernel's effect at point (a,b)
1020+
double kernel_total = 0.0;
1021+
double conv_total = 0.0;
1022+
1023+
for (int64_t kernel_a = 0; kernel_a < kernel_dim_a; kernel_a++)
1024+
{
1025+
int64_t conv_a = a + kernel_a + kernel_a_offset;
9321026

933-
// we keep a total of the kernel values that were within bounds, for this point
934-
kernel_total += kernel_value;
1027+
// handle bounds: either clip or wrap
1028+
if ((conv_a < 0) || (conv_a >= dim_a))
1029+
continue;
9351030

936-
// and we keep a total of the convolution - kernel values times pixel values
937-
conv_total += kernel_value * pixel_value;
1031+
for (int64_t kernel_b = 0; kernel_b < kernel_dim_b; kernel_b++)
1032+
{
1033+
int64_t conv_b = b + kernel_b + kernel_b_offset;
1034+
1035+
// handle bounds: either clip or wrap
1036+
if ((conv_b < 0) || (conv_b >= dim_b))
1037+
continue;
1038+
1039+
// this point is within bounds; add it in to the totals
1040+
double kernel_value = kernel_values[kernel_a + kernel_b * kernel_dim_a] * coverage;
1041+
double pixel_value = values_[conv_a + conv_b * dim_a];
1042+
1043+
// we keep a total of the kernel values that were within bounds, for this point
1044+
kernel_total += kernel_value;
1045+
1046+
// and we keep a total of the convolution - kernel values times pixel values
1047+
conv_total += kernel_value * pixel_value;
1048+
}
9381049
}
1050+
1051+
double check_value = ((kernel_total > 0) ? (conv_total / kernel_total) : 0);
1052+
double calculated_value = *(new_values_ptr++);
1053+
1054+
if (check_value != calculated_value)
1055+
std::cout << "Convolve_S2 optimization mismatch: " << check_value << " versus " << calculated_value << std::endl;
9391056
}
940-
941-
*(new_values_ptr++) = ((kernel_total > 0) ? (conv_total / kernel_total) : 0);
9421057
}
9431058
}
1059+
#endif
9441060

9451061
TakeOverMallocedValues(new_values, 2, grid_size_); // takes new_values from us
9461062
}

0 commit comments

Comments
 (0)