@@ -879,68 +879,184 @@ void SpatialMap::Convolve_S2(SpatialKernel &kernel)
879879 double *new_values_ptr = new_values;
880880
881881 // FIXME: TO BE PARALLELIZED
882- for ( int64_t b = 0 ; b < dim_b; ++b )
882+ if (!periodic_a_ && !periodic_b_ )
883883 {
884- double coverage_b = (!periodic_b_ && ((b == 0 ) || (b == dim_b - 1 ))) ? 0.5 : 1.0 ;
885-
886- for (int64_t a = 0 ; a < dim_a; ++a)
884+ // special-case for the completely non-periodic case with a non-zero kernel total
885+ // see below for code that checks the optimizations done here, since they're a bit tricky
886+ // FIXME this optimization should be extended to the 1D and 3D cases
887+ for (int64_t b = 0 ; b < dim_b; ++b)
887888 {
888- double coverage_a = (!periodic_a_ && ((a == 0 ) || (a == dim_a - 1 ))) ? 0.5 : 1.0 ;
889- double coverage = coverage_a * coverage_b; // handles partial coverage at the edges of the spatial map
889+ double coverage_b = ((b == 0 ) || (b == dim_b - 1 )) ? 0.5 : 1.0 ;
890890
891- // calculate the kernel's effect at point (a,b)
892- double kernel_total = 0.0 ;
893- double conv_total = 0.0 ;
891+ // we want to loop over just the kernel extent that falls inside [0, dim_b - 1]
892+ int64_t kernel_b_first = std::max (( int64_t ) 0 , -(b + kernel_b_offset)) ;
893+ int64_t kernel_b_last = std::min (kernel_dim_b - 1 , (kernel_dim_b - 1 ) + (((dim_b - 1 ) - b) + kernel_b_offset)) ;
894894
895- for (int64_t kernel_a = 0 ; kernel_a < kernel_dim_a; kernel_a++ )
895+ for (int64_t a = 0 ; a < dim_a; ++a )
896896 {
897- int64_t conv_a = a + kernel_a + kernel_a_offset;
897+ double coverage_a = ((a == 0 ) || (a == dim_a - 1 )) ? 0.5 : 1.0 ;
898+ double coverage = coverage_a * coverage_b; // handles partial coverage at the edges of the spatial map
899+
900+ // we want to loop over just the kernel extent that falls inside [0, dim_a - 1]
901+ int64_t kernel_a_first = std::max ((int64_t )0 , 0 - ((a - 0 ) + kernel_a_offset));
902+ int64_t kernel_a_last = std::min (kernel_dim_a - 1 , (kernel_dim_a - 1 ) + (((dim_a - 1 ) - a) + kernel_a_offset));
903+
904+ // calculate the kernel's effect at point (a,b)
905+ // BCH 10/2/2025: I tried adding a special case here for the middle portion where the kernel
906+ // is not clipped at all and the kernel_total value is constant; it was not worthwhile,
907+ // testing for whether we are in that case took time, and I think totalling the kernel
908+ // in the loop below pretty much pipelines out so getting rid of it is inconsequential.
909+ double kernel_total = 0.0 ;
910+ double conv_total = 0.0 ;
898911
899- // handle bounds: either clip or wrap
900- if ((conv_a < 0 ) || (conv_a >= dim_a))
912+ for (int64_t kernel_a = kernel_a_first; kernel_a <= kernel_a_last; kernel_a++)
901913 {
902- if (!periodic_a_)
903- continue ;
914+ int64_t conv_a = a + kernel_a + kernel_a_offset;
904915
905- // periodicity: assume the two edges have identical values, skip over the edge value on the opposite side
906- while (conv_a < 0 )
907- conv_a += (dim_a - 1 ); // move -1 to dim - 2
908- while (conv_a >= dim_a)
909- conv_a -= (dim_a - 1 ); // move dim to 1
916+ for (int64_t kernel_b = kernel_b_first; kernel_b <= kernel_b_last; kernel_b++)
917+ {
918+ int64_t conv_b = b + kernel_b + kernel_b_offset;
919+
920+ // this point is within bounds; add it in to the totals
921+ double kernel_value = kernel_values[kernel_a + kernel_b * kernel_dim_a] * coverage;
922+ double pixel_value = values_[conv_a + conv_b * dim_a];
923+
924+ // we keep a total of the kernel values that were within bounds, for this point
925+ kernel_total += kernel_value;
926+
927+ // and we keep a total of the convolution - kernel values times pixel values
928+ conv_total += kernel_value * pixel_value;
929+ }
910930 }
911931
912- for (int64_t kernel_b = 0 ; kernel_b < kernel_dim_b; kernel_b++)
932+ *(new_values_ptr++) = ((kernel_total > 0 ) ? (conv_total / kernel_total) : 0 );
933+ }
934+ }
935+ }
936+ else
937+ {
938+ // general case that handles periodicity
939+ for (int64_t b = 0 ; b < dim_b; ++b)
940+ {
941+ double coverage_b = (!periodic_b_ && ((b == 0 ) || (b == dim_b - 1 ))) ? 0.5 : 1.0 ;
942+
943+ for (int64_t a = 0 ; a < dim_a; ++a)
944+ {
945+ double coverage_a = (!periodic_a_ && ((a == 0 ) || (a == dim_a - 1 ))) ? 0.5 : 1.0 ;
946+ double coverage = coverage_a * coverage_b; // handles partial coverage at the edges of the spatial map
947+
948+ // calculate the kernel's effect at point (a,b)
949+ double kernel_total = 0.0 ;
950+ double conv_total = 0.0 ;
951+
952+ for (int64_t kernel_a = 0 ; kernel_a < kernel_dim_a; kernel_a++)
913953 {
914- int64_t conv_b = b + kernel_b + kernel_b_offset ;
954+ int64_t conv_a = a + kernel_a + kernel_a_offset ;
915955
916956 // handle bounds: either clip or wrap
917- if ((conv_b < 0 ) || (conv_b >= dim_b ))
957+ if ((conv_a < 0 ) || (conv_a >= dim_a ))
918958 {
919- if (!periodic_b_ )
959+ if (!periodic_a_ )
920960 continue ;
921961
922962 // periodicity: assume the two edges have identical values, skip over the edge value on the opposite side
923- while (conv_b < 0 )
924- conv_b += (dim_b - 1 ); // move -1 to dim - 2
925- while (conv_b >= dim_b )
926- conv_b -= (dim_b - 1 ); // move dim to 1
963+ while (conv_a < 0 )
964+ conv_a += (dim_a - 1 ); // move -1 to dim - 2
965+ while (conv_a >= dim_a )
966+ conv_a -= (dim_a - 1 ); // move dim to 1
927967 }
928968
929- // this point is within bounds; add it in to the totals
930- double kernel_value = kernel_values[kernel_a + kernel_b * kernel_dim_a] * coverage;
931- double pixel_value = values_[conv_a + conv_b * dim_a];
969+ for (int64_t kernel_b = 0 ; kernel_b < kernel_dim_b; kernel_b++)
970+ {
971+ int64_t conv_b = b + kernel_b + kernel_b_offset;
972+
973+ // handle bounds: either clip or wrap
974+ if ((conv_b < 0 ) || (conv_b >= dim_b))
975+ {
976+ if (!periodic_b_)
977+ continue ;
978+
979+ // periodicity: assume the two edges have identical values, skip over the edge value on the opposite side
980+ while (conv_b < 0 )
981+ conv_b += (dim_b - 1 ); // move -1 to dim - 2
982+ while (conv_b >= dim_b)
983+ conv_b -= (dim_b - 1 ); // move dim to 1
984+ }
985+
986+ // this point is within bounds; add it in to the totals
987+ double kernel_value = kernel_values[kernel_a + kernel_b * kernel_dim_a] * coverage;
988+ double pixel_value = values_[conv_a + conv_b * dim_a];
989+
990+ // we keep a total of the kernel values that were within bounds, for this point
991+ kernel_total += kernel_value;
992+
993+ // and we keep a total of the convolution - kernel values times pixel values
994+ conv_total += kernel_value * pixel_value;
995+ }
996+ }
997+
998+ *(new_values_ptr++) = ((kernel_total > 0 ) ? (conv_total / kernel_total) : 0 );
999+ }
1000+ }
1001+ }
1002+
1003+ #if 0
1004+ // Check the values computed above by the optimized algorithm against the simple algorithm
1005+ // test_smooth2D.slim is a test file that can be used with this validation code
1006+ new_values_ptr = new_values;
1007+
1008+ if (!periodic_a_ && !periodic_b_)
1009+ {
1010+ for (int64_t b = 0; b < dim_b; ++b)
1011+ {
1012+ double coverage_b = (!periodic_b_ && ((b == 0) || (b == dim_b - 1))) ? 0.5 : 1.0;
1013+
1014+ for (int64_t a = 0; a < dim_a; ++a)
1015+ {
1016+ double coverage_a = (!periodic_a_ && ((a == 0) || (a == dim_a - 1))) ? 0.5 : 1.0;
1017+ double coverage = coverage_a * coverage_b; // handles partial coverage at the edges of the spatial map
1018+
1019+ // calculate the kernel's effect at point (a,b)
1020+ double kernel_total = 0.0;
1021+ double conv_total = 0.0;
1022+
1023+ for (int64_t kernel_a = 0; kernel_a < kernel_dim_a; kernel_a++)
1024+ {
1025+ int64_t conv_a = a + kernel_a + kernel_a_offset;
9321026
933- // we keep a total of the kernel values that were within bounds, for this point
934- kernel_total += kernel_value;
1027+ // handle bounds: either clip or wrap
1028+ if ((conv_a < 0) || (conv_a >= dim_a))
1029+ continue;
9351030
936- // and we keep a total of the convolution - kernel values times pixel values
937- conv_total += kernel_value * pixel_value;
1031+ for (int64_t kernel_b = 0; kernel_b < kernel_dim_b; kernel_b++)
1032+ {
1033+ int64_t conv_b = b + kernel_b + kernel_b_offset;
1034+
1035+ // handle bounds: either clip or wrap
1036+ if ((conv_b < 0) || (conv_b >= dim_b))
1037+ continue;
1038+
1039+ // this point is within bounds; add it in to the totals
1040+ double kernel_value = kernel_values[kernel_a + kernel_b * kernel_dim_a] * coverage;
1041+ double pixel_value = values_[conv_a + conv_b * dim_a];
1042+
1043+ // we keep a total of the kernel values that were within bounds, for this point
1044+ kernel_total += kernel_value;
1045+
1046+ // and we keep a total of the convolution - kernel values times pixel values
1047+ conv_total += kernel_value * pixel_value;
1048+ }
9381049 }
1050+
1051+ double check_value = ((kernel_total > 0) ? (conv_total / kernel_total) : 0);
1052+ double calculated_value = *(new_values_ptr++);
1053+
1054+ if (check_value != calculated_value)
1055+ std::cout << "Convolve_S2 optimization mismatch: " << check_value << " versus " << calculated_value << std::endl;
9391056 }
940-
941- *(new_values_ptr++) = ((kernel_total > 0 ) ? (conv_total / kernel_total) : 0 );
9421057 }
9431058 }
1059+ #endif
9441060
9451061 TakeOverMallocedValues (new_values, 2 , grid_size_); // takes new_values from us
9461062}
0 commit comments