fix #550, add filter() function for running means and such

bhaller · bhaller · commit 3ccddf630ead · 2025-08-31T16:09:18.000-04:00
diff --git a/EidosScribe/EidosHelpFunctions.rtf b/EidosScribe/EidosHelpFunctions.rtf
@@ -915,7 +915,44 @@
 \f3\fs20  with a matrix or array argument, because the desired behavior in that case has not yet been implemented.\
 \pard\pardeftab720\li720\fi-446\ri720\sb180\sa60\partightenfactor0
 
-\f1\fs18 \cf0 \kerning1\expnd0\expndtw0 (+$)max(+\'a0x, ...)\
+\f1\fs18 \cf2 \kerning1\expnd0\expndtw0 (float)filter(numeric\'a0x, float\'a0filter)\
+\pard\pardeftab720\li547\ri720\sb60\sa60\partightenfactor0
+
+\f3\fs20 \cf2 Returns the result of convolving 
+\f1\fs18 x
+\f3\fs20  with 
+\f1\fs18 filter
+\f3\fs20 .  The returned vector will be the same length as 
+\f1\fs18 x
+\f3\fs20 .  The convolution is performed by centering 
+\f1\fs18 filter
+\f3\fs20  on each position of 
+\f1\fs18 x
+\f3\fs20  to produce a corresponding result element that is the sum over the products of each 
+\f1\fs18 filter
+\f3\fs20  value with each 
+\f1\fs18 x
+\f3\fs20  value within the filter\'92s range.  If the filter, centered over a given value of 
+\f1\fs18 x
+\f3\fs20 , extends beyond the end of 
+\f1\fs18 x
+\f3\fs20  the corresponding result element will be 
+\f1\fs18 NAN
+\f3\fs20  since its value is undefined.  The length of 
+\f1\fs18 filter
+\f3\fs20  is required to be odd, so that the filter has a central value (and can thus be centered over each value of 
+\f1\fs18 x
+\f3\fs20 ).\
+This function is useful for computing running means and similar transformations of an input vector.  For a simple running mean of width 
+\f1\fs18 w
+\f3\fs20 , pass 
+\f1\fs18 rep(1/w, w)
+\f3\fs20  for 
+\f1\fs18 filter
+\f3\fs20 .\
+\pard\pardeftab720\li720\fi-446\ri720\sb180\sa60\partightenfactor0
+
+\f1\fs18 \cf0 (+$)max(+\'a0x, ...)\
 \pard\pardeftab720\li547\ri720\sb60\sa60\partightenfactor0
 
 \f3\fs20 \cf0 Returns the 
diff --git a/QtSLiM/help/EidosHelpFunctions.html b/QtSLiM/help/EidosHelpFunctions.html
@@ -118,6 +118,9 @@
 <p class="p5"><span class="s5">Returns the <b>sample Pearson’s correlation coefficient</b> between </span><span class="s6">x</span><span class="s5"> and </span><span class="s6">y</span><span class="s5">, usually denoted <i>r</i>.<span class="Apple-converted-space">  </span>The sizes of </span><span class="s6">x</span><span class="s5"> and </span><span class="s6">y</span><span class="s5"> must be identical.<span class="Apple-converted-space">  </span>If </span><span class="s6">x</span><span class="s5"> and </span><span class="s6">y</span><span class="s5"> have a size of </span><span class="s6">0</span><span class="s5"> or </span><span class="s6">1</span><span class="s5">, the return value will be </span><span class="s6">NULL</span><span class="s5">.<span class="Apple-converted-space">  </span>At present it is illegal to call </span><span class="s6">cor()</span><span class="s5"> with a matrix or array argument, because the desired behavior in that case has not yet been implemented.</span></p>
 <p class="p4"><span class="s5">(float$)cov(numeric x, numeric y)</span></p>
 <p class="p5"><span class="s5">Returns the <b>corrected sample covariance</b> between </span><span class="s6">x</span><span class="s5"> and </span><span class="s6">y</span><span class="s5">.<span class="Apple-converted-space">  </span>The sizes of </span><span class="s6">x</span><span class="s5"> and </span><span class="s6">y</span><span class="s5"> must be identical.<span class="Apple-converted-space">  </span>If </span><span class="s6">x</span><span class="s5"> and </span><span class="s6">y</span><span class="s5"> have a size of </span><span class="s6">0</span><span class="s5"> or </span><span class="s6">1</span><span class="s5">, the return value will be </span><span class="s6">NULL</span><span class="s5">.<span class="Apple-converted-space">  </span>At present it is illegal to call </span><span class="s6">cov()</span><span class="s5"> with a matrix or array argument, because the desired behavior in that case has not yet been implemented.</span></p>
+<p class="p4">(float)filter(numeric x, float filter)</p>
+<p class="p5">Returns the result of convolving <span class="s2">x</span> with <span class="s2">filter</span>.<span class="Apple-converted-space">  </span>The returned vector will be the same length as <span class="s2">x</span>.<span class="Apple-converted-space">  </span>The convolution is performed by centering <span class="s2">filter</span> on each position of <span class="s2">x</span> to produce a corresponding result element that is the sum over the products of each <span class="s2">filter</span> value with each <span class="s2">x</span> value within the filter’s range.<span class="Apple-converted-space">  </span>If the filter, centered over a given value of <span class="s2">x</span>, extends beyond the end of <span class="s2">x</span> the corresponding result element will be <span class="s2">NAN</span> since its value is undefined.<span class="Apple-converted-space">  </span>The length of <span class="s2">filter</span> is required to be odd, so that the filter has a central value (and can thus be centered over each value of <span class="s2">x</span>).</p>
+<p class="p5">This function is useful for computing running means and similar transformations of an input vector.<span class="Apple-converted-space">  </span>For a simple running mean of width <span class="s2">w</span>, pass <span class="s2">rep(1/w, w)</span> for <span class="s2">filter</span>.</p>
 <p class="p2">(+$)max(+ x, ...)</p>
 <p class="p3">Returns the <b>maximum</b> of <span class="s2">x</span> and the other arguments supplied: the single greatest value contained by all of them.<span class="Apple-converted-space">  </span>All of the arguments must be the same type as <span class="s2">x</span>, and the return type will match that of <span class="s2">x</span><span class="s3">.</span><span class="Apple-converted-space">  </span>If all of the arguments have a size of <span class="s2">0</span>, the return value will be <span class="s2">NULL</span>; note that this means that <span class="s2">max(x, max(y))</span> may produce an error, if <span class="s2">max(y)</span> is <span class="s2">NULL</span>, in cases where <span class="s2">max(x, y)</span> does not.</p>
 <p class="p2">(float$)mean(lif<span class="s1"> </span>x)</p>
diff --git a/VERSIONS b/VERSIONS
@@ -26,6 +26,7 @@ development head (in the master branch):
 	fix #534, support uniparentally-transmitted chromosomes in hermaphrodite populations
 	add new Plot legendTitleEntry() method for making a title line in a plot legend
 	click-drag in the SLiMgui script view's line number area now drag-selects whole lines
+	add filter() function to Eidos, which is essentially a subset of R's filter(): (float)filter(numeric x, float filter)
 
 
 version 5.0 (Eidos version 4.0):
diff --git a/eidos/eidos_functions.cpp b/eidos/eidos_functions.cpp
@@ -116,6 +116,7 @@ const std::vector<EidosFunctionSignature_CSP> &EidosInterpreter::BuiltInFunction
 		
 		signatures->emplace_back((EidosFunctionSignature *)(new EidosFunctionSignature("cor",				Eidos_ExecuteFunction_cor,			kEidosValueMaskFloat | kEidosValueMaskSingleton))->AddNumeric("x")->AddNumeric("y"));
 		signatures->emplace_back((EidosFunctionSignature *)(new EidosFunctionSignature("cov",				Eidos_ExecuteFunction_cov,			kEidosValueMaskFloat | kEidosValueMaskSingleton))->AddNumeric("x")->AddNumeric("y"));
+		signatures->emplace_back((EidosFunctionSignature *)(new EidosFunctionSignature("filter",			Eidos_ExecuteFunction_filter,		kEidosValueMaskFloat))->AddNumeric("x")->AddFloat("filter"));
 		signatures->emplace_back((EidosFunctionSignature *)(new EidosFunctionSignature("max",				Eidos_ExecuteFunction_max,			kEidosValueMaskAnyBase | kEidosValueMaskSingleton))->AddAnyBase("x")->AddEllipsis());
 		signatures->emplace_back((EidosFunctionSignature *)(new EidosFunctionSignature("mean",				Eidos_ExecuteFunction_mean,			kEidosValueMaskFloat | kEidosValueMaskSingleton))->AddLogicalEquiv("x"));
 		signatures->emplace_back((EidosFunctionSignature *)(new EidosFunctionSignature("min",				Eidos_ExecuteFunction_min,			kEidosValueMaskAnyBase | kEidosValueMaskSingleton))->AddAnyBase("x")->AddEllipsis());
diff --git a/eidos/eidos_functions.h b/eidos/eidos_functions.h
@@ -84,6 +84,7 @@ EidosValue_SP Eidos_ExecuteFunction_trunc(const std::vector<EidosValue_SP> &p_ar
 //	statistics functions
 EidosValue_SP Eidos_ExecuteFunction_cor(const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter);
 EidosValue_SP Eidos_ExecuteFunction_cov(const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter);
+EidosValue_SP Eidos_ExecuteFunction_filter(const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter);
 EidosValue_SP Eidos_ExecuteFunction_max(const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter);
 EidosValue_SP Eidos_ExecuteFunction_mean(const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter);
 EidosValue_SP Eidos_ExecuteFunction_min(const std::vector<EidosValue_SP> &p_arguments, EidosInterpreter &p_interpreter);
diff --git a/eidos/eidos_functions_stats.cpp b/eidos/eidos_functions_stats.cpp
@@ -140,6 +140,141 @@ EidosValue_SP Eidos_ExecuteFunction_cov(const std::vector<EidosValue_SP> &p_argu
 	return result_SP;
 }
 
+//	(float)filter(numeric x, float filter)
+EidosValue_SP Eidos_ExecuteFunction_filter(const std::vector<EidosValue_SP> &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter)
+{
+	// Convolves x with a centered filter and returns a float vector of the same length as x; patterned after R's
+	// filter(), but only for method="convolution", sides=2, circular=F; values outside x are treated as NAN, so the
+	// first and last filter_count/2 result elements are NAN; terminates with an error for a bad filter length
+	EidosValue *x_value = p_arguments[0].get();
+	EidosValue *filter_value = p_arguments[1].get();
+	int x_count = x_value->Count();
+	int filter_count = filter_value->Count();
+	
+	// the maximum filter length is arbitrary, but seems like a good idea to flag weird bugs?
+	if ((filter_count <= 0) || (filter_count > 999) || (filter_count % 2 == 0))
+		EIDOS_TERMINATION << "ERROR (Eidos_ExecuteFunction_filter): function filter() requires filter to have a length that is odd and within the interval [1, 999]." << EidosTerminate(nullptr);
+	
+	// half rounded down; e.g., for a filter of length 5, this is 2; this is the number of NANs at the
+	// start/end of the result, since the filter extends past the end of x for this many positions
+	int half_filter = filter_count / 2;
+	
+	// the result is the same length as x, in all cases
+	EidosValue_Float *float_result = (new (gEidosValuePool->AllocateChunk()) EidosValue_Float())->resize_no_initialize(x_count);
+	EidosValue_SP result_SP(float_result);
+	double *result_data = float_result->FloatData_Mutable();
+	
+	// if x is shorter than filter, the filter never fits inside x, so every result element is NAN; we must return here,
+	// since the code below indexes x and the result assuming x_count >= filter_count; an empty x also takes this path
+	if (x_count < filter_count)
+	{
+		for (int pos = 0; pos < x_count; ++pos)
+			result_data[pos] = std::numeric_limits<double>::quiet_NaN();
+		return result_SP;
+	}
+	
+	// get the filter data; the x data is fetched below, once we branch on the type of x
+	const double *filter_data = filter_value->FloatData();
+	
+	// we test here for a simple moving average, with equal weights summing to 1.0, to special-case it
+	bool is_simple_moving_average = true;
+	
+	for (int index = 0; index < filter_count; ++index)
+	{
+		if (std::abs(filter_data[index] - (1.0 / filter_count)) > 1e-15)	// 1e-15 is a roundoff epsilon
+		{
+			is_simple_moving_average = false;
+			break;
+		}
+	}
+	
+	// the half-filter length at the start fills with NAN
+	for (int pos = 0; pos < half_filter; ++pos)
+		result_data[pos] = std::numeric_limits<double>::quiet_NaN();
+	
+	// now we branch depending on whether x is integer or float
+	if (x_value->Type() == EidosValueType::kValueFloat)
+	{
+		const double *x_data = x_value->FloatData();
+		
+		if (is_simple_moving_average)
+		{
+			// the first position after the half-filter length sets up a moving total
+			double moving_total = 0.0;
+			
+			for (int pos = 0; pos < filter_count; ++pos)
+				moving_total += x_data[pos];
+			
+			result_data[half_filter] = moving_total / filter_count;
+			
+			// the remaining non-NAN positions modify the moving total; NOTE(review): a NAN/INF in x sticks in the total
+			for (int pos = half_filter + 1; pos < x_count - half_filter; ++pos)
+			{
+				moving_total -= x_data[pos - half_filter - 1];
+				moving_total += x_data[pos + half_filter];
+				
+				result_data[pos] = moving_total / filter_count;
+			}
+		}
+		else
+		{
+			// we compute the filter over the appropriate range of x at each position
+			for (int pos = half_filter; pos < x_count - half_filter; ++pos)
+			{
+				double filter_total = 0.0;
+				
+				for (int filter_pos = 0; filter_pos < filter_count; filter_pos++)
+					filter_total += filter_data[filter_pos] * x_data[filter_pos + pos - half_filter];
+				
+				result_data[pos] = filter_total;
+			}
+		}
+	}
+	else
+	{
+		const int64_t *x_data = x_value->IntData();
+		
+		if (is_simple_moving_average)
+		{
+			// the first position after the half-filter length sets up a moving total
+			double moving_total = 0.0;
+			
+			for (int pos = 0; pos < filter_count; ++pos)
+				moving_total += x_data[pos];
+			
+			result_data[half_filter] = moving_total / filter_count;
+			
+			// the remaining non-NAN positions modify the moving total
+			for (int pos = half_filter + 1; pos < x_count - half_filter; ++pos)
+			{
+				moving_total -= x_data[pos - half_filter - 1];
+				moving_total += x_data[pos + half_filter];
+				
+				result_data[pos] = moving_total / filter_count;
+			}
+		}
+		else
+		{
+			// we compute the filter over the appropriate range of x at each position
+			for (int pos = half_filter; pos < x_count - half_filter; ++pos)
+			{
+				double filter_total = 0.0;
+				
+				for (int filter_pos = 0; filter_pos < filter_count; filter_pos++)
+					filter_total += filter_data[filter_pos] * x_data[filter_pos + pos - half_filter];
+				
+				result_data[pos] = filter_total;
+			}
+		}
+	}
+	
+	// the half-filter length at the end fills with NAN
+	for (int pos = x_count - half_filter; pos < x_count; ++pos)
+		result_data[pos] = std::numeric_limits<double>::quiet_NaN();
+	
+	return result_SP;
+}
+
 //	(+$)max(+ x, ...)
 EidosValue_SP Eidos_ExecuteFunction_max(const std::vector<EidosValue_SP> &p_arguments, __attribute__((unused)) EidosInterpreter &p_interpreter)
 {
diff --git a/eidos/eidos_test_functions_statistics.cpp b/eidos/eidos_test_functions_statistics.cpp
@@ -78,6 +78,26 @@ void _RunFunctionStatisticsTests_a_through_p(void)
 	EidosAssertScriptSuccess_NULL("cov(float(0), float(0));");
 	EidosAssertScriptRaise("cov(string(0), string(0));", 0, "cannot be type");
 	
+	// filter()
+	EidosAssertScriptRaise("filter(1.0:10, float(0));", 0, "within the interval [1,");
+	EidosAssertScriptRaise("filter(1.0:10, 1.0:2);", 0, "length that is odd");
+	EidosAssertScriptSuccess_L("x = runif(100); identical(x, filter(x, 1.0));", true);
+	EidosAssertScriptSuccess_L("x = runif(100); identical(x * 2.0, filter(x, 2.0));", true);
+	EidosAssertScriptSuccess_L("x = runif(100); identical(x * -2.5, filter(x, -2.5));", true);
+	EidosAssertScriptSuccess_L("x = rep(NAN, 10); identical(x, filter(x, 1.0));", true);
+	EidosAssertScriptSuccess_FV("filter(1.0:10, rep(1/3, 3));", {std::numeric_limits<double>::quiet_NaN(), 2, 3, 4, 5, 6, 7, 8, 9, std::numeric_limits<double>::quiet_NaN()});
+	EidosAssertScriptSuccess_FV("filter(1.0:10, rep(1/5, 5));", {std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 3, 4, 5, 6, 7, 8, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN()});
+	EidosAssertScriptSuccess_FV("filter(1.0:10, rep(1.0, 5));", {std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 15, 20, 25, 30, 35, 40, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN()});
+	
+	EidosAssertScriptRaise("filter(1:10, float(0));", 0, "within the interval [1,");
+	EidosAssertScriptRaise("filter(1:10, 1.0:2);", 0, "length that is odd");
+	EidosAssertScriptSuccess_L("x = rdunif(100, -100, 100); identical(x * 1.0, filter(x, 1.0));", true);
+	EidosAssertScriptSuccess_L("x = rdunif(100, -100, 100); identical(x * 2.0, filter(x, 2.0));", true);
+	EidosAssertScriptSuccess_L("x = rdunif(100); identical(x * -2.5, filter(x, -2.5));", true);
+	EidosAssertScriptSuccess_FV("filter(1:10, rep(1/3, 3));", {std::numeric_limits<double>::quiet_NaN(), 2, 3, 4, 5, 6, 7, 8, 9, std::numeric_limits<double>::quiet_NaN()});
+	EidosAssertScriptSuccess_FV("filter(1:10, rep(1/5, 5));", {std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 3, 4, 5, 6, 7, 8, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN()});
+	EidosAssertScriptSuccess_FV("filter(1:10, rep(1.0, 5));", {std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN(), 15, 20, 25, 30, 35, 40, std::numeric_limits<double>::quiet_NaN(), std::numeric_limits<double>::quiet_NaN()});
+	
 	// max()
 	EidosAssertScriptSuccess_L("max(T);", true);
 	EidosAssertScriptSuccess_I("max(3);", 3);