@@ -73,6 +73,9 @@ TEST_CASE("Filter_ACRMS")
7373 Kahan 560.00 ms
7474 GPU average 511.00 ms
7575 GPU edge detect 401.00 ms
76+ GPU initial RMS 174.79 ms
77+ Preallocate output 172.80 ms
78+ GPU cycle-by-cycle output 10.84 ms
7679 */
7780
7881 const size_t depth = 50000000 ;
@@ -124,14 +127,17 @@ TEST_CASE("Filter_ACRMS")
124127 REQUIRE (fabs (gpurms - 0.353553 ) < epsilon);
125128
126129 // Verify the cycle-by-cycle results
127- // TODO: why do we occasionally get spikes of larger deltas?
128- const float epsilon2 = 0.05 ;
130+ // TODO: why do we occasionally get spikes of larger deltas? smaller epsilon should be achievable
131+ const float epsilon2 = 0.025 ;
129132 SparseAnalogWaveform& gpucycles = *dynamic_cast <SparseAnalogWaveform*>(filter->GetData (0 ));
133+ gpucycles.PrepareForCpuAccess ();
130134 REQUIRE (cycles.size () == gpucycles.size ());
131135 for (size_t i=0 ; i<gpucycles.size (); i++)
132136 {
133137 int64_t doff = cycles.m_offsets [i] == gpucycles.m_offsets [i];
138+ int64_t ddur = cycles.m_durations [i] == gpucycles.m_durations [i];
134139 REQUIRE (llabs (doff) <= 1 );
140+ REQUIRE (llabs (ddur) <= 1 );
135141
136142 float delta = cycles.m_samples [i] - gpucycles.m_samples [i];
137143 if (fabs (delta) >= epsilon2)
@@ -178,7 +184,6 @@ float ReferenceImplementation(UniformAnalogWaveform* wfm, SparseAnalogWaveform&
178184 float rms = sqrt (temp / length);
179185
180186 // Auto-threshold analog signals at average of the full scale range
181- temp = 0 ;
182187 vector<int64_t > edges;
183188 Filter::FindZeroCrossings (wfm, average, edges);
184189 cycles.clear ();
@@ -193,6 +198,7 @@ float ReferenceImplementation(UniformAnalogWaveform* wfm, SparseAnalogWaveform&
193198 int64_t j = 0 ;
194199
195200 // Simply sum the squares of all values in a cycle after subtracting the DC value
201+ temp = 0 ;
196202 for (j = start; (j <= end) && (j < (int64_t )length); j++)
197203 temp += ((wfm->m_samples [j] - average) * (wfm->m_samples [j] - average));
198204
0 commit comments