Skip to content

Commit 75f92f7

Browse files
committed
[mathcore] Add test for non-evenly-dividing TKDTreeBinning
Add a regression test covering the non-evenly-dividing TKDTreeBinning case. This covers the bug reported in GitHub issue #10786. 🤖 Done with the help of AI.
1 parent 610ca8c commit 75f92f7

1 file changed

Lines changed: 63 additions & 0 deletions

File tree

math/mathcore/test/testkdTreeBinning.cxx

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,67 @@ int testkdTreeBinning()
165165
return nfail;
166166
}
167167

168+
// Regression test for the case where the requested number of bins does not
169+
// divide the data size evenly. In that situation the kd-tree builds more
170+
// terminal nodes (bins) than naively expected, and FindBin() must never return
171+
// an index outside [0, GetNBins()). See
172+
// https://github.com/root-project/root/issues/10784 about
173+
// TKDTreeBinning::FindBin returning non-existent bins. Returns the number of
174+
// detected failures (0 on success) so that the caller can turn it into a
175+
// non-zero process exit code: ROOT's Error() only prints, it does not by
176+
// itself make the test fail under ctest.
177+
int testkdTreeBinningFindBinRange()
178+
{
179+
180+
int nfail = 0;
181+
182+
const UInt_t DATASZ = 100500; // deliberately NOT a multiple of NBINS
183+
const UInt_t DATADIM = 5;
184+
const UInt_t NBINS = 1000;
185+
186+
std::vector<Double_t> smp(DATASZ * DATADIM);
187+
TRandom3 r;
188+
r.SetSeed(1);
189+
for (UInt_t i = 0; i < DATADIM; ++i)
190+
for (UInt_t j = 0; j < DATASZ; ++j)
191+
smp[DATASZ * i + j] = r.Uniform(-1., 1.);
192+
193+
TKDTreeBinning kdBins(DATASZ, DATADIM, smp, NBINS);
194+
195+
const UInt_t nbins = kdBins.GetNBins();
196+
197+
// The number of bins must match the number of terminal nodes of the kd-tree.
198+
if ((int)nbins != kdBins.GetTree()->GetNNodes() + 1) {
199+
Error("testkdTreeBinningFindBinRange", "GetNBins() (%u) != number of kd-tree terminal nodes (%d)", nbins,
200+
kdBins.GetTree()->GetNNodes() + 1);
201+
++nfail;
202+
}
203+
204+
// Every data point must be assigned to a valid bin.
205+
std::vector<Double_t> point(DATADIM);
206+
for (UInt_t j = 0; j < DATASZ; ++j) {
207+
for (UInt_t i = 0; i < DATADIM; ++i)
208+
point[i] = smp[DATASZ * i + j];
209+
UInt_t bin = kdBins.FindBin(point.data());
210+
if (bin >= nbins) {
211+
Error("testkdTreeBinningFindBinRange", "FindBin returned out-of-range bin %u (NBins = %u)", bin, nbins);
212+
++nfail;
213+
break;
214+
}
215+
}
216+
217+
// The total bin content must add up to the data size.
218+
Long64_t total = 0;
219+
for (UInt_t i = 0; i < nbins; ++i)
220+
total += kdBins.GetBinContent(i);
221+
if (total != (Long64_t)DATASZ) {
222+
Error("testkdTreeBinningFindBinRange", "Sum of bin contents (%lld) != data size (%u)", total, DATASZ);
223+
++nfail;
224+
}
225+
226+
return nfail;
227+
}
228+
168229
int main(int argc, char **argv)
169230
{
170231
// Parse command line arguments
@@ -193,6 +254,8 @@ int main(int argc, char **argv)
193254

194255
int nfail = testkdTreeBinning();
195256

257+
nfail += testkdTreeBinningFindBinRange();
258+
196259
if ( showGraphics )
197260
{
198261
theApp->Run();

0 commit comments

Comments
 (0)