forked from root-project/root
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrdataframe_misc.py
More file actions
150 lines (114 loc) · 4.96 KB
/
Copy pathrdataframe_misc.py
File metadata and controls
150 lines (114 loc) · 4.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
import array
import os
import platform
import unittest
import numpy
import ROOT
class DatasetContext:
    """
    Helper that creates the ROOT files used by the tests below.

    Constructing an instance writes one file per entry of ``filenames``. Each
    file holds a TTree named ``treename`` with ``nentries`` entries and two
    integer branches: ``x`` takes the values 1..nentries and ``y`` is always
    ``2 * x``. When used as a context manager, the files are removed again at
    the end of the context.
    """

    # Relative paths of the files written by __init__ and removed by __exit__.
    filenames = ["rdataframe_misc_1.root", "rdataframe_misc_2.root", "rdataframe_misc_3.root"]
    # Used both as name and as title of the TTree stored in every file.
    treename = "dataset"
    # Number of entries filled into each tree.
    nentries = 5

    def __init__(self):
        """Write every file listed in ``filenames``."""
        for filename in self.filenames:
            with ROOT.TFile(filename, "RECREATE") as f:
                t = ROOT.TTree(self.treename, self.treename)

                # Single-element buffers handed to the branches; they are
                # updated in place before each Fill() call.
                x = array.array("i", [0])
                y = array.array("i", [0])
                t.Branch("x", x, "x/I")
                t.Branch("y", y, "y/I")

                for i in range(1, self.nentries + 1):
                    x[0] = i
                    y[0] = 2 * i
                    t.Fill()

                f.Write()

    def __enter__(self):
        """Enable using the class as a context manager."""
        return self

    def __exit__(self, *_):
        """
        Enable using the class as a context manager. At the end of the context,
        remove the files created.

        Removal is best-effort per file: a file that is already gone is
        skipped, so the remaining files are still deleted and an exception
        raised inside the context is not replaced by a FileNotFoundError.
        Nothing is returned, hence exceptions from the context propagate.
        """
        for filename in self.filenames:
            try:
                os.remove(filename)
            except FileNotFoundError:
                # Nothing to clean up for this file; keep going with the rest.
                continue
class RDataFrameMisc(unittest.TestCase):
    """Miscellaneous RDataFrame tests"""

    def test_empty_filenames(self):
        """
        An empty list of filenames should be detected and the user should be informed
        """
        # LLVM JIT fails to catch exceptions on Windows, so we disable their testing.
        # Report the test as skipped rather than silently passing.
        if platform.system() == "Windows":
            self.skipTest("LLVM JIT fails to catch exceptions on Windows")

        # With implicit conversions, cppyy also needs to try dispatching to the various
        # constructor overloads. The C++ exception will be thrown, but will be encapsulated
        # in a more generic TypeError telling the user that none of the overloads worked
        with self.assertRaisesRegex(TypeError, "RDataFrame: empty list of input files."):
            ROOT.RDataFrame("events", [])

        # When passing explicitly the vector of strings, type dispatching will not be necessary
        # and the real C++ exception will immediately surface
        with self.assertRaisesRegex(ROOT.std.invalid_argument, "RDataFrame: empty list of input files."):
            ROOT.RDataFrame("events", ROOT.std.vector[ROOT.std.string]())

        # An empty tuple is expected to behave like the empty list above.
        with self.assertRaisesRegex(TypeError, "RDataFrame: empty list of input files."):
            ROOT.RDataFrame("events", ())

    def _get_rdf(self, dataset):
        """
        Return an RDataFrame built on a TChain that is local to this function.

        The chain is named ``dataset.treename`` and gets every file in
        ``dataset.filenames`` added to it.
        """
        # NOTE(review): this repeats _get_chain on purpose-looking grounds: the
        # chain is created and goes out of scope in this very frame. Confirm the
        # ownership regression test does not rely on that before deduplicating.
        chain = ROOT.TChain(dataset.treename)
        for filename in dataset.filenames:
            chain.Add(filename)

        return ROOT.RDataFrame(chain)

    def _get_chain(self, dataset):
        """Return a TChain named ``dataset.treename`` holding all of ``dataset.filenames``."""
        chain = ROOT.TChain(dataset.treename)
        for filename in dataset.filenames:
            chain.Add(filename)
        return chain

    def _define_col(self, rdf):
        """Return ``rdf`` with an extra column ``z`` defined by the expression ``42``."""
        return rdf.Define("z", "42")

    def _filter_x(self, rdf):
        """Return ``rdf`` restricted to the entries satisfying ``x > 2``."""
        return rdf.Filter("x > 2")

    def _test_rdf_in_function(self, chain):
        """Build an RDataFrame from ``chain`` here and check that mean(x) < mean(y)."""
        rdf = ROOT.RDataFrame(chain)
        meanx = rdf.Mean("x")
        meany = rdf.Mean("y")
        self.assertLess(meanx.GetValue(), meany.GetValue())

    def test_ttree_ownership(self):
        """
        Regression tests for https://github.com/root-project/root/issues/17691
        """
        # Issues on windows with contention on file deletion.
        # Report the test as skipped rather than silently passing.
        if platform.system() == "Windows":
            self.skipTest("Contention on file deletion on Windows")

        with DatasetContext() as dataset:
            # RDataFrame whose chain was created inside a helper function.
            rdf = self._get_rdf(dataset)
            npy_dict = rdf.AsNumpy()
            # assert_array_equal raises AssertionError on mismatch, so no extra
            # unittest assertion is needed around it. Three files are chained,
            # hence each per-file sequence is expected three times.
            numpy.testing.assert_array_equal(npy_dict["x"], numpy.array([1, 2, 3, 4, 5] * 3))
            numpy.testing.assert_array_equal(npy_dict["y"], numpy.array([2, 4, 6, 8, 10] * 3))

            # Same chain shared between an RDataFrame created here and another
            # one created inside a helper function.
            chain = self._get_chain(dataset)
            rdf = ROOT.RDataFrame(chain)
            self._test_rdf_in_function(chain)

            # Pass the RDataFrame through helpers that return derived nodes.
            rdf = self._define_col(rdf)
            rdf = self._filter_x(rdf)
            # x takes 1..5 in each of the 3 files; 3 values per file pass x > 2.
            self.assertEqual(rdf.Count().GetValue(), 9)

    def test_regression_gh_20291(self):
        """
        Regression test for https://github.com/root-project/root/issues/20291
        """
        # Issues on Windows with contention on file deletion.
        # Report the test as skipped rather than silently passing.
        if platform.system() == "Windows":
            self.skipTest("Contention on file deletion on Windows")

        out_path = "dataframe_misc_regression_gh20291.root"
        try:
            x, y = numpy.array([1, 2, 3], dtype='int64'), numpy.array([4, 5, 6], dtype='int64')
            df = ROOT.RDF.FromNumpy({"x": x, "y": y})
            df.Snapshot("tree", out_path)

            # Read the snapshot back and compare with the input arrays.
            df_out = ROOT.RDataFrame("tree", out_path)
            count = df_out.Count()
            take_x = df_out.Take["Long64_t"]("x")
            take_y = df_out.Take["Long64_t"]("y")

            self.assertEqual(count.GetValue(), 3)
            self.assertSequenceEqual(take_x.GetValue(), [1, 2, 3])
            self.assertSequenceEqual(take_y.GetValue(), [4, 5, 6])
        finally:
            # If a step above failed before the file was written, a plain
            # os.remove would raise FileNotFoundError and hide the real error.
            try:
                os.remove(out_path)
            except FileNotFoundError:
                pass
# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()