Skip to content

Commit f357c28

Browse files
committed
more debug functions for generator
1 parent 3bb3eb5 commit f357c28

4 files changed

Lines changed: 22 additions & 16 deletions

File tree

compiled/interface/trainDataGenerator.h

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ class trainDataGenerator{
114114
*/
115115
trainData<T> getBatch(); //if no threading batch index can be given? just for future?
116116

117-
bool debug;
117+
int debuglevel;
118118

119119
#ifdef DJC_DATASTRUCTURE_PYTHON_BINDINGS
120120
void setFileListP(boost::python::list files){
@@ -155,7 +155,7 @@ class trainDataGenerator{
155155

156156

157157
template<class T>
158-
trainDataGenerator<T>::trainDataGenerator() :debug(false),
158+
trainDataGenerator<T>::trainDataGenerator() :debuglevel(0),
159159
randomcount_(1), batchsize_(2),sqelementslimit_(false),skiplargebatches_(true), readthread_(0), filecount_(0), nbatches_(
160160
0), npossiblebatches_(0), ntotal_(0), nsamplesprocessed_(0),lastbatchsize_(0),filetimeout_(10),
161161
batchcount_(0){
@@ -208,7 +208,11 @@ void trainDataGenerator<T>::readBuffer(){
208208
while(ntries < filetimeout_){
209209
if(io::fileExists(nextread_)){
210210
try{
211+
if(debuglevel>0)
212+
std::cout << "reading file " << nextread_ << std::endl;
211213
buffer_read.readFromFile(nextread_);
214+
if(debuglevel>0)
215+
std::cout << "reading file " << nextread_ << " done"<< std::endl;
212216
return;
213217
}
214218
catch(std::exception & e){ //if there are data glitches we don't want the whole training to fail immediately
@@ -244,15 +248,15 @@ void trainDataGenerator<T>::readInfo(){
244248
}
245249
if(hasRagged){
246250
std::vector<int64_t> rowsplits = td.readShapesAndRowSplitsFromFile(f, firstfile);//check consistency only for first
247-
if(debug)
248-
std::cout << "rowsplits.size() " <<rowsplits.size() << ": "<<f << std::endl; //DEBUG
251+
if(debuglevel>1)
252+
std::cout << "rowsplits.size() " <<rowsplits.size() << ": "<<f << std::endl; //debuglevel
249253
orig_rowsplits_.push_back(rowsplits);
250254
}
251255
firstfile=false;
252256
ntotal_ += td.nElements();
253257
}
254-
if(debug)
255-
std::cout << "total elements "<< ntotal_ <<std::endl;
258+
if(debuglevel>0)
259+
std::cout << "trainDataGenerator<T>::readInfo: total elements "<< ntotal_ <<std::endl;
256260
batchcount_=0;
257261
prepareSplitting();
258262
}
@@ -276,7 +280,7 @@ void trainDataGenerator<T>::prepareSplitting(){
276280
break;
277281
}
278282
}
279-
if(debug){
283+
if(debuglevel>1){
280284
std::cout << "trainDataGenerator<T>::prepareSplitting: splits" <<std::endl;
281285
for(const auto& s: splits_)
282286
std::cout << s << ", ";
@@ -297,7 +301,7 @@ void trainDataGenerator<T>::prepareSplitting(){
297301
}
298302
}
299303

300-
if(debug){
304+
if(debuglevel>1){
301305
std::cout << "all (first 100) row splits " << allrs.size() << std::endl;
302306
int counter =0;
303307
for(const auto& s: allrs){
@@ -319,7 +323,7 @@ void trainDataGenerator<T>::prepareSplitting(){
319323
}
320324

321325

322-
if(debug){
326+
if(debuglevel>1){
323327
size_t nprint = splits_.size();
324328
if(nprint>200)nprint=200;
325329
for(size_t i=0;i< nprint;i++){
@@ -436,7 +440,7 @@ trainData<T> trainDataGenerator<T>::prepareBatch(){
436440
if(usebatch_.size())
437441
usebatch = usebatch_.at(batchcount_);
438442

439-
if(debug)
443+
if(debuglevel>2)
440444
std::cout << "expect_batchelements "<<expect_batchelements << " vs " << bufferelements <<" bufferelements" << std::endl;
441445

442446
while(bufferelements<expect_batchelements){
@@ -450,7 +454,7 @@ trainData<T> trainDataGenerator<T>::prepareBatch(){
450454
buffer_read.clear();
451455
bufferelements = buffer_store.nElements();
452456

453-
if(debug)
457+
if(debuglevel>2)
454458
std::cout << "nprocessed " << nsamplesprocessed_ << " file " << filecount_ << " in buffer " << bufferelements
455459
<< " file read " << nextread_ << " totalfiles " << orig_infiles_.size()
456460
<< " total events "<< ntotal_<< std::endl;
@@ -462,7 +466,7 @@ trainData<T> trainDataGenerator<T>::prepareBatch(){
462466

463467
nextread_ = orig_infiles_.at(shuffle_indices_.at(filecount_));
464468

465-
if(debug)
469+
if(debuglevel>0)
466470
std::cout << "start new read on file "<< nextread_ <<std::endl;
467471

468472
filecount_++;
@@ -477,13 +481,13 @@ trainData<T> trainDataGenerator<T>::prepareBatch(){
477481
// return prepareBatch();
478482
}
479483

480-
if(debug)
484+
if(debuglevel>2)
481485
std::cout << "providing batch " << nsamplesprocessed_ << "-" << nsamplesprocessed_+expect_batchelements <<
482486
" elements in buffer before: " << bufferelements <<
483487
"\nsplitting at " << expect_batchelements << " use this batch "<< usebatch
484488
<< " total elements " << thisbatch.nTotalElements() << " elements left in buffer " << buffer_store.nElements()<< std::endl;
485489

486-
if(debug){
490+
if(debuglevel>3){
487491
int dbpcount=0;
488492
for(const auto& s: buffer_store.featureArray(0).rowsplits()){
489493
std::cout << s << ", ";

compiled/src/c_trainDataGenerator.C

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ BOOST_PYTHON_MODULE(c_trainDataGenerator) {
5454

5555
.def("getNTotal", &trainDataGenerator<float>::getNTotal)
5656

57-
.def_readwrite("debug", &trainDataGenerator<float>::debug);
57+
.def_readwrite("debuglevel", &trainDataGenerator<float>::debuglevel);
5858
;
5959
}
6060

testing/CI/testGenerator_splitting.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@
7979
expected_here = expected_elmts[b]
8080

8181
gen = trainDataGenerator()
82-
gen.debug=True
82+
gen.debuglevel=100
8383
#gen.setSquaredElementsLimit(True)
8484
gen.setBatchSize(batchsize)
8585
print('batchsize',batchsize)

training/training_base.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -457,6 +457,8 @@ def trainModel(self,
457457
max_queue_size=1, #handled by DJC
458458
validation_freq=1,
459459
use_multiprocessing=False, #the threading one does not like DJC
460+
shuffle=False,
461+
workers=0,#run gen on main thread
460462
**trainargs)
461463
self.trainedepoches += 1
462464
self.train_data.generator.shuffleFilelist()

0 commit comments

Comments
 (0)