NTupleController.cxx

Go to the documentation of this file.
00001 
00012 // for truncation warning in debug mode
00013 #ifdef _MSC_VER
00014 #include "msdevstudio/MSconfig.h"
00015 #endif
00016 
00017 #include "NTupleController.h"
00018 
00019 #include "CircularBuffer.h"
00020 #include "DataSourceController.h"
00021 #include "TupleCut.h"
00022 
00023 #include "pattern/string_convert.h"
00024 
00025 
00026 
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <fstream>
#include <stdexcept>
#include <utility>
00033 
00034 using std::endl;
00035 using std::ofstream;
00036 using std::runtime_error;
00037 using std::string;
00038 using std::vector;
00039 
00040 using namespace hippodraw;
00041 
00042 NTupleController * NTupleController::s_instance = 0;
00043 
00044 NTupleController::NTupleController ()
00045 {
00046 }
00047 
00050 NTupleController::NTupleController ( const NTupleController & )
00051 {
00052   assert ( false );
00053 }
00054 
00055 NTupleController * NTupleController::instance ( )
00056 {
00057   if ( s_instance == 0 ) {
00058     s_instance = new NTupleController ( );
00059   }
00060   return s_instance;
00061 }
00062 
00063 string::size_type
00064 NTupleController::
00065 findWhite ( const std::string & line, unsigned int left, bool tabs_only )
00066 {
00067   string::size_type right = line.find( '\t', left );
00068 
00069   if( line.find( '\n', left ) < right ) right = line.find( '\n', left );
00070 
00071   if ( tabs_only == false ) { 
00072     if( line.find( ' ', left ) < right ) right = line.find( ' ', left );
00073   }
00074 
00075   return right;
00076 }
00077 
00078 void
00079 NTupleController::
00080 splitAndFill ( const std::string & line,
00081                std::vector < std::string > & values )
00082 {
00083   static string  white ( " \n\r\t" );
00084   values.clear ();
00085 
00086   string::size_type start = line.find_first_not_of ( white, 0 );
00087   while ( start != string::npos ) {
00088     string::size_type end = line.find_first_of ( white, start );
00089     if ( end != string::npos ) {
00090       values.push_back ( string ( line, start, end - start ) );
00091       start = line.find_first_not_of ( white, end );
00092     }
00093     else {
00094       values.push_back ( string ( line, start ) ); // to end
00095       break;
00096     }
00097   }
00098 }
00099 
00112 int
00113 NTupleController::
00114 readAsciiNTuple ( NTuple * ntuple, const std::string & filename )
00115 {
00116   // Two local flags to define if name or labels are missing
00117   bool hasTitle = true;
00118   bool hasLabel = true;
00119 
00120   string dir_name = filename;
00121 
00122   string::size_type pos = dir_name.find_last_of( '/' );
00123   if ( pos == string::npos ) {
00124     dir_name = ".";
00125   } else {
00126     dir_name.erase( pos );
00127   }
00128   dir_name += "/";
00129 
00130   std::ifstream infile( filename.c_str() );
00131   if( !infile.is_open() ) {
00132     string what ( "NTupleController: Could not open file\n`" );
00133     what += filename.c_str();
00134     what += "'\n";
00135     what += "Files doesn't exist or is not readable.";
00136     throw runtime_error ( what );
00137   }
00138   string line;
00139   if( !std::getline( infile, line ) ) {
00140     return -2;
00141   }
00142 
00143   // ignore spaces at the beginning of the line
00144   while( findWhite( line, 0, false ) == 0 ) line = line.substr( 1 );
00145   string::size_type firstTab = line.find( '\t', 0 );
00146 
00147   // fist line with tab, no name
00148   if ( firstTab != line.npos ) { 
00149     hasTitle = false;
00150     ntuple->setTitle ("<none>");
00151     string firstword = line.substr ( 0, firstTab );
00152     
00153     // first line are numbers, no labels
00154     if ( std::atof ( firstword.c_str() ) != 0.0 ) { 
00155       hasLabel = false;
00156     }
00157   }
00158   else {
00159     // first line is a single number, no name, no labels
00160     if ( std::atof ( line.c_str() ) != 0.0 ) {
00161       hasTitle = false;
00162       ntuple->setTitle("<none>");
00163       hasLabel = false;
00164     }
00165     // name exist
00166     else ntuple->setTitle ( line );
00167   }
00168 
00169   // second line missing
00170   if ( hasTitle ){
00171     if ( !std::getline( infile, line ) ) 
00172     return -2;
00173   }
00174 
00175   string::size_type size = line.size ();
00176   if ( line[size-1] == '\r' ) { // CR on Windows written file
00177     line.erase ( size-1 );
00178   }
00179   line += "\n";
00180   string::size_type right = findWhite( line, 0, true );
00181   string firstlabel = line.substr( 0, right );
00182   // current line start with number, no labels
00183   if ( std::atof ( firstlabel.c_str() ) != 0.0 ) hasLabel=false;
00184 
00185   vector< string > labels;
00186   string::size_type left = 0;
00187   // label index, used when labels in the file are missing
00188   int i=0; 
00189 
00190   while( right != line.npos ){
00191     if( right == left ){
00192       left++;
00193       right = findWhite( line, left, true );
00194       continue;
00195     }
00196 
00197     string label;
00198     if (!hasLabel) { 
00199       // make a label name when label name is missing 
00200       label="Column"+String::convert(i); 
00201       i++;
00202     }
00203     // read label name from the file
00204     else label = line.substr( left, right - left );
00205     labels.push_back ( label );
00206     left = right + 1;
00207     right = findWhite( line, left, true );
00208   }
00209   if ( labels.size () == 0 ) {
00210     return -2;
00211   }
00212 
00213   ntuple->setLabels ( labels );
00214 
00215   unsigned int columns = labels.size ();
00216   vector < double > vals ( columns );
00217 
00218   if ( hasLabel ) {
00219     std::getline ( infile, line );
00220   }
00221 
00222   do {
00223     // else use current line
00224     vector < string > values;
00225     splitAndFill ( line, values );
00226 
00227     if ( values.size() != columns ) {
00228       return -2;
00229     }
00230     for ( unsigned int i = 0; i < columns; i++ ) {
00231       vals[i] = atof ( values[i].c_str() );
00232     }
00233     ntuple -> addRow ( vals );
00234 
00235   } while ( std::getline ( infile, line ) );
00236 
00237   vector < unsigned int > shape ( 2 );
00238   shape[0] = ntuple -> rows();
00239   shape[1] = ntuple -> columns ();
00240   ntuple -> setShape ( shape );
00241 
00242   return 0;
00243 }
00244 
00245 DataSource * 
00246 NTupleController::
00247 createNTuple ( const std::string & filename )
00248 {
00249   unsigned int columns = 0;
00250   NTuple * nt = new NTuple ( columns );
00251   try {
00252     fillFromFile ( filename, nt );
00253   }
00254   catch ( const runtime_error & e ) {
00255     delete nt;
00256     throw e;
00257   }
00258 
00259   return nt;
00260 }
00261 
00262 void
00263 NTupleController::
00264 fillFromFile ( const std::string & filename, NTuple * nt )
00265 {
00266   int retval = readAsciiNTuple ( nt, filename );
00267 
00268   if ( retval < 0 ) {
00269     std::string what ( "NTupleController: File:\n `" );
00270     what += filename;
00271     what += "'\n ";
00272     if ( retval == -1 ) {
00273       what += "could not be found.";
00274     }
00275     else {
00276       what += "had bad format.";
00277     }
00278     throw std::runtime_error ( what );
00279   }
00280 
00281   DataSourceController * controller = DataSourceController::instance ();
00282   DataSource * ds = controller -> getDataSource ( filename );
00283 
00284   // Register the NTuple name if it's not in memory.
00285   // Alway register that it came from a file.
00286   nt -> setName ( filename ); // before registering.
00287   if ( ds == 0 ) controller -> registerNTuple ( filename, nt );
00288   controller -> registerDataSourceFile ( nt );
00289 }
00290 
00291 NTuple * 
00292 NTupleController::
00293 createNTuple ( const std::vector < std::string > & labels )
00294 {
00295   NTuple * nt = new NTuple ( labels );
00296   DataSourceController * controller = DataSourceController::instance ();
00297   controller -> registerNTuple ( nt );
00298 
00299   return nt;
00300 }
00301 
00302 CircularBuffer * 
00303 NTupleController::
00304 createCircularBuffer ( const std::vector < std::string > & labels )
00305 {
00306   CircularBuffer * nt = new CircularBuffer ( labels );
00307   DataSourceController * controller = DataSourceController::instance ();
00308   controller -> registerNTuple ( nt );
00309 
00310   return nt;
00311 }
00312 
00313 NTuple * 
00314 NTupleController::
00315 createNTuple ( unsigned int columns )
00316 {
00317   NTuple * nt = new NTuple ( columns );
00318   DataSourceController * controller = DataSourceController::instance ();
00319   controller -> registerNTuple ( nt );
00320 
00321   return nt;
00322 }
00323 
00324 CircularBuffer * 
00325 NTupleController::
00326 createCircularBuffer ( unsigned int columns )
00327 {
00328   CircularBuffer * nt = new CircularBuffer ( columns );
00329   DataSourceController * controller = DataSourceController::instance ();
00330   controller -> registerNTuple ( nt );
00331 
00332   return nt;
00333 }
00334 
00335 NTuple * 
00336 NTupleController::
00337 createNTuple ( )
00338 {
00339   NTuple * nt = new NTuple ();
00340   DataSourceController * controller = DataSourceController::instance ();
00341   controller -> registerNTuple ( nt );
00342 
00343   return nt;
00344 }
00345 
00346 CircularBuffer * 
00347 NTupleController::
00348 createCircularBuffer ( )
00349 {
00350   CircularBuffer * nt = new CircularBuffer ();
00351   DataSourceController * controller = DataSourceController::instance ();
00352   controller -> registerNTuple ( nt );
00353 
00354   return nt;
00355 }
00356 
00357 DataSource * 
00358 NTupleController::
00359 findDataSource ( const std::string & name ) const
00360 {
00361   DataSourceController * controller = DataSourceController::instance ();
00362 
00363   return controller -> findDataSource ( name );
00364 }
00365 
00366 void
00367 NTupleController::
00368 changeName ( DataSource * ntuple, const std::string & new_name )
00369 {
00370   ntuple -> setName ( new_name );
00371 }
00372 
00373 int 
00374 NTupleController::
00375 writeNTupleToFile ( const std::string & name,
00376                     const std::string & filename )
00377 {
00378   DataSourceController * controller = DataSourceController::instance ();
00379   DataSource * ntuple 
00380     = controller -> findDataSource ( name );
00381   // throws exception if not found
00382   if ( ntuple == 0 ) return -1;
00383 
00384   return writeNTupleToFile ( ntuple, filename );
00385 }
00386 
00388 int 
00389 NTupleController::
00390 writeNTupleToFile ( DataSource * ntuple, 
00391                     const std::string & filename )
00392 {
00393   ofstream file ( filename.c_str() );
00394   if ( file.is_open () == false ) {
00395     return 1;
00396   }
00397   file << ntuple->title() << endl;
00398 
00399   const vector < string > & labels = ntuple->getLabels ();
00400 #ifdef ITERATOR_MEMBER_DEFECT
00401   std::
00402 #endif
00403   vector < string > ::const_iterator first = labels.begin ();
00404   string label = *first++;
00405   file << label;
00406   while (  first != labels.end() ) {
00407     label = *first++;
00408     file << "\t" << label;
00409   }
00410   file << endl;
00411 
00412   unsigned int rows = ntuple->rows ();
00413   for ( unsigned int i = 0; i < rows; i++ ) {
00414     const vector < double > & row = ntuple->getRow ( i );
00415 
00416 #ifdef ITERATOR_MEMBER_DEFECT
00417     std::
00418 #endif
00419       vector < double > ::const_iterator first = row.begin();
00420     while ( first != row.end() ) {
00421       file << "\t" << *first++;
00422     }
00423     file << endl;
00424   }
00425 
00426   DataSourceController * controller = DataSourceController::instance ();
00427   controller -> registerDataSourceFile ( ntuple );
00428 
00429   return 0;
00430 }
00431 
00435 void
00436 NTupleController::
00437 saveNTuples ( const std::string & fileprefix, const std::string & filesuffix )
00438 {
00439   string::size_type pos = fileprefix.find_last_of ( '/' );
00440   const string path = fileprefix.substr ( 0, pos + 1 );
00441   const string basename = fileprefix.substr ( pos + 1 );
00442 
00443   DataSourceController * controller = DataSourceController::instance ();
00444   vector < DataSource * > tuples;
00445   controller->getDataSources ( tuples, false ); // only non files
00446   unsigned int  size = tuples.size();
00447 
00448   for ( unsigned int i = 0; i < size; i++ ) {
00449     DataSource * ntuple = tuples[i];
00450     const string & tuple_name = ntuple->getName ();
00451 
00452     string filename ( basename );
00453     filename += String::convert ( i );
00454     filename += filesuffix;
00455 
00456     string tuple_file ( path );
00457     tuple_file += filename;
00458 
00459     writeNTupleToFile ( tuple_name, tuple_file );
00460     controller -> changeName ( tuple_name, filename );
00461   }
00462 }
00463 
00464 string
00465 NTupleController::
00466 registerNTuple ( DataSource * ds )
00467 {
00468   DataSourceController * controller = DataSourceController::instance ();
00469   controller -> registerNTuple ( ds );
00470 
00471   return ds -> getName ();
00472 }
00473 
00474 void
00475 NTupleController::
00476 registerNTuple ( const std::string & key, DataSource * ntuple )
00477 {
00478   DataSourceController * controller = DataSourceController::instance ();
00479   controller -> registerNTuple ( key, ntuple );
00480 }
00481 
00482 
00483 NTuple *
00484 NTupleController::
00485 createNTuple( const std::vector < std::string > & column_list,
00486               const std::vector < const TupleCut * > & cut_list, 
00487               DataSource * ds )
00488 {
00489   if ( column_list.empty() ) return NULL;
00490 
00491   unsigned int columnNumber = column_list.size();
00492   unsigned int cutNumber = cut_list.size();
00493   unsigned int size = ds->rows();
00494 
00495   NTuple * nt = new NTuple( column_list );
00496  
00497   // Check all the rows.
00498   for ( unsigned int i = 0; i < size; i++ )
00499     {
00500       // If cut is not selected, default is accept.
00501       bool accept = true;
00502 
00503       // Check all the cuts.
00504       for ( unsigned int j = 0; j < cutNumber; j++ ) 
00505         {
00506           const TupleCut * tc = cut_list[j];
00507           accept = tc -> acceptRow ( ds, i );
00508           if (!accept) break;
00509         }
00510       
00511       // Add the row when all cuts accept the row.
00512       if (accept) {
00513         vector <double> row;
00514         for ( unsigned int k = 0; k < columnNumber; k++ ) 
00515           {
00516             row.push_back(ds->valueAtNoCache(i,ds->indexOf(column_list[k])));
00517           }
00518         nt->addRow(row);
00519       }
00520     }
00521 
00522   return nt;
00523 
00524 }
00525 
00526 int
00527 NTupleController::
00528 createNTupleToFile( const std::vector < std::string > & column_list,
00529                     const std::vector < const TupleCut * > & cut_list, 
00530                     DataSource * ds,
00531                     const std::string & filename,
00532                     const std::string & dsname)
00533 {
00534   if ( column_list.empty() ) return 1;
00535 
00536   ofstream file ( filename.c_str() );
00537   if ( file.is_open () == false ) {
00538     return 1;
00539   }
00540   // Check the column list and create indices for inner loop
00541   unsigned int columnNumber = column_list.size();
00542   vector < int > col_indices ( columnNumber );
00543   
00544   for ( unsigned int i = 0; i < columnNumber; i++ ) {
00545     const string & label = column_list [ i ];
00546     int index = ds -> indexOf ( label );
00547     if ( index < 0 ) { 
00548      ds -> throwIfInvalidLabel ( label );
00549     }
00550     col_indices [i] = index;
00551   }
00552 
00553   file << dsname << endl;
00554 
00555 #ifdef ITERATOR_MEMBER_DEFECT
00556   std::
00557 #endif
00558   vector < string > ::const_iterator first = column_list.begin ();
00559   string label = *first++;
00560   file << label;
00561   while (  first != column_list.end() ) {
00562     label = *first++;
00563     file << "\t" << label;
00564   }
00565   file << endl;
00566 
00567   unsigned int cutNumber = cut_list.size();
00568   unsigned int size = ds->rows();
00569 
00570    // Check all the rows.
00571   for ( unsigned int i = 0; i < size; i++ )
00572     {
00573       // If cut is not selected, default is accept.
00574       bool accept = true;
00575 
00576       // Check all the cuts.
00577       for ( unsigned int j = 0; j < cutNumber; j++ ) 
00578         {
00579           const TupleCut * tc = cut_list[j];
00580           accept = tc -> acceptRow ( ds, i );
00581           if (!accept) break;
00582         }
00583       
00584       // Add the row to the file when all cuts accept the row.
00585       if (accept) {
00586         
00587         for ( unsigned int k = 0; k < columnNumber; k++ ) 
00588           {
00589             int index = col_indices [ k ];
00590             file << "\t" << ds -> valueAtNoCache (i, index );       
00591           }
00592         file << endl;
00593       }
00594     }
00595   return 0; 
00596 }

Generated for HippoDraw Class Library by doxygen