#include <GlastClassify.h>
Inheritance diagram for GlastClassify:

Public Types | |
| enum | Subset { ALL, ODD, EVEN } |
Public Member Functions | |
| GlastClassify (const std::string &info_path, bool mixed=true) | |
| ctor | |
| virtual | ~GlastClassify () |
| void | run () |
| run training (when s_train is set) followed by testing | |
Static Public Member Functions | |
| void | setPaths (std::string rootpath, std::string treepath) |
Static Public Attributes | |
| std::string | s_rootpath |
| path to the root files | |
| std::string | s_treepath |
| path to tree data, input and output | |
| bool | s_train |
| true if in training mode, false if only testing | |
| int | s_boost = 0 |
| number of boost cycles | |
| int | s_events = 0 |
| int | s_normalize = 100 |
| if non-zero, normalize signal and background to this | |
| Subset | s_train_sample = ODD |
| subset to train on | |
| Subset | s_test_sample = EVEN |
| subset to test with | |
Protected Member Functions | |
| virtual bool | isgood () |
| subclasses may implement this to define the good, or signal, events; the default value is controlled by setbkgnd(v): v=true for background, false for signal | |
| virtual void | define (std::vector< std::string > &) |
| subclass may override | |
| virtual bool | accept () |
| acceptance cut applied to events in training sample: subclass may override | |
| int | find_index (const std::string &name) |
| return index of the variable in the training list (exception if not found) | |
| int | add_index (const std::string &name) |
| add an entry to the list for making cuts to define sample to train on | |
| float | datum (int index) const |
| access to values in the current event | |
| void | setbkgnd (bool v=false) |
Private Member Functions | |
| void | load (unsigned int max_events, Subset set) |
| void | load (TrainingInfo::StringList input, unsigned int max_events, Subset set, bool good=true) |
| void | classify () |
| void | test () |
| void | boost (Classifier &classify) |
| void | current_time (std::ostream &out=std::cout) |
| std::ostream & | log () |
Private Attributes | |
| std::string | m_title |
| std::ostream * | m_log |
| Classifier::Table | m_data |
| TrainingInfo | m_info |
| const std::vector< float > * | m_row |
| current row while iterating through the tuple | |
| bool | m_nobkgnd |
| std::vector< std::string > | m_all_names |
| bool | m_mixed |
| true if file has mixed good/bad: uses isgood() in this case | |
| DecisionTree * | m_dtree |
| the tree to create | |
| DecisionTree * | m_filter |
| may be used to select events | |
| unsigned | m_total_good |
| unsigned | m_total_bad |
Definition at line 15 of file GlastClassify.h.
|
|
Definition at line 19 of file GlastClassify.h. Referenced by ParseOptions::parse_subset().
|
|
||||||||||||
|
ctor
Definition at line 54 of file classify/GlastClassify.cxx. References log(), m_all_names, m_filter, m_info, m_log, m_mixed, m_nobkgnd, m_title, m_total_bad, m_total_good, s_rootpath, s_train, and s_treepath.
00055 : m_info(s_treepath+"/"+info_path, s_rootpath) 00056 , m_all_names(m_info.vars()) 00057 , m_mixed(mixed) 00058 , m_filter(0) 00059 , m_total_good(0), m_total_bad(0) 00060 { 00061 s_current= this; // for communication of pointer during setup 00062 00063 std::ofstream* logfile = new std::ofstream(m_info.log().c_str(), std::ofstream::app); 00064 if( ! logfile->is_open() ) { 00065 throw std::runtime_error("log file "+ m_info.log() + " did not open"); 00066 } 00067 m_nobkgnd = true; 00068 m_log = logfile; 00069 std::ifstream titlefile((m_info.filepath()+"/title.txt").c_str()); 00070 titlefile >> m_title; 00071 log() << "=======================================================================\n" 00072 <<"Starting "<<(s_train? "classification and testing ":"testing only of " )<< m_title << std::endl; 00073 std::cout << "Starting "<<(s_train? "classification and testing ":"testing of " )<< m_title << std::endl; 00074 00075 std::string filterfilename(m_info.filepath()+"/filter.txt"); 00076 std::ifstream filterstream(filterfilename.c_str()); 00077 00078 if( filterstream.is_open() ){ 00079 filterstream.close(); 00080 // filter file exists: set it up 00081 00082 m_filter = new DecisionTree("filter"); 00083 Filter createfilter(m_all_names, *m_filter); 00084 createfilter.addCutsFrom(filterfilename); // do it 00085 log() << "Using filter: " << std::endl; 00086 createfilter.print(log()); 00087 } 00088 00089 } |
|
|
Definition at line 24 of file GlastClassify.h.
00024 {}
|
|
|
acceptance cut applied to events in training sample: subclass may override
Reimplemented in ClassifyCore, ClassifyEnergy, and ClassifyGamma. Definition at line 56 of file GlastClassify.h. Referenced by load().
00056 {return true;}
|
|
|
add an entry to the list for making cuts to define sample to train on
Definition at line 111 of file classify/GlastClassify.cxx. References m_all_names. Referenced by find_index().
00112 {
00113 m_all_names.push_back(name);
00114 return m_all_names.size()-1;
00115 }
|
|
|
Definition at line 320 of file classify/GlastClassify.cxx. References classify(), log(), m_data, m_dtree, m_info, and s_boost. Referenced by classify().
00321 {
00322 // set up the booster, and create the first boosted tree
00323 //
00324 AdaBoost booster(m_data, 0.5); //TODO adaBeta);
00325 double boostwt = booster(classify); //weight of initial tree, boost training sample
00326 m_dtree = classify.createTree(m_info.title(), boostwt);
00327
00328 for (int itree = 1; itree < s_boost+1; ++itree) {
00329 if( itree%5==1 ){ // checkpoint every 5, starting with single tree
00330 //TODO
00331 #if 0
00332 saveTree();
00333 double resolution = evaluate(testing_sample) ;
00334 std::cout << "Test resolution: " << resolution << std::endl;
00335 log() << "Test resolution: " << resolution << std::endl;
00336 #endif
00337 }
00338 log() << "Making boosted tree #"
00339 << itree << ", current weight " << boostwt
00340 << ", nodes: "<<Classifier::Node::s_nodes
00341 << std::endl;
00342 std::cout << "Making boosted tree #"
00343 << itree << ", current weight " << boostwt
00344 << ", nodes: " << Classifier::Node::s_nodes
00345 << std::endl;
00346 if ( boostwt < 1.001) {
00347 log() << "quitting boost: weight too small to continue" << std::endl;
00348 break;
00349 }
00350 // create new Classifier object with reweighted data from booster
00351 Classifier classify(booster.data());
00352 classify.makeTree(); // and make a new tree using it
00353
00354 // get the weight from the booster, which also reweights data for next cycle
00355 boostwt = booster(classify);
00356
00357 // generate the decision tree from the current classification, append it
00358 std::auto_ptr<DecisionTree> boostedtree( classify.createTree(m_info.title(),boostwt));
00359 m_dtree->addTree(&*boostedtree); //(how to pass a pointer to an auto_ptr object)
00360
00361 }
00362 }
|
|
|
Definition at line 210 of file classify/GlastClassify.cxx. References boost(), log(), m_data, m_dtree, m_info, and s_boost. Referenced by boost(), and run().
00211 {
00212
00213 // create the tree from the data
00214 Classifier ctree(m_data, m_info.vars());
00215 std::set<std::string> varset(m_info.vars().begin(), m_info.vars().end());
00216
00217 log() << "Variables used for trainging:\n\t";
00218 std::copy( varset.begin(), varset.end(),
00219 std::ostream_iterator<std::string>(log(), "\n\t"));
00220 log() << std::endl;
00221
00222 ctree.makeTree();
00223
00224 // summary stuff at the top of the file
00225 log() << "Number of nodes in the tree: " << Classifier::Node::s_nodes << std::endl;
00226
00227 // insert list of used variables in the log, and to a file
00228 ctree.printVariables(log());
00229 log() << "======================================\n";
00230 std::string usedvarfilename(m_info.filepath()+"/used_variables.txt");
00231 std::ofstream usedvarsfile(usedvarfilename.c_str());
00232 ctree.printVariables(usedvarsfile);
00233 log()<<" writing to file " << usedvarfilename << std::endl;
00234
00235
00236 BackgroundVsEfficiency plot(ctree);
00237 log() << "\nFigure of merit sigma: " << plot.sigma() << std::endl;
00238
00239 std::string plotfilename(m_info.filepath()+"/train_efficiency.txt");
00240 log()<<" writing to file " << plotfilename << std::endl;
00241
00242 std::ofstream plotfile(plotfilename.c_str());
00243 plot.print(plotfile, m_info.title());
00244
00245 #ifdef VERBOSE // generates a lot of output, need a special option
00246 // print the node list, and the variables used
00247 tree.printTree(log());
00248 #endif
00249
00250 if( s_boost==0) {
00251
00252 // single tree, no boosting
00253 m_dtree = ctree.createTree(m_info.title());
00254
00255 }else{
00256 boost(ctree);
00257 BackgroundVsEfficiency plot(ctree);
00258
00259 // redo, and overwrite, the plot
00260 log() << "\nFigure of merit after boosting: " << plot.sigma() << std::endl;
00261 std::string plotfilename(m_info.filepath()+"/train_efficiency.txt");
00262 log()<<" writing to file " << plotfilename << std::endl;
00263
00264 std::ofstream plotfile(plotfilename.c_str());
00265 plot.print(plotfile);
00266
00267 }
00268 std::string dtfilename(m_info.filepath()+"/dtree.txt");
00269 std::ofstream dtfile(dtfilename.c_str());
00270 log()<<" writing tree to file " << dtfilename << std::endl;
00271 m_dtree->print(dtfile);
00272 }
|
|
|
Definition at line 288 of file classify/GlastClassify.cxx. Referenced by run().
00289 {
00290 static bool first=true;
00291 static time_t start;
00292 if(first){ first=false; ::time(&start);}
00293 time_t aclock;
00294 ::time( &aclock );
00295 char tbuf[25]; ::strncpy(tbuf, asctime( localtime( &aclock ) ),24);
00296 tbuf[24]=0;
00297 out<< "Current time: " << tbuf
00298 << " ( "<< ::difftime( aclock, start) <<" s elapsed)" << std::endl;
00299 }
|
|
|
access to values in the current event
Definition at line 65 of file GlastClassify.h. Referenced by GlastClassify::Entry::operator double().
00065 {return (*m_row)[index];};
|
|
|
subclass may override
Definition at line 53 of file GlastClassify.h.
00053 {};
|
|
|
return index of the variable in the training list (exception if not found)
Definition at line 274 of file classify/GlastClassify.cxx. References add_index(), and m_info.
|
|
|
subclasses may implement this to define the good, or signal, events; the default value is controlled by setbkgnd(v): v=true for background, false for signal
Reimplemented in ClassifyCal, ClassifyCore, ClassifyEnergy, and ClassifyVertex. Definition at line 50 of file GlastClassify.h. References m_nobkgnd. Referenced by load().
00050 {return !m_nobkgnd;};
|
|
||||||||||||||||||||
|
Definition at line 147 of file classify/GlastClassify.cxx. References accept(), ALL, isgood(), log(), m_all_names, m_data, m_filter, m_mixed, m_row, m_total_bad, and m_total_good.
00149 {
00150 std::cout << "loading " << input.size() << " files: \n\t" ;
00151 std::copy(input.begin(), input.end(), std::ostream_iterator<std::string>(std::cout, "\n\t"));
00152 std::cout << std::endl;
00153
00154 log() << "Processing file(s)\n\t";
00155 std::copy(input.begin(), input.end(), std::ostream_iterator<std::string>(log(), "\n\t"));
00156
00157 RootTuple t(input, "MeritTuple");
00158 t.selectColumns(m_all_names, false); // not weighted
00159 int good=0, bad=0, rejected=0, nan=0, filtered=0;
00160 Classifier::Record::setup();
00161 log() << "\tsize = " << t.size() << std::endl;
00162 int nvars = m_all_names.size();
00163
00164 RootTuple::Iterator rit = t.begin();
00165 if( set==EVEN) ++rit; // skip the first if EVEN
00166 //if( set==RANDOM && m_rand->shoot()>0.5) ++rit; // skip first
00167
00168 for( ; rit!=t.end(); ++rit)
00169 {
00170 if (max_events>0 && rit > max_events) break ; // max
00171 try {
00172 m_row = &*rit;
00173 } catch( std::runtime_error err){
00174 ++nan;
00175 ++rejected;
00176 continue;
00177 }
00178
00179 // If there is a filter, invoke it
00180 bool filter_reject = m_filter!=0 && ( (*m_filter)(*m_row)==0 );
00181 if ( filter_reject ) ++filtered;
00182
00183 // invoke virtual accepance function to select subset to train with
00184 if(! filter_reject && accept() ) {
00185 // copy to local
00186 bool signal = m_mixed ? isgood() : isSignal;
00187 if (signal) ++good; else ++bad;
00188 // copy the data to the Classifier's table for the classification
00189 m_data.push_back( Classifier::Record(signal, m_row->begin(), m_row->begin()+nvars));
00190 } else {
00191 ++rejected;
00192 }
00193 // if doing alternate or random, skip the next record here.
00194 if( set !=ALL ){ // || set==RANDOM && m_rand->shoot()>0.5
00195 ++rit; if( rit >= t.end() )break;
00196 }
00197 }
00198 log() << "\tgood, bad, rejected records: " << good << ", " << bad <<", " << rejected ;
00199 if( filtered>0) {log() << " (by the filter: "<< filtered<< ")";}
00200 log() << std::endl;
00201
00202 if( nan>0) log() << "\tWARNING: found "<< nan << " events with non-finite values " << std::endl;
00203 log() << "Loaded " << (good+bad) << " records"<< std::endl;
00204 m_total_good += good;
00205 m_total_bad += bad;
00206
00207 }
|
|
||||||||||||
|
Definition at line 117 of file classify/GlastClassify.cxx. References ALL, log(), m_data, m_info, m_mixed, m_total_bad, m_total_good, and s_normalize. Referenced by run().
00118 {
00119 log() << "Loading ";
00120 switch (set){
00121 case ALL: log() << "all"; break;
00122 case EVEN: log() << "even"; break;
00123 case ODD : log() << "odd"; break;
00124 // case RANDOM : log() << "random"; break;
00125 }
00126 log() << " events." << std::endl;
00127
00128 m_data.clear();
00129 m_total_good= m_total_bad=0;
00130
00131 if(m_mixed){
00132 // signal and background are mixed in one batch of files
00133 load(m_info.signalFiles(), max_events, set);
00134 }else {
00135 // separate signal, background
00136 load(m_info.signalFiles(), max_events, set, true);
00137 load(m_info.backgroundFiles(),max_events, set, false);
00138 }
00139 log() << "Event totals: good "<< m_total_good << ", bad " << m_total_bad << std::endl;
00140
00141 if( s_normalize>0) {
00142 log() << "Normalizing signal, background to be equal to "<< s_normalize << std::endl;
00143 m_data.normalize(s_normalize, s_normalize);
00144
00145 }
00146 }
|
|
|
Definition at line 95 of file GlastClassify.h. References m_log. Referenced by boost(), classify(), GlastClassify(), load(), run(), and test().
00095 {return * m_log;}
|
|
|
run training (when s_train is set) followed by testing
Definition at line 91 of file classify/GlastClassify.cxx. References classify(), current_time(), load(), log(), s_events, s_test_sample, s_train, s_train_sample, and test().
00092 {
00093 current_time();
00094 current_time(log());
00095
00096 // run the train or test (or both)?
00097
00098 if( s_train) {
00099 load(s_events, s_train_sample);
00100 classify();
00101 }
00102 log() << "========================== testing ============================\n";
00103 std::cout << "start testing ...\n";
00104 load( s_events, s_test_sample);
00105 test();
00106
00107 current_time();
00108 current_time(log());
00109 }
|
|
|
Definition at line 67 of file GlastClassify.h. References m_nobkgnd.
00067 { m_nobkgnd=v;};
|
|
||||||||||||
|
Definition at line 31 of file GlastClassify.h. References s_rootpath, and s_treepath. Referenced by main().
00031 {
00032 s_rootpath = rootpath+"/";
00033 s_treepath = treepath+"/";
00034 }
|
|
|
Definition at line 301 of file classify/GlastClassify.cxx. References log(), m_data, and m_info. Referenced by run().
00302 {
00303 // create the tree from by reading the dtree file
00304 std::ifstream treefile((m_info.filepath()+"/dtree.txt").c_str());
00305 DecisionTree& dtree = *new DecisionTree(treefile);
00306 log() << "======================================\n";
00307 BackgroundVsEfficiency plot(dtree, m_data);
00308 log() << "\nFigure of merit sigma: " << plot.sigma() << std::endl;
00309
00310 std::string plotfilename(m_info.filepath()+"/test_efficiency.txt");
00311 log()<<" writing to file " << plotfilename << std::endl;
00312
00313 std::ofstream plotfile(plotfilename.c_str());
00314
00315 // a table of the background for a given efficiency
00316 plot.print(plotfile);
00317
00318 }
|
|
|
Definition at line 100 of file GlastClassify.h. Referenced by add_index(), GlastClassify(), and load(). |
|
|
Definition at line 96 of file GlastClassify.h. Referenced by boost(), classify(), load(), and test(). |
|
|
the tree to create
Definition at line 103 of file GlastClassify.h. Referenced by boost(), and classify(). |
|
|
may be used to select events
Definition at line 104 of file GlastClassify.h. Referenced by GlastClassify(), and load(). |
|
|
Definition at line 97 of file GlastClassify.h. Referenced by boost(), classify(), find_index(), GlastClassify(), load(), and test(). |
|
|
Definition at line 94 of file GlastClassify.h. Referenced by GlastClassify(), and log(). |
|
|
true if file has mixed good/bad: uses isgood() in this case
Definition at line 101 of file GlastClassify.h. Referenced by GlastClassify(), and load(). |
|
|
Definition at line 99 of file GlastClassify.h. Referenced by GlastClassify(), isgood(), and setbkgnd(). |
|
|
current row while iterating through the tuple
Definition at line 98 of file GlastClassify.h. Referenced by load(). |
|
|
Definition at line 93 of file GlastClassify.h. Referenced by GlastClassify(). |
|
|
Definition at line 106 of file GlastClassify.h. Referenced by GlastClassify(), and load(). |
|
|
Definition at line 106 of file GlastClassify.h. Referenced by GlastClassify(), and load(). |
|
|
number of boost cycles
Definition at line 35 of file classify/GlastClassify.cxx. Referenced by boost(), classify(), and main(). |
|
|
Definition at line 36 of file classify/GlastClassify.cxx. |
|
|
if non-zero, normalize signal and background to this
Definition at line 37 of file classify/GlastClassify.cxx. |
|
|
path to the root files
Definition at line 30 of file classify/GlastClassify.cxx. Referenced by GlastClassify(), and setPaths(). |
|
|
subset to test with
Definition at line 40 of file classify/GlastClassify.cxx. |
|
|
true if in training mode, false if only testing
Referenced by GlastClassify(), main(), and run(). |
|
|
subset to train on
Definition at line 39 of file classify/GlastClassify.cxx. |
|
|
path to tree data, input and output
Definition at line 32 of file classify/GlastClassify.cxx. Referenced by GlastClassify(), and setPaths(). |
1.3.3