Bdb packages | Design docs | Source docs | Guidelines | Recent releases

Search | Site Map .

Main Page   Modules   Namespace List   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

/BdbClusteringServer/BdbHintKernel.cc

Go to the documentation of this file.
00001 //------------------------------------------------------------------------------
00002 // File and Version Information:
00003 //      $Id: BdbHintKernel.cc,v 1.11 2002/08/30 01:13:31 ryd Exp $
00004 //
00005 // Description:
00006 //      The class is responsible for kernel clustering hint operations, like
00007 //          - distributing containers
00008 //          - creation / deletion of databases and containers
00009 //      It can do that using a server or in-process.
00010 //
00011 // Environment:
00012 //      Software developed for the BaBar Detector at the SLAC B-Factory
00013 //
00014 // Author List:
00015 //      Jacek Becla                Original Author
00016 //
00017 // Copyright Information:
00018 //      Copyright (C) 2000      Stanford Linear Accelerator Center
00019 //
00020 //------------------------------------------------------------------------------
00021 #include "BaBar/BaBar.hh"
00022 
00023 // Work around a bug in ACE/TAO with DEC CXX6.3
00024 #include <string.h>
00025 
00026 
00027 #include "BdbClusteringServer/BdbHintKernel.hh"
00028 
00029 
00030 #include "BdbApplication/BdbDebug.hh"
00031 #include "BdbApplication/BdbDomain.hh"
00032 #include "BdbAccess/BdbClusterConfigMgr.hh"
00033 #include "BdbAccess/BdbClustHintSetupMgr.hh"
00034 #include "BdbClusteringServer/BdbClSrvCRef.hh"
00035 #include "BbrStdUtils/CollectionUtils.hh"
00036 
00037 #include <unistd.h> // getpid
00038 
00039 #include <set>
00040 using std::set;
00041 
00042 BdbClustSrvModule::BdbClusteringServer_var  BdbHintKernel::_clustServerRef;
00043 BdbClustSrvModule::ClientDescr              BdbHintKernel::_myDescr;
00044 char*      BdbHintKernel::_clusterName          = 0;
00045 d_Boolean  BdbHintKernel::_orbIsInit            = d_False;
00046 const int  BdbHintKernel::_maxNoFailuresAllowed = 5;
00047 int        BdbHintKernel::_noFailures           = 0;
00048 int        BdbHintKernel::_fdid                 = 0;
00049 
00050 
00051 BdbHintKernel::BdbHintKernel()
00052 {}
00053 
00054 
00055 
00056 BdbHintKernel::~BdbHintKernel()
00057 {
00058     returnAllContainers();
00059     _orbIsInit = d_False;
00060 }
00061 
00062         
00063 
00064 BdbStatus
00065 BdbHintKernel::initORB(int argc, char** argv)
00066 {
00067     if ( _orbIsInit ) {
00068         return BdbcSuccess;
00069     }
00070 
00071     BdbClustHintSetupMgr* chsMgr = BdbClustHintSetupMgr::instance();
00072     const char* iorFile = chsMgr->iorFilePath();
00073     if ( 0 == iorFile ) {
00074         cerr << "Invalid iorFile, check clustering config file" << endl;
00075         ::abort();
00076     }
00077     
00078     char hN[64];
00079     gethostname(hN, 64); // this does not contain domain name
00080     struct hostent* hent1 = gethostbyname(hN);
00081     if ( 0 == hent1 ) {
00082         cerr << "Unable to determine host name" << endl;
00083         return BdbcError;
00084     }
00085     
00086     CORBA::Environment theEnv;
00087     CORBA::ORB_ptr     theOrb = CORBA::ORB_init(argc, argv, "hello", theEnv);
00088     if(theEnv.exception() != 0 ) {
00089         cerr << "CLIENT: theEnv = " << theEnv.exception() << endl;
00090         return BdbcError;
00091     }
00092 
00093     char theStr[1024];
00094     ifstream theIORFile(iorFile);
00095     if ( ! theIORFile ) {
00096         cerr << "Fatal error: cannot open IOR file" << endl;
00097         ::abort();
00098     }
00099     
00100     theIORFile >> theStr;
00101     theIORFile.close();
00102 
00103     CORBA::Object_var theObject;
00104     theObject = theOrb->string_to_object(theStr);
00105     if( CORBA::is_nil(theObject) ) {
00106         COUT1 << "FATAL ERROR: Clustering server not responding" << endl;
00107         ::abort();
00108     }
00109 
00110     // Narrow obtained object reference into an object reference
00111     // of a specific class.
00112     _clustServerRef = BdbClustSrvModule::BdbClusteringServer::_narrow(theObject);
00113     if( CORBA::is_nil(_clustServerRef.in()) ) {
00114         cerr << "CLIENT: Failed to narrow the object reference into BdbClServer." << endl;
00115         return BdbcError;
00116     }
00117 
00118     _myDescr.hostName = CORBA::string_dup(hent1->h_name);
00119     _myDescr.pid      = getpid() ;
00120 
00121     initFDID();
00122     int repeat = 60, serverFDID = 0;
00123     while(1) {        
00124         if ( _clustServerRef->ping(serverFDID, _myDescr) ) {
00125             break;
00126         }
00127         if ( repeat-- == 0 ) {
00128             COUT1 << "FATAL ERROR: clustering server cannot be reached" << endl;
00129             ::abort();
00130         }
00131         sleep(1);
00132         COUT1 << "ERROR: clustering server cannot be reached, retrying" << endl;
00133     }
00134 
00135     if ( serverFDID != _fdid ) {
00136         cerr << "FATAL ERROR: client fdid (" << _fdid << ") differs "
00137              << "from server fdid (" << serverFDID << ")" << endl;
00138         ::abort();
00139     }
00140 
00141     _orbIsInit = d_True;
00142     
00143     return BdbcSuccess;
00144 }
00145 
00146 
00147 BdbStatus
00148 BdbHintKernel::initFDID()
00149 {
00150     BdbApplicationOrDomain* app = BdbApplicationOrDomain::activeInstance();
00151     d_Boolean inTrans = ( BdbcNoOpen != app->mode() );
00152     if ( ! inTrans ) {
00153         if ( BdbcSuccess != app->startRead("getfdid") ) {
00154             return BdbcError;
00155         }
00156     }
00157     _fdid = app->fd().number();
00158     COUT1 << "_fdid init: " << _fdid << endl;
00159     if ( ! inTrans ) {
00160         app->commit("getfdid");
00161     }
00162 
00163     return BdbcSuccess;
00164 }
00165 
00166 
00167 BdbStatus
00168 BdbHintKernel::getOneContainer(const char* sdName,
00169                                char authLevel,
00170                                const char* authName,
00171                                const char* compName,
00172                                const char* streamName, 
00173                                ooRef(ooObj)& ref,
00174                                d_ULong& maxNoPages)
00175 {
00176     if ( ! _orbIsInit ) {
00177         cerr << "ORB not initialize, in-process not supported" << endl;
00178         return BdbcError;
00179     }
00180 
00181     if ( 0 == _clusterName ) {
00182         const char* c = BdbClusterConfigMgr::getClusterName();
00183         if ( 0 == c ) {
00184             c = "default";
00185         }
00186         _clusterName = new char [strlen(c)+1];
00187         strcpy(_clusterName, c);
00188     }
00189     
00190     if ( authLevel != 'g' && authLevel != 's' && authLevel != 'u' ) {
00191         cerr << "Incorrect auth level: " << authLevel << endl;
00192         return BdbcError;
00193     }
00194     
00195     BdbClustSrvModule::GroupDescr descr;
00196     descr.clusterName = CORBA::string_dup(_clusterName);
00197     descr.sdName      = CORBA::string_dup(sdName);
00198     descr.authLevel   = authLevel;
00199     descr.authName    = CORBA::string_dup(authName);
00200     descr.compName    = CORBA::string_dup(compName);
00201     if ( 0 != streamName ) {
00202         descr.streamName  = CORBA::string_dup(streamName);
00203     } else {
00204         descr.streamName  = CORBA::string_dup("NULL");
00205     }
00206     
00207     BdbClustSrvModule::RetStatus* ret;
00208     BdbClustSrvModule::OID oid;
00209     oid.db = oid.oc = oid.pg = oid.sl = oid.noPg = 0;
00210 
00211     COUT1 << "Requesting a container for: ";
00212     if ( 0 == _clusterName ) COUT1n << "NULL-"; else   COUT1n << _clusterName << "-";
00213     if ( 0 == sdName      )  COUT1n << "NULL-"; else   COUT1n << sdName       << "-";
00214     COUT1n << authLevel << "-";
00215     if ( 0 == authName    )  COUT1n << "NULL-"; else   COUT1n << authName     << "-";
00216     if ( 0 == compName    )  COUT1n << "NULL-"; else   COUT1n << compName     << "-";
00217     if ( 0 == streamName  )  COUT1n << "NULL" ; else   COUT1n << streamName;
00218     COUT1n << endl;
00219 
00220 
00221     long startedAt = time(0);    
00222  retry:
00223     ret = _clustServerRef->getOneContainer(descr, _myDescr, oid);
00224     if ( 0 == ret ) {
00225         cerr << "Returned RetStatus object = 0" << endl;
00226         return BdbcError;
00227     }
00228 
00229     if ( ret->level != BdbClustSrvModule::Success ) {
00230         ooRef(ooContObj) nullRef;
00231         ref = nullRef;
00232         if ( ret->level == BdbClustSrvModule::Warning ) {
00233             COUT1 << "Warning: " << ret->msg << endl;
00234             if ( ! strcmp(ret->msg, "wait") ) {
00235                 COUT1 << "Waiting " << oid.sl << " sec for precreation to finish" 
00236                       << ", precreation started " << oid.pg << " sec ago" << endl;
00237                 sleep(oid.sl);
00238                 
00239                 COUT1 << "Attempting to get container again" << endl;
00240                 delete ret;
00241                 if ( time(0) - startedAt > 3600 ) {
00242                     COUT1 << "Waited ~1 hour, giving up" << endl;
00243                     return BdbcError;
00244                 }
00245                 goto retry;
00246             }
00247         } else {
00248             cout << "Error: " << ret->msg << endl;
00249             cout << "Error returned by getOneContainer" << endl;
00250             if ( ++ _noFailures >= _maxNoFailuresAllowed ) {
00251                 cerr << "Fatal error: server keeps returning error" << endl;
00252                 ::abort();
00253             }
00254             return BdbcError;
00255         }
00256     }
00257     
00258     _noFailures = 0;
00259 
00260     delete ret;
00261     ret = 0;
00262     
00263     ref.set_DB(oid.db);
00264     ref.set_OC(oid.oc);
00265     ref.set_page(oid.pg);
00266     ref.set_slot(oid.sl);
00267     maxNoPages = oid.noPg;
00268     COUT1 << "Got:" << BdbPrintOID(ref) << ", maxNoPages = " << maxNoPages << endl;
00269     
00270     return BdbcSuccess;
00271 }
00272 
00273 
00274 
00275 BdbStatus
00276 BdbHintKernel::returnAllContainers()
00277 {
00278     if ( ! _orbIsInit ) {
00279         cerr << "ORB not initialize, nothing to return" << endl;
00280         return BdbcSuccess;
00281     }
00282 
00283 
00284     BdbClustSrvModule::RetStatus* ret;
00285     COUT1 << "Returning all containers" << endl;
00286     
00287     ret = _clustServerRef->returnAllContainers(_myDescr);
00288     if ( 0 == ret ) {
00289         cerr << "Returned RetStatus object = 0" << endl;
00290         return BdbcError;
00291     }
00292     
00293     if ( ret->level != BdbClustSrvModule::Success ) {
00294         if ( ret->level == BdbClustSrvModule::Warning ) {
00295             cout << "Warning: " << ret->msg << endl;
00296         } else {
00297             cout << "Error: " << ret->msg << endl;
00298             cout << "Error returned by returnAllContainers" << endl;
00299             return BdbcError;
00300         }
00301     }
00302     delete ret;
00303     ret = 0;
00304     
00305     return BdbcSuccess;
00306 }
00307 
00308 
00309 
00310 
00311 BdbStatus
00312 BdbHintKernel::returnContainers(set<BdbClSrvCRef*,babar::Collection::PtrLess>* s)
00313 {
00314     if ( ! _orbIsInit ) {
00315         cerr << "ORB not initialize, nothing to return" << endl;
00316         return BdbcSuccess;
00317     }
00318 
00319     // convert set to BdbClustSrvModule::OIDArray
00320     BdbClSrvCRef* cr;
00321     int i = 0, len = s->size();
00322     BdbClustSrvModule::OIDArray array(len);
00323     array.length(len);
00324     set<BdbClSrvCRef*,babar::Collection::PtrLess>::iterator iter=s->begin();
00325 
00326     while ( iter!=s->end() ) {
00327       cr=*iter;
00328         ooRef(ooContObj) r = cr->getContRef();
00329         BdbClustSrvModule::OID oid;
00330         oid.db   = r.get_DB();
00331         oid.oc   = r.get_OC();
00332         oid.pg   = r.get_page();
00333         oid.sl   = r.get_slot();
00334         oid.noPg = cr->get_nPage();
00335         array[i++] = oid;
00336         iter++;
00337     }
00338 
00339     BdbClustSrvModule::RetStatus* ret;
00340     COUT1 << "About to return some containers" << endl;
00341     
00342     ret = _clustServerRef->returnContainers(array, _myDescr);
00343 
00344     if ( 0 == ret ) {
00345         cerr << "Returned RetStatus object = 0" << endl;
00346         return BdbcError;
00347     }
00348     
00349     if ( ret->level != BdbClustSrvModule::Success ) {
00350         if ( ret->level == BdbClustSrvModule::Warning ) {
00351             cout << "Warning: " << ret->msg << endl;
00352         } else {
00353             cout << "Error: " << ret->msg << endl;
00354             cout << "Error returned by returnAllContainers" << endl;
00355             return BdbcError;
00356         }
00357     }
00358     delete ret;
00359     ret = 0;
00360     
00361     return BdbcSuccess;
00362 }
00363 
00364 
00365 void
00366 BdbHintKernel::overrideClusterName(const char* name)
00367 {
00368     if ( 0 != _clusterName ) {
00369         delete [] _clusterName;
00370         _clusterName = 0;
00371     }
00372     if ( 0 != name ) {
00373         _clusterName = new char [strlen(name)+1];
00374         strcpy(_clusterName, name);
00375     }    
00376 }

 


BaBar Public Site | SLAC | News | Links | Who's Who | Contact Us

Page Owner: Jacek Becla
Last Update: October 04, 2002