![]() |
|
|
Bdb packages | Design docs | Source docs | Guidelines | Recent releases |
|
Main Page Modules Namespace List Class Hierarchy Alphabetical List Compound List File List Compound Members File Members /BdbClusteringServer/BdbHintKernel.cc
Go to the documentation of this file.00001 //------------------------------------------------------------------------------ 00002 // File and Version Information: 00003 // $Id: BdbHintKernel.cc,v 1.11 2002/08/30 01:13:31 ryd Exp $ 00004 // 00005 // Description: 00006 // The class is responsible for kernel clustering hint operations, like 00007 // - distributing containers 00008 // - creation / deletion of databases and containers 00009 // It can do that using a server on in-process. 00010 // 00011 // Environment: 00012 // Software developed for the BaBar Detector at the SLAC B-Factory 00013 // 00014 // Author List: 00015 // Jacek Becla Original Author 00016 // 00017 // Copyright Information: 00018 // Copyright (C) 2000 Stanford Linear Accelerator Center 00019 // 00020 //------------------------------------------------------------------------------ 00021 #include "BaBar/BaBar.hh" 00022 00023 // Work around a bug in ACE/TAO with DEC CXX6.3 00024 #include <string.h> 00025 00026 00027 #include "BdbClusteringServer/BdbHintKernel.hh" 00028 00029 00030 #include "BdbApplication/BdbDebug.hh" 00031 #include "BdbApplication/BdbDomain.hh" 00032 #include "BdbAccess/BdbClusterConfigMgr.hh" 00033 #include "BdbAccess/BdbClustHintSetupMgr.hh" 00034 #include "BdbClusteringServer/BdbClSrvCRef.hh" 00035 #include "BbrStdUtils/CollectionUtils.hh" 00036 00037 #include <unistd.h> // getpid 00038 00039 #include <set> 00040 using std::set; 00041 00042 BdbClustSrvModule::BdbClusteringServer_var BdbHintKernel::_clustServerRef; 00043 BdbClustSrvModule::ClientDescr BdbHintKernel::_myDescr; 00044 char* BdbHintKernel::_clusterName = 0; 00045 d_Boolean BdbHintKernel::_orbIsInit = d_False; 00046 const int BdbHintKernel::_maxNoFailuresAllowed = 5; 00047 int BdbHintKernel::_noFailures = 0; 00048 int BdbHintKernel::_fdid = 0; 00049 00050 00051 BdbHintKernel::BdbHintKernel() 00052 {} 00053 00054 00055 00056 BdbHintKernel::~BdbHintKernel() 00057 { 00058 returnAllContainers(); 00059 _orbIsInit = d_False; 00060 } 00061 00062 00063 00064 BdbStatus 00065 BdbHintKernel::initORB(int argc, char** argv) 00066 { 00067 if ( _orbIsInit ) { 00068 return BdbcSuccess; 00069 } 00070 00071 BdbClustHintSetupMgr* chsMgr = BdbClustHintSetupMgr::instance(); 00072 const char* iorFile = chsMgr->iorFilePath(); 00073 if ( 0 == iorFile ) { 00074 cerr << "Invalid iorFile, check clustering config file" << endl; 00075 ::abort(); 00076 } 00077 00078 char hN[64]; 00079 gethostname(hN, 64); // this does not contain domain name 00080 struct hostent* hent1 = gethostbyname(hN); 00081 if ( 0 == hent1 ) { 00082 cerr << "Unable to determine host name" << endl; 00083 return BdbcError; 00084 } 00085 00086 CORBA::Environment theEnv; 00087 CORBA::ORB_ptr theOrb = CORBA::ORB_init(argc, argv, "hello", theEnv); 00088 if(theEnv.exception() != 0 ) { 00089 cerr << "CLIENT: theEnv = " << theEnv.exception() << endl; 00090 return BdbcError; 00091 } 00092 00093 char theStr[1024]; 00094 ifstream theIORFile(iorFile); 00095 if ( ! theIORFile ) { 00096 cerr << "Fatal error: cannot open IOR file" << endl; 00097 ::abort(); 00098 } 00099 00100 theIORFile >> theStr; 00101 theIORFile.close(); 00102 00103 CORBA::Object_var theObject; 00104 theObject = theOrb->string_to_object(theStr); 00105 if( CORBA::is_nil(theObject) ) { 00106 COUT1 << "FATAL ERROR: Clustering server not responding" << endl; 00107 ::abort(); 00108 } 00109 00110 // Narrow obtained object reference into an object reference 00111 // of a specific class. 00112 _clustServerRef = BdbClustSrvModule::BdbClusteringServer::_narrow(theObject); 00113 if( CORBA::is_nil(_clustServerRef.in()) ) { 00114 cerr << "CLIENT: Failed to narrow the object reference into BdbClServer." << endl; 00115 return BdbcError; 00116 } 00117 00118 _myDescr.hostName = CORBA::string_dup(hent1->h_name); 00119 _myDescr.pid = getpid() ; 00120 00121 initFDID(); 00122 int repeat = 60, serverFDID = 0; 00123 while(1) { 00124 if ( _clustServerRef->ping(serverFDID, _myDescr) ) { 00125 break; 00126 } 00127 if ( repeat-- == 0 ) { 00128 COUT1 << "FATAL ERROR: clustering server cannot be reached" << endl; 00129 ::abort(); 00130 } 00131 sleep(1); 00132 COUT1 << "ERROR: clustering server cannot be reached, retrying" << endl; 00133 } 00134 00135 if ( serverFDID != _fdid ) { 00136 cerr << "FATAL ERROR: client fdid (" << _fdid << ") differs " 00137 << "from server fdid (" << serverFDID << ")" << endl; 00138 ::abort(); 00139 } 00140 00141 _orbIsInit = d_True; 00142 00143 return BdbcSuccess; 00144 } 00145 00146 00147 BdbStatus 00148 BdbHintKernel::initFDID() 00149 { 00150 BdbApplicationOrDomain* app = BdbApplicationOrDomain::activeInstance(); 00151 d_Boolean inTrans = ( BdbcNoOpen != app->mode() ); 00152 if ( ! inTrans ) { 00153 if ( BdbcSuccess != app->startRead("getfdid") ) { 00154 return BdbcError; 00155 } 00156 } 00157 _fdid = app->fd().number(); 00158 COUT1 << "_fdid init: " << _fdid << endl; 00159 if ( ! inTrans ) { 00160 app->commit("getfdid"); 00161 } 00162 00163 return BdbcSuccess; 00164 } 00165 00166 00167 BdbStatus 00168 BdbHintKernel::getOneContainer(const char* sdName, 00169 char authLevel, 00170 const char* authName, 00171 const char* compName, 00172 const char* streamName, 00173 ooRef(ooObj)& ref, 00174 d_ULong& maxNoPages) 00175 { 00176 if ( ! _orbIsInit ) { 00177 cerr << "ORB not initialize, in-process not supported" << endl; 00178 return BdbcError; 00179 } 00180 00181 if ( 0 == _clusterName ) { 00182 const char* c = BdbClusterConfigMgr::getClusterName(); 00183 if ( 0 == c ) { 00184 c = "default"; 00185 } 00186 _clusterName = new char [strlen(c)+1]; 00187 strcpy(_clusterName, c); 00188 } 00189 00190 if ( authLevel != 'g' && authLevel != 's' && authLevel != 'u' ) { 00191 cerr << "Incorrect auth level: " << authLevel << endl; 00192 return BdbcError; 00193 } 00194 00195 BdbClustSrvModule::GroupDescr descr; 00196 descr.clusterName = CORBA::string_dup(_clusterName); 00197 descr.sdName = CORBA::string_dup(sdName); 00198 descr.authLevel = authLevel; 00199 descr.authName = CORBA::string_dup(authName); 00200 descr.compName = CORBA::string_dup(compName); 00201 if ( 0 != streamName ) { 00202 descr.streamName = CORBA::string_dup(streamName); 00203 } else { 00204 descr.streamName = CORBA::string_dup("NULL"); 00205 } 00206 00207 BdbClustSrvModule::RetStatus* ret; 00208 BdbClustSrvModule::OID oid; 00209 oid.db = oid.oc = oid.pg = oid.sl = oid.noPg = 0; 00210 00211 COUT1 << "Requesting a container for: "; 00212 if ( 0 == _clusterName ) COUT1n << "NULL-"; else COUT1n << _clusterName << "-"; 00213 if ( 0 == sdName ) COUT1n << "NULL-"; else COUT1n << sdName << "-"; 00214 COUT1n << authLevel << "-"; 00215 if ( 0 == authName ) COUT1n << "NULL-"; else COUT1n << authName << "-"; 00216 if ( 0 == compName ) COUT1n << "NULL-"; else COUT1n << compName << "-"; 00217 if ( 0 == streamName ) COUT1n << "NULL" ; else COUT1n << streamName; 00218 COUT1n << endl; 00219 00220 00221 long startedAt = time(0); 00222 retry: 00223 ret = _clustServerRef->getOneContainer(descr, _myDescr, oid); 00224 if ( 0 == ret ) { 00225 cerr << "Returned RetStatus object = 0" << endl; 00226 return BdbcError; 00227 } 00228 00229 if ( ret->level != BdbClustSrvModule::Success ) { 00230 ooRef(ooContObj) nullRef; 00231 ref = nullRef; 00232 if ( ret->level == BdbClustSrvModule::Warning ) { 00233 COUT1 << "Warning: " << ret->msg << endl; 00234 if ( ! strcmp(ret->msg, "wait") ) { 00235 COUT1 << "Waiting " << oid.sl << " sec for precreation to finish" 00236 << ", precreation started " << oid.pg << " sec ago" << endl; 00237 sleep(oid.sl); 00238 00239 COUT1 << "Attempting to get container again" << endl; 00240 delete ret; 00241 if ( time(0) - startedAt > 3600 ) { 00242 COUT1 << "Waited ~1 hour, giving up" << endl; 00243 return BdbcError; 00244 } 00245 goto retry; 00246 } 00247 } else { 00248 cout << "Error: " << ret->msg << endl; 00249 cout << "Error returned by getOneContainer" << endl; 00250 if ( ++ _noFailures >= _maxNoFailuresAllowed ) { 00251 cerr << "Fatal error: server keeps returning error" << endl; 00252 ::abort(); 00253 } 00254 return BdbcError; 00255 } 00256 } 00257 00258 _noFailures = 0; 00259 00260 delete ret; 00261 ret = 0; 00262 00263 ref.set_DB(oid.db); 00264 ref.set_OC(oid.oc); 00265 ref.set_page(oid.pg); 00266 ref.set_slot(oid.sl); 00267 maxNoPages = oid.noPg; 00268 COUT1 << "Got:" << BdbPrintOID(ref) << ", maxNoPages = " << maxNoPages << endl; 00269 00270 return BdbcSuccess; 00271 } 00272 00273 00274 00275 BdbStatus 00276 BdbHintKernel::returnAllContainers() 00277 { 00278 if ( ! _orbIsInit ) { 00279 cerr << "ORB not initialize, nothing to return" << endl; 00280 return BdbcSuccess; 00281 } 00282 00283 00284 BdbClustSrvModule::RetStatus* ret; 00285 COUT1 << "Returning all containers" << endl; 00286 00287 ret = _clustServerRef->returnAllContainers(_myDescr); 00288 if ( 0 == ret ) { 00289 cerr << "Returned RetStatus object = 0" << endl; 00290 return BdbcError; 00291 } 00292 00293 if ( ret->level != BdbClustSrvModule::Success ) { 00294 if ( ret->level == BdbClustSrvModule::Warning ) { 00295 cout << "Warning: " << ret->msg << endl; 00296 } else { 00297 cout << "Error: " << ret->msg << endl; 00298 cout << "Error returned by returnAllContainers" << endl; 00299 return BdbcError; 00300 } 00301 } 00302 delete ret; 00303 ret = 0; 00304 00305 return BdbcSuccess; 00306 } 00307 00308 00309 00310 00311 BdbStatus 00312 BdbHintKernel::returnContainers(set<BdbClSrvCRef*,babar::Collection::PtrLess>* s) 00313 { 00314 if ( ! _orbIsInit ) { 00315 cerr << "ORB not initialize, nothing to return" << endl; 00316 return BdbcSuccess; 00317 } 00318 00319 // convert set to BdbClustSrvModule::OIDArray 00320 BdbClSrvCRef* cr; 00321 int i = 0, len = s->size(); 00322 BdbClustSrvModule::OIDArray array(len); 00323 array.length(len); 00324 set<BdbClSrvCRef*,babar::Collection::PtrLess>::iterator iter=s->begin(); 00325 00326 while ( iter!=s->end() ) { 00327 cr=*iter; 00328 ooRef(ooContObj) r = cr->getContRef(); 00329 BdbClustSrvModule::OID oid; 00330 oid.db = r.get_DB(); 00331 oid.oc = r.get_OC(); 00332 oid.pg = r.get_page(); 00333 oid.sl = r.get_slot(); 00334 oid.noPg = cr->get_nPage(); 00335 array[i++] = oid; 00336 iter++; 00337 } 00338 00339 BdbClustSrvModule::RetStatus* ret; 00340 COUT1 << "About to return some containers" << endl; 00341 00342 ret = _clustServerRef->returnContainers(array, _myDescr); 00343 00344 if ( 0 == ret ) { 00345 cerr << "Returned RetStatus object = 0" << endl; 00346 return BdbcError; 00347 } 00348 00349 if ( ret->level != BdbClustSrvModule::Success ) { 00350 if ( ret->level == BdbClustSrvModule::Warning ) { 00351 cout << "Warning: " << ret->msg << endl; 00352 } else { 00353 cout << "Error: " << ret->msg << endl; 00354 cout << "Error returned by returnAllContainers" << endl; 00355 return BdbcError; 00356 } 00357 } 00358 delete ret; 00359 ret = 0; 00360 00361 return BdbcSuccess; 00362 } 00363 00364 00365 void 00366 BdbHintKernel::overrideClusterName(const char* name) 00367 { 00368 if ( 0 != _clusterName ) { 00369 delete [] _clusterName; 00370 _clusterName = 0; 00371 } 00372 if ( 0 != name ) { 00373 _clusterName = new char [strlen(name)+1]; 00374 strcpy(_clusterName, name); 00375 } 00376 }
BaBar Public Site | SLAC | News | Links | Who's Who | Contact Us
Page Owner: Jacek Becla
Last Update: October 04, 2002