// instance.cpp : Global state variables and functions. // /** * Copyright (C) 2008 10gen Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ #include "pch.h" #include "db.h" #include "introspect.h" #include "repl.h" #include "dbmessage.h" #include "instance.h" #include "lasterror.h" #include "security.h" #include "json.h" #include "replutil.h" #include "../s/d_logic.h" #include "../util/file_allocator.h" #include "../util/goodies.h" #include "cmdline.h" #if !defined(_WIN32) #include #endif #include "stats/counters.h" #include "background.h" #include "dur_journal.h" #include "dur_recover.h" #include "ops/update.h" #include "ops/delete.h" #include "ops/query.h" namespace mongo { inline void opread(Message& m) { if( _diaglog.level & 2 ) _diaglog.readop((char *) m.singleData(), m.header()->len); } inline void opwrite(Message& m) { if( _diaglog.level & 1 ) _diaglog.write((char *) m.singleData(), m.header()->len); } void receivedKillCursors(Message& m); void receivedUpdate(Message& m, CurOp& op); void receivedDelete(Message& m, CurOp& op); void receivedInsert(Message& m, CurOp& op); bool receivedGetMore(DbResponse& dbresponse, Message& m, CurOp& curop ); int nloggedsome = 0; #define LOGSOME if( ++nloggedsome < 1000 || nloggedsome % 100 == 0 ) string dbExecCommand; DiagLog _diaglog; bool useCursors = true; bool useHints = true; void flushDiagLog() { if( _diaglog.f && _diaglog.f->is_open() ) { log() << "flushing diag log" << endl; _diaglog.flush(); } } KillCurrentOp killCurrentOp; int lockFile = 0; #ifdef _WIN32 HANDLE lockFileHandle; #endif // see FSyncCommand: extern bool lockedForWriting; void inProgCmd( Message &m, DbResponse &dbresponse ) { BSONObjBuilder b; if( ! cc().isAdmin() ) { BSONObjBuilder b; b.append("err", "unauthorized"); } else { DbMessage d(m); QueryMessage q(d); bool all = q.query["$all"].trueValue(); vector vals; { Client& me = cc(); scoped_lock bl(Client::clientsMutex); for( set::iterator i = Client::clients.begin(); i != Client::clients.end(); i++ ) { Client *c = *i; assert( c ); CurOp* co = c->curop(); if ( c == &me && !co ) { continue; } assert( co ); if( all || co->active() ) vals.push_back( co->infoNoauth() ); } } b.append("inprog", vals); unsigned x = lockedForWriting; if( x ) { b.append("fsyncLock", x); b.append("info", "use db.fsyncUnlock() to terminate the fsync write/snapshot lock"); } } replyToQuery(0, m, dbresponse, b.obj()); } void killOp( Message &m, DbResponse &dbresponse ) { BSONObj obj; if( ! cc().isAdmin() ) { obj = fromjson("{\"err\":\"unauthorized\"}"); } /*else if( !dbMutexInfo.isLocked() ) obj = fromjson("{\"info\":\"no op in progress/not locked\"}"); */ else { DbMessage d(m); QueryMessage q(d); BSONElement e = q.query.getField("op"); if( !e.isNumber() ) { obj = fromjson("{\"err\":\"no op number field specified?\"}"); } else { log() << "going to kill op: " << e << endl; obj = fromjson("{\"info\":\"attempting to kill op\"}"); killCurrentOp.kill( (unsigned) e.number() ); } } replyToQuery(0, m, dbresponse, obj); } void unlockFsyncAndWait(); void unlockFsync(const char *ns, Message& m, DbResponse &dbresponse) { BSONObj obj; if ( ! cc().isAdmin() ) { // checks auth obj = fromjson("{\"err\":\"unauthorized\"}"); } else if (strncmp(ns, "admin.", 6) != 0 ) { obj = fromjson("{\"err\":\"unauthorized - this command must be run against the admin DB\"}"); } else { if( lockedForWriting ) { log() << "command: unlock requested" << endl; obj = fromjson("{ok:1,\"info\":\"unlock completed\"}"); unlockFsyncAndWait(); } else { obj = fromjson("{ok:0,\"errmsg\":\"not locked\"}"); } } replyToQuery(0, m, dbresponse, obj); } static bool receivedQuery(Client& c, DbResponse& dbresponse, Message& m ) { bool ok = true; MSGID responseTo = m.header()->id; DbMessage d(m); QueryMessage q(d); auto_ptr< Message > resp( new Message() ); CurOp& op = *(c.curop()); try { dbresponse.exhaust = runQuery(m, q, op, *resp); assert( !resp->empty() ); } catch ( AssertionException& e ) { ok = false; op.debug().exceptionInfo = e.getInfo(); LOGSOME { log() << "assertion " << e.toString() << " ns:" << q.ns << " query:" << (q.query.valid() ? q.query.toString() : "query object is corrupt") << endl; if( q.ntoskip || q.ntoreturn ) log() << " ntoskip:" << q.ntoskip << " ntoreturn:" << q.ntoreturn << endl; } BSONObjBuilder err; e.getInfo().append( err ); BSONObj errObj = err.done(); BufBuilder b; b.skip(sizeof(QueryResult)); b.appendBuf((void*) errObj.objdata(), errObj.objsize()); // todo: call replyToQuery() from here instead of this!!! see dbmessage.h QueryResult * msgdata = (QueryResult *) b.buf(); b.decouple(); QueryResult *qr = msgdata; qr->_resultFlags() = ResultFlag_ErrSet; if ( e.getCode() == StaleConfigInContextCode ) qr->_resultFlags() |= ResultFlag_ShardConfigStale; qr->len = b.len(); qr->setOperation(opReply); qr->cursorId = 0; qr->startingFrom = 0; qr->nReturned = 1; resp.reset( new Message() ); resp->setData( msgdata, true ); } op.debug().responseLength = resp->header()->dataLen(); dbresponse.response = resp.release(); dbresponse.responseTo = responseTo; return ok; } void (*reportEventToSystem)(const char *msg) = 0; void mongoAbort(const char *msg) { if( reportEventToSystem ) reportEventToSystem(msg); rawOut(msg); ::abort(); } // Returns false when request includes 'end' void assembleResponse( Message &m, DbResponse &dbresponse, const HostAndPort& remote ) { // before we lock... int op = m.operation(); bool isCommand = false; const char *ns = m.singleData()->_data + 4; if ( op == dbQuery ) { if( strstr(ns, ".$cmd") ) { isCommand = true; opwrite(m); if( strstr(ns, ".$cmd.sys.") ) { if( strstr(ns, "$cmd.sys.inprog") ) { inProgCmd(m, dbresponse); return; } if( strstr(ns, "$cmd.sys.killop") ) { killOp(m, dbresponse); return; } if( strstr(ns, "$cmd.sys.unlock") ) { unlockFsync(ns, m, dbresponse); return; } } } else { opread(m); } } else if( op == dbGetMore ) { opread(m); } else { opwrite(m); } globalOpCounters.gotOp( op , isCommand ); Client& c = cc(); auto_ptr nestedOp; CurOp* currentOpP = c.curop(); if ( currentOpP->active() ) { nestedOp.reset( new CurOp( &c , currentOpP ) ); currentOpP = nestedOp.get(); } CurOp& currentOp = *currentOpP; currentOp.reset(remote,op); OpDebug& debug = currentOp.debug(); debug.op = op; int logThreshold = cmdLine.slowMS; bool log = logLevel >= 1; if ( op == dbQuery ) { if ( handlePossibleShardedMessage( m , &dbresponse ) ) return; receivedQuery(c , dbresponse, m ); } else if ( op == dbGetMore ) { if ( ! receivedGetMore(dbresponse, m, currentOp) ) log = true; } else if ( op == dbMsg ) { // deprecated - replaced by commands char *p = m.singleData()->_data; int len = strlen(p); if ( len > 400 ) out() << curTimeMillis64() % 10000 << " long msg received, len:" << len << endl; Message *resp = new Message(); if ( strcmp( "end" , p ) == 0 ) resp->setData( opReply , "dbMsg end no longer supported" ); else resp->setData( opReply , "i am fine - dbMsg deprecated"); dbresponse.response = resp; dbresponse.responseTo = m.header()->id; } else { const char *ns = m.singleData()->_data + 4; char cl[256]; nsToDatabase(ns, cl); if( ! c.getAuthenticationInfo()->isAuthorized(cl) ) { uassert_nothrow("unauthorized"); } else { try { if ( op == dbInsert ) { receivedInsert(m, currentOp); } else if ( op == dbUpdate ) { receivedUpdate(m, currentOp); } else if ( op == dbDelete ) { receivedDelete(m, currentOp); } else if ( op == dbKillCursors ) { currentOp.ensureStarted(); logThreshold = 10; receivedKillCursors(m); } else { mongo::log() << " operation isn't supported: " << op << endl; currentOp.done(); log = true; } } catch ( UserException& ue ) { tlog(3) << " Caught Assertion in " << opToString(op) << ", continuing " << ue.toString() << endl; debug.exceptionInfo = ue.getInfo(); } catch ( AssertionException& e ) { tlog(3) << " Caught Assertion in " << opToString(op) << ", continuing " << e.toString() << endl; debug.exceptionInfo = e.getInfo(); log = true; } } } currentOp.ensureStarted(); currentOp.done(); debug.executionTime = currentOp.totalTimeMillis(); //DEV log = true; if ( log || debug.executionTime > logThreshold ) { if( logLevel < 3 && op == dbGetMore && strstr(ns, ".oplog.") && debug.executionTime < 4300 && !log ) { /* it's normal for getMore on the oplog to be slow because of use of awaitdata flag. */ } else { mongo::tlog() << debug << endl; } } if ( currentOp.shouldDBProfile( debug.executionTime ) ) { // performance profiling is on if ( dbMutex.getState() < 0 ) { mongo::log(1) << "note: not profiling because recursive read lock" << endl; } else { writelock lk; if ( dbHolder.isLoaded( nsToDatabase( currentOp.getNS() ) , dbpath ) ) { Client::Context cx( currentOp.getNS() ); profile(c , currentOp ); } else { mongo::log() << "note: not profiling because db went away - probably a close on: " << currentOp.getNS() << endl; } } } debug.reset(); } /* assembleResponse() */ void receivedKillCursors(Message& m) { int *x = (int *) m.singleData()->_data; x++; // reserved int n = *x++; uassert( 13659 , "sent 0 cursors to kill" , n != 0 ); massert( 13658 , str::stream() << "bad kill cursors size: " << m.dataSize() , m.dataSize() == 8 + ( 8 * n ) ); uassert( 13004 , str::stream() << "sent negative cursors to kill: " << n , n >= 1 ); if ( n > 2000 ) { log( n < 30000 ? LL_WARNING : LL_ERROR ) << "receivedKillCursors, n=" << n << endl; assert( n < 30000 ); } int found = ClientCursor::erase(n, (long long *) x); if ( logLevel > 0 || found != n ) { log( found == n ) << "killcursors: found " << found << " of " << n << endl; } } /* db - database name path - db directory */ /*static*/ void Database::closeDatabase( const char *db, const string& path ) { assertInWriteLock(); Client::Context * ctx = cc().getContext(); assert( ctx ); assert( ctx->inDB( db , path ) ); Database *database = ctx->db(); assert( database->name == db ); oplogCheckCloseDatabase( database ); // oplog caches some things, dirty its caches if( BackgroundOperation::inProgForDb(db) ) { log() << "warning: bg op in prog during close db? " << db << endl; } /* important: kill all open cursors on the database */ string prefix(db); prefix += '.'; ClientCursor::invalidate(prefix.c_str()); NamespaceDetailsTransient::clearForPrefix( prefix.c_str() ); dbHolder.erase( db, path ); ctx->clear(); delete database; // closes files } void receivedUpdate(Message& m, CurOp& op) { DbMessage d(m); const char *ns = d.getns(); op.debug().ns = ns; int flags = d.pullInt(); BSONObj query = d.nextJsObj(); assert( d.moreJSObjs() ); assert( query.objsize() < m.header()->dataLen() ); BSONObj toupdate = d.nextJsObj(); uassert( 10055 , "update object too large", toupdate.objsize() <= BSONObjMaxUserSize); assert( toupdate.objsize() < m.header()->dataLen() ); assert( query.objsize() + toupdate.objsize() < m.header()->dataLen() ); bool upsert = flags & UpdateOption_Upsert; bool multi = flags & UpdateOption_Multi; bool broadcast = flags & UpdateOption_Broadcast; op.debug().query = query; op.setQuery(query); writelock lk; // writelock is used to synchronize stepdowns w/ writes uassert( 10054 , "not master", isMasterNs( ns ) ); // if this ever moves to outside of lock, need to adjust check Client::Context::_finishInit if ( ! broadcast && handlePossibleShardedMessage( m , 0 ) ) return; Client::Context ctx( ns ); UpdateResult res = updateObjects(ns, toupdate, query, upsert, multi, true, op.debug() ); lastError.getSafe()->recordUpdate( res.existing , res.num , res.upserted ); // for getlasterror } void receivedDelete(Message& m, CurOp& op) { DbMessage d(m); const char *ns = d.getns(); op.debug().ns = ns; int flags = d.pullInt(); bool justOne = flags & RemoveOption_JustOne; bool broadcast = flags & RemoveOption_Broadcast; assert( d.moreJSObjs() ); BSONObj pattern = d.nextJsObj(); op.debug().query = pattern; op.setQuery(pattern); writelock lk(ns); // writelock is used to synchronize stepdowns w/ writes uassert( 10056 , "not master", isMasterNs( ns ) ); // if this ever moves to outside of lock, need to adjust check Client::Context::_finishInit if ( ! broadcast && handlePossibleShardedMessage( m , 0 ) ) return; Client::Context ctx(ns); long long n = deleteObjects(ns, pattern, justOne, true); lastError.getSafe()->recordDelete( n ); } QueryResult* emptyMoreResult(long long); bool receivedGetMore(DbResponse& dbresponse, Message& m, CurOp& curop ) { bool ok = true; DbMessage d(m); const char *ns = d.getns(); int ntoreturn = d.pullInt(); long long cursorid = d.pullInt64(); curop.debug().ns = ns; curop.debug().ntoreturn = ntoreturn; curop.debug().cursorid = cursorid; time_t start = 0; int pass = 0; bool exhaust = false; QueryResult* msgdata; while( 1 ) { try { readlock lk; Client::Context ctx(ns); msgdata = processGetMore(ns, ntoreturn, cursorid, curop, pass, exhaust); } catch ( AssertionException& e ) { exhaust = false; curop.debug().exceptionInfo = e.getInfo(); msgdata = emptyMoreResult(cursorid); ok = false; } if (msgdata == 0) { exhaust = false; massert(13073, "shutting down", !inShutdown() ); if( pass == 0 ) { start = time(0); } else { if( time(0) - start >= 4 ) { // after about 4 seconds, return. this is a sanity check. pass stops at 1000 normally // for DEV this helps and also if sleep is highly inaccurate on a platform. we want to // return occasionally so slave can checkpoint. pass = 10000; } } pass++; DEV sleepmillis(20); else sleepmillis(2); continue; } break; }; Message *resp = new Message(); resp->setData(msgdata, true); curop.debug().responseLength = resp->header()->dataLen(); curop.debug().nreturned = msgdata->nReturned; dbresponse.response = resp; dbresponse.responseTo = m.header()->id; if( exhaust ) { curop.debug().exhaust = true; dbresponse.exhaust = ns; } return ok; } void checkAndInsert(const char *ns, /*modifies*/BSONObj& js) { uassert( 10059 , "object to insert too large", js.objsize() <= BSONObjMaxUserSize); { // check no $ modifiers. note we only check top level. (scanning deep would be quite expensive) BSONObjIterator i( js ); while ( i.more() ) { BSONElement e = i.next(); uassert( 13511 , "document to insert can't have $ fields" , e.fieldName()[0] != '$' ); } } theDataFileMgr.insertWithObjMod(ns, js, false); // js may be modified in the call to add an _id field. logOp("i", ns, js); } NOINLINE_DECL void insertMulti(DbMessage& d, const char *ns, const BSONObj& _js) { const bool keepGoing = d.reservedField() & InsertOption_ContinueOnError; int n = 0; BSONObj js(_js); while( 1 ) { try { checkAndInsert(ns, js); ++n; getDur().commitIfNeeded(); } catch (const UserException&) { if (!keepGoing || !d.moreJSObjs()){ globalOpCounters.incInsertInWriteLock(n); throw; } // otherwise ignore and keep going } if( !d.moreJSObjs() ) break; js = d.nextJsObj(); // TODO: refactor to do objcheck outside of writelock } globalOpCounters.incInsertInWriteLock(n); } void receivedInsert(Message& m, CurOp& op) { DbMessage d(m); const char *ns = d.getns(); op.debug().ns = ns; if( !d.moreJSObjs() ) { // strange. should we complain? return; } BSONObj js = d.nextJsObj(); writelock lk(ns); // writelock is used to synchronize stepdowns w/ writes uassert( 10058 , "not master", isMasterNs(ns) ); if ( handlePossibleShardedMessage( m , 0 ) ) return; Client::Context ctx(ns); if( d.moreJSObjs() ) { insertMulti(d, ns, js); return; } checkAndInsert(ns, js); globalOpCounters.incInsertInWriteLock(1); } void getDatabaseNames( vector< string > &names , const string& usePath ) { boost::filesystem::path path( usePath ); for ( boost::filesystem::directory_iterator i( path ); i != boost::filesystem::directory_iterator(); ++i ) { if ( directoryperdb ) { boost::filesystem::path p = *i; string dbName = p.leaf(); p /= ( dbName + ".ns" ); if ( MMF::exists( p ) ) names.push_back( dbName ); } else { string fileName = boost::filesystem::path(*i).leaf(); if ( fileName.length() > 3 && fileName.substr( fileName.length() - 3, 3 ) == ".ns" ) names.push_back( fileName.substr( 0, fileName.length() - 3 ) ); } } } /* returns true if there is data on this server. useful when starting replication. local database does NOT count except for rsoplog collection. */ bool replHasDatabases() { vector names; getDatabaseNames(names); if( names.size() >= 2 ) return true; if( names.size() == 1 ) { if( names[0] != "local" ) return true; // we have a local database. return true if oplog isn't empty { readlock lk(rsoplog); BSONObj o; if( Helpers::getFirst(rsoplog, o) ) return true; } } return false; } bool DBDirectClient::call( Message &toSend, Message &response, bool assertOk , string * actualServer ) { if ( lastError._get() ) lastError.startRequest( toSend, lastError._get() ); DbResponse dbResponse; assembleResponse( toSend, dbResponse , _clientHost ); assert( dbResponse.response ); dbResponse.response->concat(); // can get rid of this if we make response handling smarter response = *dbResponse.response; getDur().commitIfNeeded(); return true; } void DBDirectClient::say( Message &toSend, bool isRetry ) { if ( lastError._get() ) lastError.startRequest( toSend, lastError._get() ); DbResponse dbResponse; assembleResponse( toSend, dbResponse , _clientHost ); getDur().commitIfNeeded(); } auto_ptr DBDirectClient::query(const string &ns, Query query, int nToReturn , int nToSkip , const BSONObj *fieldsToReturn , int queryOptions ) { //if ( ! query.obj.isEmpty() || nToReturn != 0 || nToSkip != 0 || fieldsToReturn || queryOptions ) return DBClientBase::query( ns , query , nToReturn , nToSkip , fieldsToReturn , queryOptions ); // //assert( query.obj.isEmpty() ); //throw UserException( (string)"yay:" + ns ); } void DBDirectClient::killCursor( long long id ) { ClientCursor::erase( id ); } HostAndPort DBDirectClient::_clientHost = HostAndPort( "0.0.0.0" , 0 ); unsigned long long DBDirectClient::count(const string &ns, const BSONObj& query, int options, int limit, int skip ) { readlock lk( ns ); string errmsg; long long res = runCount( ns.c_str() , _countCmd( ns , query , options , limit , skip ) , errmsg ); if ( res == -1 ) return 0; uassert( 13637 , str::stream() << "count failed in DBDirectClient: " << errmsg , res >= 0 ); return (unsigned long long )res; } DBClientBase * createDirectClient() { return new DBDirectClient(); } mongo::mutex exitMutex("exit"); int numExitCalls = 0; bool inShutdown() { return numExitCalls > 0; } void tryToOutputFatal( const string& s ) { try { rawOut( s ); return; } catch ( ... ) {} try { cerr << s << endl; return; } catch ( ... ) {} // uh - oh, not sure there is anything else we can do... } /** also called by ntservice.cpp */ void shutdownServer() { log() << "shutdown: going to close listening sockets..." << endl; ListeningSockets::get()->closeAll(); log() << "shutdown: going to flush diaglog..." << endl; flushDiagLog(); /* must do this before unmapping mem or you may get a seg fault */ log() << "shutdown: going to close sockets..." << endl; boost::thread close_socket_thread( boost::bind(MessagingPort::closeAllSockets, 0) ); // wait until file preallocation finishes // we would only hang here if the file_allocator code generates a // synchronous signal, which we don't expect log() << "shutdown: waiting for fs preallocator..." << endl; FileAllocator::get()->waitUntilFinished(); if( cmdLine.dur ) { log() << "shutdown: lock for final commit..." << endl; { int n = 10; while( 1 ) { // we may already be in a read lock from earlier in the call stack, so do read lock here // to be consistent with that. readlocktry w("", 20000); if( w.got() ) { log() << "shutdown: final commit..." << endl; getDur().commitNow(); break; } if( --n <= 0 ) { log() << "shutdown: couldn't acquire write lock, aborting" << endl; mongoAbort("couldn't acquire write lock"); } log() << "shutdown: waiting for write lock..." << endl; } } MemoryMappedFile::flushAll(true); } log() << "shutdown: closing all files..." << endl; stringstream ss3; MemoryMappedFile::closeAllFiles( ss3 ); log() << ss3.str() << endl; if( cmdLine.dur ) { dur::journalCleanup(true); } #if !defined(__sunos__) if ( lockFile ) { log() << "shutdown: removing fs lock..." << endl; /* This ought to be an unlink(), but Eliot says the last time that was attempted, there was a race condition with acquirePathLock(). */ #ifdef _WIN32 if( _chsize( lockFile , 0 ) ) log() << "couldn't remove fs lock " << WSAGetLastError() << endl; CloseHandle(lockFileHandle); #else if( ftruncate( lockFile , 0 ) ) log() << "couldn't remove fs lock " << errnoWithDescription() << endl; flock( lockFile, LOCK_UN ); #endif } #endif } void exitCleanly( ExitCode code ) { killCurrentOp.killAll(); { dblock lk; log() << "now exiting" << endl; dbexit( code ); } } /* not using log() herein in case we are already locked */ NOINLINE_DECL void dbexit( ExitCode rc, const char *why, bool tryToGetLock ) { auto_ptr wlt; if ( tryToGetLock ) { wlt.reset( new writelocktry( "" , 2 * 60 * 1000 ) ); uassert( 13455 , "dbexit timed out getting lock" , wlt->got() ); } Client * c = currentClient.get(); { scoped_lock lk( exitMutex ); if ( numExitCalls++ > 0 ) { if ( numExitCalls > 5 ) { // this means something horrible has happened ::_exit( rc ); } stringstream ss; ss << "dbexit: " << why << "; exiting immediately"; tryToOutputFatal( ss.str() ); if ( c ) c->shutdown(); ::exit( rc ); } } { stringstream ss; ss << "dbexit: " << why; tryToOutputFatal( ss.str() ); } try { shutdownServer(); // gracefully shutdown instance } catch ( ... ) { tryToOutputFatal( "shutdown failed with exception" ); } try { mutexDebugger.programEnding(); } catch (...) { } tryToOutputFatal( "dbexit: really exiting now" ); if ( c ) c->shutdown(); ::exit(rc); } #if !defined(__sunos__) void writePid(int fd) { stringstream ss; ss << getpid() << endl; string s = ss.str(); const char * data = s.c_str(); #ifdef _WIN32 assert ( _write( fd, data, strlen( data ) ) ); #else assert ( write( fd, data, strlen( data ) ) ); #endif } void acquirePathLock(bool doingRepair) { string name = ( boost::filesystem::path( dbpath ) / "mongod.lock" ).native_file_string(); bool oldFile = false; if ( boost::filesystem::exists( name ) && boost::filesystem::file_size( name ) > 0 ) { oldFile = true; } #ifdef _WIN32 lockFileHandle = CreateFileA( name.c_str(), GENERIC_READ | GENERIC_WRITE, 0 /* do not allow anyone else access */, NULL, OPEN_ALWAYS /* success if fh can open */, 0, NULL ); if (lockFileHandle == INVALID_HANDLE_VALUE) { DWORD code = GetLastError(); char *msg; FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, NULL, code, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), (LPSTR)&msg, 0, NULL); string m = msg; str::stripTrailing(m, "\r\n"); uasserted( 13627 , str::stream() << "Unable to create/open lock file: " << name << ' ' << m << " Is a mongod instance already running?" ); } lockFile = _open_osfhandle((intptr_t)lockFileHandle, 0); #else lockFile = open( name.c_str(), O_RDWR | O_CREAT , S_IRWXU | S_IRWXG | S_IRWXO ); if( lockFile <= 0 ) { uasserted( 10309 , str::stream() << "Unable to create/open lock file: " << name << ' ' << errnoWithDescription() << " Is a mongod instance already running?" ); } if (flock( lockFile, LOCK_EX | LOCK_NB ) != 0) { close ( lockFile ); lockFile = 0; uassert( 10310 , "Unable to acquire lock for lockfilepath: " + name, 0 ); } #endif if ( oldFile ) { // we check this here because we want to see if we can get the lock // if we can't, then its probably just another mongod running string errmsg; if (cmdLine.dur) { if (!dur::haveJournalFiles()) { vector dbnames; getDatabaseNames( dbnames ); if ( dbnames.size() == 0 ) { // this means that mongod crashed // between initial startup and when journaling was initialized // it is safe to continue } else { errmsg = str::stream() << "************** \n" << "old lock file: " << name << ". probably means unclean shutdown,\n" << "but there are no journal files to recover.\n" << "this is likely human error or filesystem corruption.\n" << "found " << dbnames.size() << " dbs.\n" << "see: http://dochub.mongodb.org/core/repair for more information\n" << "*************"; } } } else { if (!dur::haveJournalFiles() && !doingRepair) { errmsg = str::stream() << "************** \n" << "Unclean shutdown detected.\n" << "Please visit http://dochub.mongodb.org/core/repair for recovery instructions.\n" << "*************"; } } if (!errmsg.empty()) { cout << errmsg << endl; #ifdef _WIN32 CloseHandle( lockFileHandle ); #else close ( lockFile ); #endif lockFile = 0; uassert( 12596 , "old lock file" , 0 ); } } // Not related to lock file, but this is where we handle unclean shutdown if( !cmdLine.dur && dur::haveJournalFiles() ) { cout << "**************" << endl; cout << "Error: journal files are present in journal directory, yet starting without --journal enabled." << endl; cout << "It is recommended that you start with journaling enabled so that recovery may occur." << endl; cout << "**************" << endl; uasserted(13597, "can't start without --journal enabled when journal/ files are present"); } #ifdef _WIN32 uassert( 13625, "Unable to truncate lock file", _chsize(lockFile, 0) == 0); writePid( lockFile ); _commit( lockFile ); #else uassert( 13342, "Unable to truncate lock file", ftruncate(lockFile, 0) == 0); writePid( lockFile ); fsync( lockFile ); flushMyDirectory(name); #endif } #else void acquirePathLock(bool) { // TODO - this is very bad that the code above not running here. // Not related to lock file, but this is where we handle unclean shutdown if( !cmdLine.dur && dur::haveJournalFiles() ) { cout << "**************" << endl; cout << "Error: journal files are present in journal directory, yet starting without --journal enabled." << endl; cout << "It is recommended that you start with journaling enabled so that recovery may occur." << endl; cout << "Alternatively (not recommended), you can backup everything, then delete the journal files, and run --repair" << endl; cout << "**************" << endl; uasserted(13618, "can't start without --journal enabled when journal/ files are present"); } } #endif } // namespace mongo