diff options
54 files changed, 1392 insertions, 236 deletions
@@ -25,6 +25,17 @@ import buildscripts.bb import stat from buildscripts import utils + +def _rpartition(string, sep): + """A replacement for str.rpartition which is missing in Python < 2.5 + """ + idx = string.rfind(sep) + if idx == -1: + return '', '', string + return string[:idx], sep, string[idx + 1:] + + + buildscripts.bb.checkOk() def findSettingsSetup(): @@ -38,7 +49,7 @@ def getThirdPartyShortNames(): if not x.endswith( ".py" ) or x.find( "#" ) >= 0: continue - lst.append( x.rpartition( "." )[0] ) + lst.append( _rpartition( x, "." )[0] ) return lst @@ -334,6 +345,7 @@ class InstallSetup: self.clientTestsDir = "client/examples/" installSetup = InstallSetup() +env["installSetup"] = installSetup if distBuild: installSetup.bannerDir = "distsrc" @@ -805,6 +817,29 @@ def add_exe(target): return target + ".exe" return target +def smoke_python_name(): + # if this script is being run by py2.5 or greater, + # then we assume that "python" points to a 2.5 or + # greater python VM. otherwise, explicitly use 2.5 + # which we assume to be installed. + import subprocess + version = re.compile(r'[Pp]ython ([\d\.]+)', re.MULTILINE) + binaries = ['python2.5', 'python2.6', 'python2.7', 'python25', 'python26', 'python27', 'python'] + for binary in binaries: + try: + # py-2.4 compatible replacement for shell backticks + out, err = subprocess.Popen([binary, '-V'], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() + for stream in (out, err): + match = version.search(stream) + if match: + versiontuple = tuple(map(int, match.group(1).split('.'))) + if versiontuple >= (2, 5): + return binary + except: + pass + # if that all fails, fall back to "python" + return "python" + def setupBuildInfoFile( outFile ): version = utils.getGitVersion() if len(moduleNames) > 0: @@ -1218,7 +1253,7 @@ def addSmoketest( name, deps ): else: target = name[5].lower() + name[6:] - addTest(name, deps, [ "python buildscripts/smoke.py " + " ".join(smokeFlags) + ' ' + target ]) + addTest(name, deps, [ smoke_python_name() + " buildscripts/smoke.py " + " ".join(smokeFlags) + ' ' + target ]) addSmoketest( "smoke", [ add_exe( "test" ) ] ) addSmoketest( "smokePerf", [ "perftest" ] ) @@ -1480,7 +1515,7 @@ if installSetup.headers: if installSetup.clientSrc: for x in allClientFiles: x = str(x) - env.Install( installDir + "/mongo/" + x.rpartition( "/" )[0] , x ) + env.Install( installDir + "/mongo/" + _rpartition( x, "/" )[0] , x ) #lib if installSetup.libraries: @@ -1559,7 +1594,7 @@ def s3push( localName , remoteName=None , remotePrefix=None , fixName=True , pla remoteName = localName if fixName: - (root,dot,suffix) = localName.rpartition( "." ) + (root,dot,suffix) = _rpartition( localName, "." ) name = remoteName + "-" + getSystemInstallName() name += remotePrefix if dot == "." : @@ -1616,7 +1651,7 @@ def build_and_test_client(env, target, source): call(scons_command + ["libmongoclient.a", "clientTests"], cwd=installDir) - return bool(call(["python", "buildscripts/smoke.py", + return bool(call([smoke_python_name(), "buildscripts/smoke.py", "--test-path", installDir, "client"])) env.Alias("clientBuild", [mongod, installDir], [build_and_test_client]) env.AlwaysBuild("clientBuild") diff --git a/client/dbclient.cpp b/client/dbclient.cpp index 5faeccf..67ecea0 100644 --- a/client/dbclient.cpp +++ b/client/dbclient.cpp @@ -611,6 +611,7 @@ namespace mongo { if ( clientSet && isNotMasterErrorString( info["errmsg"] ) ) { clientSet->isntMaster(); + // At this point, we've probably deleted *this* object, do *not* use afterward } return false; diff --git a/client/dbclient_rs.cpp b/client/dbclient_rs.cpp index c57a52d..0189700 100644 --- a/client/dbclient_rs.cpp +++ b/client/dbclient_rs.cpp @@ -72,6 +72,15 @@ namespace mongo { } replicaSetMonitorWatcher; + string seedString( const vector<HostAndPort>& servers ){ + string seedStr; + for ( unsigned i = 0; i < servers.size(); i++ ){ + seedStr += servers[i].toString(); + if( i < servers.size() - 1 ) seedStr += ","; + } + + return seedStr; + } ReplicaSetMonitor::ReplicaSetMonitor( const string& name , const vector<HostAndPort>& servers ) : _lock( "ReplicaSetMonitor instance" ) , _checkConnectionLock( "ReplicaSetMonitor check connection lock" ), _name( name ) , _master(-1), _nextSlave(0) { @@ -82,28 +91,36 @@ namespace mongo { warning() << "replica set name empty, first node: " << servers[0] << endl; } - string errmsg; + log() << "starting new replica set monitor for replica set " << _name << " with seed of " << seedString( servers ) << endl; - for ( unsigned i=0; i<servers.size(); i++ ) { + string errmsg; + for ( unsigned i = 0; i < servers.size(); i++ ) { - bool haveAlready = false; - for ( unsigned n = 0; n < _nodes.size() && ! haveAlready; n++ ) - haveAlready = ( _nodes[n].addr == servers[i] ); - if( haveAlready ) continue; + // Don't check servers we have already + if( _find_inlock( servers[i] ) >= 0 ) continue; auto_ptr<DBClientConnection> conn( new DBClientConnection( true , 0, 5.0 ) ); - if (!conn->connect( servers[i] , errmsg ) ) { - log(1) << "error connecting to seed " << servers[i] << ": " << errmsg << endl; + try{ + if( ! conn->connect( servers[i] , errmsg ) ){ + throw DBException( errmsg, 15928 ); + } + log() << "successfully connected to seed " << servers[i] << " for replica set " << this->_name << endl; + } + catch( DBException& e ){ + log() << "error connecting to seed " << servers[i] << causedBy( e ) << endl; // skip seeds that don't work continue; } - _nodes.push_back( Node( servers[i] , conn.release() ) ); - - int myLoc = _nodes.size() - 1; string maybePrimary; - _checkConnection( _nodes[myLoc].conn.get() , maybePrimary, false, myLoc ); + _checkConnection( conn.get(), maybePrimary, false, -1 ); } + + // Check everything to get the first data + _check( true ); + + log() << "replica set monitor for replica set " << _name << " started, address is " << getServerAddress() << endl; + } ReplicaSetMonitor::~ReplicaSetMonitor() { @@ -164,18 +181,21 @@ namespace mongo { } string ReplicaSetMonitor::getServerAddress() const { + scoped_lock lk( _lock ); + return _getServerAddress_inlock(); + } + + string ReplicaSetMonitor::_getServerAddress_inlock() const { StringBuilder ss; if ( _name.size() ) ss << _name << "/"; - { - scoped_lock lk( _lock ); - for ( unsigned i=0; i<_nodes.size(); i++ ) { - if ( i > 0 ) - ss << ","; - ss << _nodes[i].addr.toString(); - } + for ( unsigned i=0; i<_nodes.size(); i++ ) { + if ( i > 0 ) + ss << ","; + ss << _nodes[i].addr.toString(); } + return ss.str(); } @@ -313,34 +333,130 @@ namespace mongo { } } - void ReplicaSetMonitor::_checkHosts( const BSONObj& hostList, bool& changed ) { + NodeDiff ReplicaSetMonitor::_getHostDiff_inlock( const BSONObj& hostList ){ + + NodeDiff diff; + set<int> nodesFound; + + int index = 0; + BSONObjIterator hi( hostList ); + while( hi.more() ){ + + string toCheck = hi.next().String(); + int nodeIndex = _find_inlock( toCheck ); + + // Node-to-add + if( nodeIndex < 0 ) diff.first.insert( toCheck ); + else nodesFound.insert( nodeIndex ); + + index++; + } + + for( size_t i = 0; i < _nodes.size(); i++ ){ + if( nodesFound.find( static_cast<int>(i) ) == nodesFound.end() ) diff.second.insert( static_cast<int>(i) ); + } + + return diff; + } + + bool ReplicaSetMonitor::_shouldChangeHosts( const BSONObj& hostList, bool inlock ){ + + int origHosts = 0; + if( ! inlock ){ + scoped_lock lk( _lock ); + origHosts = _nodes.size(); + } + else origHosts = _nodes.size(); + int numHosts = 0; + bool changed = false; + BSONObjIterator hi(hostList); while ( hi.more() ) { string toCheck = hi.next().String(); - if ( _find( toCheck ) >= 0 ) - continue; + numHosts++; + int index = 0; + if( ! inlock ) index = _find( toCheck ); + else index = _find_inlock( toCheck ); + + if ( index >= 0 ) continue; + + changed = true; + break; + } + + return changed || origHosts != numHosts; + + } + + void ReplicaSetMonitor::_checkHosts( const BSONObj& hostList, bool& changed ) { + + // Fast path, still requires intermittent locking + if( ! _shouldChangeHosts( hostList, false ) ){ + changed = false; + return; + } + + // Slow path, double-checked though + scoped_lock lk( _lock ); + + // Our host list may have changed while waiting for another thread in the meantime, + // so double-check here + // TODO: Do we really need this much protection, this should be pretty rare and not triggered + // from lots of threads, duping old behavior for safety + if( ! _shouldChangeHosts( hostList, true ) ){ + changed = false; + return; + } + + // LogLevel can be pretty low, since replica set reconfiguration should be pretty rare and we + // want to record our changes + log() << "changing hosts to " << hostList << " from " << _getServerAddress_inlock() << endl; - HostAndPort h( toCheck ); + NodeDiff diff = _getHostDiff_inlock( hostList ); + set<string> added = diff.first; + set<int> removed = diff.second; + + assert( added.size() > 0 || removed.size() > 0 ); + changed = true; + + // Delete from the end so we don't invalidate as we delete, delete indices are ascending + for( set<int>::reverse_iterator i = removed.rbegin(), end = removed.rend(); i != end; ++i ){ + + log() << "erasing host " << _nodes[ *i ] << " from replica set " << this->_name << endl; + + _nodes.erase( _nodes.begin() + *i ); + } + + // Add new nodes + for( set<string>::iterator i = added.begin(), end = added.end(); i != end; ++i ){ + + log() << "trying to add new host " << *i << " to replica set " << this->_name << endl; + + // Connect to new node + HostAndPort h( *i ); DBClientConnection * newConn = new DBClientConnection( true, 0, 5.0 ); - string temp; - newConn->connect( h , temp ); - { - scoped_lock lk( _lock ); - if ( _find_inlock( toCheck ) >= 0 ) { - // we need this check inside the lock so there isn't thread contention on adding to vector - continue; + + string errmsg; + try{ + if( ! newConn->connect( h , errmsg ) ){ + throw DBException( errmsg, 15927 ); } - _nodes.push_back( Node( h , newConn ) ); + log() << "successfully connected to new host " << *i << " in replica set " << this->_name << endl; } - log() << "updated set (" << _name << ") to: " << getServerAddress() << endl; - changed = true; + catch( DBException& e ){ + warning() << "cannot connect to new host " << *i << " to replica set " << this->_name << causedBy( e ) << endl; + } + + _nodes.push_back( Node( h , newConn ) ); } + } bool ReplicaSetMonitor::_checkConnection( DBClientConnection * c , string& maybePrimary , bool verbose , int nodesOffset ) { + assert( c ); scoped_lock lk( _checkConnectionLock ); bool isMaster = false; bool changed = false; @@ -348,7 +464,6 @@ namespace mongo { Timer t; BSONObj o; c->isMaster(isMaster, &o); - if ( o["setName"].type() != String || o["setName"].String() != _name ) { warning() << "node: " << c->getServerAddress() << " isn't a part of set: " << _name << " ismaster: " << o << endl; @@ -369,16 +484,20 @@ namespace mongo { log( ! verbose ) << "ReplicaSetMonitor::_checkConnection: " << c->toString() << ' ' << o << endl; // add other nodes + BSONArrayBuilder b; if ( o["hosts"].type() == Array ) { if ( o["primary"].type() == String ) maybePrimary = o["primary"].String(); - _checkHosts(o["hosts"].Obj(), changed); + BSONObjIterator it( o["hosts"].Obj() ); + while( it.more() ) b.append( it.next() ); } if (o.hasField("passives") && o["passives"].type() == Array) { - _checkHosts(o["passives"].Obj(), changed); + BSONObjIterator it( o["passives"].Obj() ); + while( it.more() ) b.append( it.next() ); } + _checkHosts( b.arr(), changed); _checkStatus(c); diff --git a/client/dbclient_rs.h b/client/dbclient_rs.h index b6948a0..b68af29 100644 --- a/client/dbclient_rs.h +++ b/client/dbclient_rs.h @@ -24,6 +24,7 @@ namespace mongo { class ReplicaSetMonitor; typedef shared_ptr<ReplicaSetMonitor> ReplicaSetMonitorPtr; + typedef pair<set<string>,set<int> > NodeDiff; /** * manages state about a replica set for client @@ -92,7 +93,7 @@ namespace mongo { string getName() const { return _name; } string getServerAddress() const; - + bool contains( const string& server ) const; void appendInfo( BSONObjBuilder& b ) const; @@ -132,6 +133,12 @@ namespace mongo { */ bool _checkConnection( DBClientConnection * c , string& maybePrimary , bool verbose , int nodesOffset ); + string _getServerAddress_inlock() const; + + NodeDiff _getHostDiff_inlock( const BSONObj& hostList ); + bool _shouldChangeHosts( const BSONObj& hostList, bool inlock ); + + int _find( const string& server ) const ; int _find_inlock( const string& server ) const ; int _find( const HostAndPort& server ) const ; @@ -144,6 +151,7 @@ namespace mongo { Node( const HostAndPort& a , DBClientConnection* c ) : addr( a ) , conn(c) , ok(true) , ismaster(false), secondary( false ) , hidden( false ) , pingTimeMillis(0) { + ok = conn.get() == NULL; } bool okForSecondaryQueries() const { diff --git a/client/distlock.cpp b/client/distlock.cpp index 595fc38..73ee130 100644 --- a/client/distlock.cpp +++ b/client/distlock.cpp @@ -111,6 +111,9 @@ namespace mongo { // replace it for a quite a while) // if the lock is taken, the take-over mechanism should handle the situation auto_ptr<DBClientCursor> c = conn->query( DistributedLock::locksNS , BSONObj() ); + // TODO: Would be good to make clear whether query throws or returns empty on errors + uassert( 16060, str::stream() << "cannot query locks collection on config server " << conn.getHost(), c.get() ); + set<string> pids; while ( c->more() ) { BSONObj lock = c->next(); diff --git a/db/cloner.cpp b/db/cloner.cpp index f13ea52..26c2f74 100644 --- a/db/cloner.cpp +++ b/db/cloner.cpp @@ -68,7 +68,7 @@ namespace mongo { /** copy the entire database */ bool go(const char *masterHost, string& errmsg, const string& fromdb, bool logForRepl, bool slaveOk, bool useReplAuth, bool snapshot, bool mayYield, bool mayBeInterrupted, int *errCode = 0); - bool copyCollection( const string& from , const string& ns , const BSONObj& query , string& errmsg , bool mayYield, bool mayBeInterrupted, bool copyIndexes = true, bool logForRepl = true ); + bool copyCollection( const string& ns , const BSONObj& query , string& errmsg , bool mayYield, bool mayBeInterrupted, bool copyIndexes = true, bool logForRepl = true ); }; /* for index info object: @@ -244,18 +244,19 @@ namespace mongo { } } - bool copyCollectionFromRemote(const string& host, const string& ns, const BSONObj& query, string& errmsg, bool logForRepl, bool mayYield, bool mayBeInterrupted) { + bool copyCollectionFromRemote(const string& host, const string& ns, string& errmsg) { Cloner c; - return c.copyCollection(host, ns, query, errmsg, mayYield, mayBeInterrupted, /*copyIndexes*/ true, logForRepl); - } - bool Cloner::copyCollection( const string& from , const string& ns , const BSONObj& query , string& errmsg , bool mayYield, bool mayBeInterrupted, bool copyIndexes, bool logForRepl ) { - auto_ptr<DBClientConnection> myconn; - myconn.reset( new DBClientConnection() ); - if ( ! myconn->connect( from , errmsg ) ) - return false; + DBClientConnection *conn = new DBClientConnection(); + // cloner owns conn in auto_ptr + c.setConnection(conn); + uassert(15908, errmsg, conn->connect(host, errmsg) && replAuthenticate(conn)); + + return c.copyCollection(ns, BSONObj(), errmsg, true, false, /*copyIndexes*/ true, false); + } - conn.reset( myconn.release() ); + bool Cloner::copyCollection( const string& ns, const BSONObj& query, string& errmsg, + bool mayYield, bool mayBeInterrupted, bool copyIndexes, bool logForRepl ) { writelock lk(ns); // TODO: make this lower down Client::Context ctx(ns); @@ -265,7 +266,7 @@ namespace mongo { string temp = ctx.db()->name + ".system.namespaces"; BSONObj config = conn->findOne( temp , BSON( "name" << ns ) ); if ( config["options"].isABSONObj() ) - if ( ! userCreateNS( ns.c_str() , config["options"].Obj() , errmsg, true , 0 ) ) + if ( ! userCreateNS( ns.c_str() , config["options"].Obj() , errmsg, logForRepl , 0 ) ) return false; } @@ -521,7 +522,14 @@ namespace mongo { << " query: " << query << " " << ( copyIndexes ? "" : ", not copying indexes" ) << endl; Cloner c; - return c.copyCollection( fromhost , collection , query, errmsg , true, false, copyIndexes ); + auto_ptr<DBClientConnection> myconn; + myconn.reset( new DBClientConnection() ); + if ( ! myconn->connect( fromhost , errmsg ) ) + return false; + + c.setConnection( myconn.release() ); + + return c.copyCollection( collection , query, errmsg , true, false, copyIndexes ); } } cmdclonecollection; diff --git a/db/cloner.h b/db/cloner.h index 94264f8..130fea0 100644 --- a/db/cloner.h +++ b/db/cloner.h @@ -34,6 +34,6 @@ namespace mongo { bool slaveOk, bool useReplAuth, bool snapshot, bool mayYield, bool mayBeInterrupted, int *errCode = 0); - bool copyCollectionFromRemote(const string& host, const string& ns, const BSONObj& query, string& errmsg, bool logForRepl, bool mayYield, bool mayBeInterrupted); + bool copyCollectionFromRemote(const string& host, const string& ns, string& errmsg); } // namespace mongo diff --git a/db/dbcommands.cpp b/db/dbcommands.cpp index b2e6218..fc6327c 100644 --- a/db/dbcommands.cpp +++ b/db/dbcommands.cpp @@ -514,7 +514,15 @@ namespace mongo { if( overhead > 4000 ) { t.append("note", "virtual minus mapped is large. could indicate a memory leak"); - log() << "warning: virtual size (" << v << "MB) - mapped size (" << m << "MB) is large (" << overhead << "MB). could indicate a memory leak" << endl; + + static time_t last = 0; + time_t now = time(0); + + if ( last + 60 < now ) { + last = now; + log() << "warning: virtual size (" << v << "MB) - mapped size (" << m << "MB) is large (" << overhead << "MB). could indicate a memory leak" << endl; + } + } t.done(); diff --git a/db/dbcommands_generic.cpp b/db/dbcommands_generic.cpp index 22cee22..c623574 100644 --- a/db/dbcommands_generic.cpp +++ b/db/dbcommands_generic.cpp @@ -334,7 +334,7 @@ namespace mongo { virtual bool slaveOk() const { return true; } virtual LockType locktype() const { return NONE; } - virtual bool requiresAuth() { return false; } + virtual bool requiresAuth() { return true; } virtual bool adminOnly() const { return true; } virtual void help( stringstream& help ) const { diff --git a/db/instance.cpp b/db/instance.cpp index 764571d..1d5d589 100644 --- a/db/instance.cpp +++ b/db/instance.cpp @@ -605,6 +605,8 @@ namespace mongo { break; js = d.nextJsObj(); // TODO: refactor to do objcheck outside of writelock } + + globalOpCounters.incInsertInWriteLock(n); } void receivedInsert(Message& m, CurOp& op) { diff --git a/db/oplog.cpp b/db/oplog.cpp index 5c1671c..6e62607 100644 --- a/db/oplog.cpp +++ b/db/oplog.cpp @@ -627,12 +627,22 @@ namespace mongo { bool shouldRetry(const BSONObj& o, const string& hn) { OplogReader missingObjReader; + const char *ns = o.getStringField("ns"); + + // capped collections + NamespaceDetails *nsd = nsdetails(ns); + if (nsd && nsd->capped) { + log() << "replication missing doc, but this is okay for a capped collection (" << ns << ")" << endl; + return false; + } + + // should already have write lock + Client::Context ctx(ns); // we don't have the object yet, which is possible on initial sync. get it. log() << "replication info adding missing object" << endl; // rare enough we can log uassert(15916, str::stream() << "Can no longer connect to initial sync source: " << hn, missingObjReader.connect(hn)); - const char *ns = o.getStringField("ns"); // might be more than just _id in the update criteria BSONObj query = BSONObjBuilder().append(o.getObjectField("o2")["_id"]).obj(); BSONObj missingObj; @@ -651,7 +661,6 @@ namespace mongo { return false; } else { - Client::Context ctx(ns); DiskLoc d = theDataFileMgr.insert(ns, (void*) missingObj.objdata(), missingObj.objsize()); uassert(15917, "Got bad disk location when attempting to insert", !d.isNull()); @@ -678,6 +687,7 @@ namespace mongo { o = fields[0].embeddedObject(); const char *ns = fields[1].valuestrsafe(); + NamespaceDetails *nsd = nsdetails(ns); // operation type -- see logOp() comments for types const char *opType = fields[2].valuestrsafe(); @@ -705,7 +715,7 @@ namespace mongo { } else { /* erh 10/16/2009 - this is probably not relevant any more since its auto-created, but not worth removing */ - RARELY ensureHaveIdIndex(ns); // otherwise updates will be slow + RARELY if (nsd && !nsd->capped) { ensureHaveIdIndex(ns); } // otherwise updates will be slow /* todo : it may be better to do an insert here, and then catch the dup key exception and do update then. very few upserts will not be inserts... @@ -722,9 +732,9 @@ namespace mongo { // - if not, updates would be slow // - but if were by id would be slow on primary too so maybe ok // - if on primary was by another key and there are other indexes, this could be very bad w/out an index - // - if do create, odd to have on secondary but not primary. also can cause secondary to block for - // quite a while on creation. - RARELY ensureHaveIdIndex(ns); // otherwise updates will be super slow + // - if do create, odd to have on secondary but not primary. also can cause secondary to block for + // quite a while on creation. + RARELY if (nsd && !nsd->capped) { ensureHaveIdIndex(ns); } // otherwise updates will be super slow OpDebug debug; BSONObj updateCriteria = op.getObjectField("o2"); bool upsert = fields[3].booleanSafe(); @@ -741,11 +751,17 @@ namespace mongo { // of the form // { _id:..., { x : {$size:...} } // thus this is not ideal. - else if( nsdetails(ns) == NULL || Helpers::findById(nsdetails(ns), updateCriteria).isNull() ) { - failedUpdate = true; - } - else { - // it's present; zero objects were updated because of additional specifiers in the query for idempotence + else { + + if (nsd == NULL || + (nsd->findIdIndex() >= 0 && Helpers::findById(nsd, updateCriteria).isNull()) || + // capped collections won't have an _id index + (nsd->findIdIndex() < 0 && Helpers::findOne(ns, updateCriteria, false).isNull())) { + failedUpdate = true; + } + + // Otherwise, it's present; zero objects were updated because of additional specifiers + // in the query for idempotence } } else { diff --git a/db/ops/query.cpp b/db/ops/query.cpp index cf4dc98..36f2536 100644 --- a/db/ops/query.cpp +++ b/db/ops/query.cpp @@ -221,7 +221,8 @@ namespace mongo { _skip( spec["skip"].numberLong() ), _limit( spec["limit"].numberLong() ), _nscanned(), - _bc() { + _bc(), + _yieldRecoveryFailed() { } virtual void _init() { @@ -251,6 +252,7 @@ namespace mongo { virtual void recoverFromYield() { if ( _cc && !ClientCursor::recoverFromYield( _yieldData ) ) { + _yieldRecoveryFailed = true; _c.reset(); _cc.reset(); @@ -309,7 +311,7 @@ namespace mongo { } long long count() const { return _count; } virtual bool mayRecordPlan() const { - return ( _myCount > _limit / 2 ) || ( complete() && !stopRequested() ); + return !_yieldRecoveryFailed && ( ( _myCount > _limit / 2 ) || ( complete() && !stopRequested() ) ); } private: @@ -343,6 +345,7 @@ namespace mongo { ClientCursor::CleanupPointer _cc; ClientCursor::YieldData _yieldData; + bool _yieldRecoveryFailed; }; /* { count: "collectionname"[, query: <query>] } @@ -474,7 +477,8 @@ namespace mongo { _oplogReplay( pq.hasOption( QueryOption_OplogReplay) ), _response( response ), _eb( eb ), - _curop( curop ) + _curop( curop ), + _yieldRecoveryFailed() {} virtual void _init() { @@ -531,6 +535,7 @@ namespace mongo { _findingStartCursor->recoverFromYield(); } else if ( _cc && !ClientCursor::recoverFromYield( _yieldData ) ) { + _yieldRecoveryFailed = true; _c.reset(); _cc.reset(); _so.reset(); @@ -723,7 +728,7 @@ namespace mongo { } virtual bool mayRecordPlan() const { - return ( _pq.getNumToReturn() != 1 ) && ( ( _n > _pq.getNumToReturn() / 2 ) || ( complete() && !stopRequested() ) ); + return !_yieldRecoveryFailed && ( _pq.getNumToReturn() != 1 ) && ( ( _n > _pq.getNumToReturn() / 2 ) || ( complete() && !stopRequested() ) ); } virtual QueryOp *_createChild() const { @@ -791,6 +796,8 @@ namespace mongo { ExplainBuilder &_eb; CurOp &_curop; OpTime _slaveReadTill; + + bool _yieldRecoveryFailed; }; /* run a query -- includes checking for and running a Command \ diff --git a/db/queryoptimizer.cpp b/db/queryoptimizer.cpp index 692e9fd..71ca657 100644 --- a/db/queryoptimizer.cpp +++ b/db/queryoptimizer.cpp @@ -482,7 +482,7 @@ doneCheckOrder: } massert( 10368 , "Unable to locate previously recorded index", p.get() ); - if ( !( _bestGuessOnly && p->scanAndOrderRequired() ) ) { + if ( !p->unhelpful() && !( _bestGuessOnly && p->scanAndOrderRequired() ) ) { _usingPrerecordedPlan = true; _mayRecordPlan = false; _plans.push_back( p ); diff --git a/db/queryoptimizercursor.cpp b/db/queryoptimizercursor.cpp index 9260889..f8b57f7 100644 --- a/db/queryoptimizercursor.cpp +++ b/db/queryoptimizercursor.cpp @@ -35,7 +35,9 @@ namespace mongo { * @param aggregateNscanned - shared int counting total nscanned for * query ops for all cursors. */ - QueryOptimizerCursorOp( long long &aggregateNscanned ) : _matchCount(), _mustAdvance(), _nscanned(), _aggregateNscanned( aggregateNscanned ) {} + QueryOptimizerCursorOp( long long &aggregateNscanned ) : + _matchCount(), _mustAdvance(), _nscanned(), _capped(), + _aggregateNscanned( aggregateNscanned ), _yieldRecoveryFailed() {} virtual void _init() { if ( qp().scanAndOrderRequired() ) { @@ -64,6 +66,7 @@ namespace mongo { virtual void recoverFromYield() { if ( _cc && !ClientCursor::recoverFromYield( _yieldData ) ) { + _yieldRecoveryFailed = true; _c.reset(); _cc.reset(); @@ -113,12 +116,15 @@ namespace mongo { DiskLoc currLoc() const { return _c ? _c->currLoc() : DiskLoc(); } BSONObj currKey() const { return _c ? _c->currKey() : BSONObj(); } virtual bool mayRecordPlan() const { - return complete() && !stopRequested(); + return !_yieldRecoveryFailed && complete() && !stopRequested(); } shared_ptr<Cursor> cursor() const { return _c; } private: void mayAdvance() { - if ( _mustAdvance && _c ) { + if ( !_c ) { + return; + } + if ( _mustAdvance ) { _c->advance(); _mustAdvance = false; } @@ -134,6 +140,7 @@ namespace mongo { DiskLoc _posBeforeYield; ClientCursor::YieldData _yieldData; long long &_aggregateNscanned; + bool _yieldRecoveryFailed; }; /** @@ -181,36 +188,7 @@ namespace mongo { return DiskLoc(); } virtual bool advance() { - if ( _takeover ) { - return _takeover->advance(); - } - - // Ok to advance if currOp in an error state due to failed yield recovery. - // This may be the case when advance() is called by recoverFromYield(). - if ( !( _currOp && _currOp->error() ) && !ok() ) { - return false; - } - - _currOp = 0; - shared_ptr<QueryOp> op = _mps->nextOp(); - rethrowOnError( op ); - - QueryOptimizerCursorOp *qocop = dynamic_cast<QueryOptimizerCursorOp*>( op.get() ); - if ( !op->complete() ) { - // 'qocop' will be valid until we call _mps->nextOp() again. - _currOp = qocop; - } - else if ( op->stopRequested() ) { - if ( qocop->cursor() ) { - _takeover.reset( new MultiCursor( _mps, - qocop->cursor(), - op->matcher( qocop->cursor() ), - *op, - _nscanned - qocop->cursor()->nscanned() ) ); - } - } - - return ok(); + return _advance( false ); } virtual BSONObj currKey() const { if ( _takeover ) { @@ -252,9 +230,9 @@ namespace mongo { } if ( _currOp ) { _mps->recoverFromYield(); - if ( _currOp->error() ) { - // See if we can advance to a non error op. - advance(); + if ( _currOp->error() || !ok() ) { + // Advance to a non error op or a following $or clause if possible. + _advance( true ); } } } @@ -304,6 +282,36 @@ namespace mongo { } private: + bool _advance( bool force ) { + if ( _takeover ) { + return _takeover->advance(); + } + + if ( !force && !ok() ) { + return false; + } + + _currOp = 0; + shared_ptr<QueryOp> op = _mps->nextOp(); + rethrowOnError( op ); + + QueryOptimizerCursorOp *qocop = dynamic_cast<QueryOptimizerCursorOp*>( op.get() ); + if ( !op->complete() ) { + // 'qocop' will be valid until we call _mps->nextOp() again. + _currOp = qocop; + } + else if ( op->stopRequested() ) { + if ( qocop->cursor() ) { + _takeover.reset( new MultiCursor( _mps, + qocop->cursor(), + op->matcher( qocop->cursor() ), + *op, + _nscanned - qocop->cursor()->nscanned() ) ); + } + } + + return ok(); + } void rethrowOnError( const shared_ptr< QueryOp > &op ) { // If all plans have erred out, assert. if ( op->error() ) { diff --git a/db/querypattern.h b/db/querypattern.h index d87cc64..2f7450e 100644 --- a/db/querypattern.h +++ b/db/querypattern.h @@ -36,7 +36,8 @@ namespace mongo { Equality, LowerBound, UpperBound, - UpperAndLowerBound + UpperAndLowerBound, + ConstraintPresent }; bool operator<( const QueryPattern &other ) const; /** for testing only */ diff --git a/db/queryutil.cpp b/db/queryutil.cpp index 717eac8..47f89ad 100644 --- a/db/queryutil.cpp +++ b/db/queryutil.cpp @@ -1007,6 +1007,8 @@ namespace mongo { qp._fieldTypes[ i->first ] = QueryPattern::UpperBound; else if ( lower ) qp._fieldTypes[ i->first ] = QueryPattern::LowerBound; + else + qp._fieldTypes[ i->first ] = QueryPattern::ConstraintPresent; } } qp.setSort( sort ); @@ -1019,13 +1021,13 @@ namespace mongo { BoundBuilders builders; builders.push_back( make_pair( shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ), shared_ptr<BSONObjBuilder>( new BSONObjBuilder() ) ) ); BSONObjIterator i( keyPattern ); - bool ineq = false; // until ineq is true, we are just dealing with equality and $in bounds + bool equalityOnly = true; // until equalityOnly is false, we are just dealing with equality (no range or $in querys). while( i.more() ) { BSONElement e = i.next(); const FieldRange &fr = range( e.fieldName() ); int number = (int) e.number(); // returns 0.0 if not numeric bool forward = ( ( number >= 0 ? 1 : -1 ) * ( direction >= 0 ? 1 : -1 ) > 0 ); - if ( !ineq ) { + if ( equalityOnly ) { if ( fr.equality() ) { for( BoundBuilders::const_iterator j = builders.begin(); j != builders.end(); ++j ) { j->first->appendAs( fr.min(), "" ); @@ -1033,9 +1035,8 @@ namespace mongo { } } else { - if ( !fr.inQuery() ) { - ineq = true; - } + equalityOnly = false; + BoundBuilders newBuilders; const vector<FieldInterval> &intervals = fr.intervals(); for( BoundBuilders::const_iterator i = builders.begin(); i != builders.end(); ++i ) { diff --git a/db/repl/connections.h b/db/repl/connections.h index 61c581b..3ada71c 100644 --- a/db/repl/connections.h +++ b/db/repl/connections.h @@ -72,7 +72,8 @@ namespace mongo { struct X { mongo::mutex z; DBClientConnection cc; - X() : z("X"), cc(/*reconnect*/ true, 0, /*timeout*/ 10.0) { + bool connected; + X() : z("X"), cc(/*reconnect*/ true, 0, /*timeout*/ 10.0), connected(false) { cc._logLevel = 2; } } *x; @@ -88,6 +89,7 @@ namespace mongo { log() << "couldn't connect to " << _hostport << ": " << err << rsLog; return false; } + x->connected = true; // if we cannot authenticate against a member, then either its key file // or our key file has to change. if our key file has to change, we'll @@ -113,11 +115,19 @@ namespace mongo { connLock.reset( new scoped_lock(x->z) ); } } - if( !first ) { - connLock.reset( new scoped_lock(x->z) ); + + // already locked connLock above + if (first) { + connect(); + return; + } + + connLock.reset( new scoped_lock(x->z) ); + if (x->connected) { return; } + // Keep trying to connect if we're not yet connected connect(); } diff --git a/db/repl/rs_config.cpp b/db/repl/rs_config.cpp index 13352b1..c451d46 100644 --- a/db/repl/rs_config.cpp +++ b/db/repl/rs_config.cpp @@ -296,6 +296,26 @@ namespace mongo { _ok = false; } + void ReplSetConfig::setMajority() { + int total = members.size(); + int nonArbiters = total; + int strictMajority = total/2+1; + + for (vector<MemberCfg>::iterator it = members.begin(); it < members.end(); it++) { + if ((*it).arbiterOnly) { + nonArbiters--; + } + } + + // majority should be all "normal" members if we have something like 4 + // arbiters & 3 normal members + _majority = (strictMajority > nonArbiters) ? nonArbiters : strictMajority; + } + + int ReplSetConfig::getMajority() const { + return _majority; + } + void ReplSetConfig::checkRsConfig() const { uassert(13132, "nonmatching repl set name in _id field; check --replSet command line", @@ -533,6 +553,9 @@ namespace mongo { try { getLastErrorDefaults = settings["getLastErrorDefaults"].Obj().copy(); } catch(...) { } } + + // figure out the majority for this config + setMajority(); } static inline void configAssert(bool expr) { diff --git a/db/repl/rs_config.h b/db/repl/rs_config.h index b22b61e..da6552a 100644 --- a/db/repl/rs_config.h +++ b/db/repl/rs_config.h @@ -135,9 +135,20 @@ namespace mongo { BSONObj asBson() const; + /** + * Getter and setter for _majority. This is almost always + * members.size()/2+1, but can be the number of non-arbiter members if + * there are more arbiters than non-arbiters (writing to 3 out of 7 + * servers is safe if 4 of the servers are arbiters). + */ + void setMajority(); + int getMajority() const; + bool _constructed; private: bool _ok; + int _majority; + void from(BSONObj); void clear(); diff --git a/db/repl/rs_rollback.cpp b/db/repl/rs_rollback.cpp index f012e65..97a910e 100644 --- a/db/repl/rs_rollback.cpp +++ b/db/repl/rs_rollback.cpp @@ -388,24 +388,18 @@ namespace mongo { for( set<string>::iterator i = h.collectionsToResync.begin(); i != h.collectionsToResync.end(); i++ ) { string ns = *i; sethbmsg(str::stream() << "rollback 4.1 coll resync " << ns); - Client::Context c(*i); - try { + + Client::Context c(ns); + { bob res; string errmsg; dropCollection(ns, errmsg, res); { dbtemprelease r; - bool ok = copyCollectionFromRemote(them->getServerAddress(), ns, bo(), errmsg, false, true, false); - if( !ok ) { - log() << "replSet rollback error resyncing collection " << ns << ' ' << errmsg << rsLog; - throw "rollback error resyncing rollection [1]"; - } + bool ok = copyCollectionFromRemote(them->getServerAddress(), ns, errmsg); + uassert(15909, str::stream() << "replSet rollback error resyncing collection " << ns << ' ' << errmsg, ok); } } - catch(...) { - log() << "replset rollback error resyncing collection " << ns << rsLog; - throw "rollback error resyncing rollection [2]"; - } } /* we did more reading from primary, so check it again for a rollback (which would mess us up), and @@ -423,7 +417,7 @@ namespace mongo { setMinValid(newMinValid); } } - catch(...) { + catch (DBException& e) { err = "can't get/set minvalid"; } if( h.rbid != getRBID(r.conn()) ) { diff --git a/db/repl/rs_sync.cpp b/db/repl/rs_sync.cpp index 8cd3e14..c86dbbb 100644 --- a/db/repl/rs_sync.cpp +++ b/db/repl/rs_sync.cpp @@ -161,7 +161,7 @@ namespace mongo { } catch (DBException& e) { // skip duplicate key exceptions - if( e.getCode() == 11000 || e.getCode() == 11001 ) { + if( e.getCode() == 11000 || e.getCode() == 11001 || e.getCode() == 12582) { continue; } diff --git a/db/repl_block.cpp b/db/repl_block.cpp index dcac121..840bbb2 100644 --- a/db/repl_block.cpp +++ b/db/repl_block.cpp @@ -175,7 +175,7 @@ namespace mongo { if (wStr == "majority") { // use the entire set, including arbiters, to prevent writing // to a majority of the set but not a majority of voters - return replicatedToNum(op, theReplSet->config().members.size()/2+1); + return replicatedToNum(op, theReplSet->config().getMajority()); } map<string,ReplSetConfig::TagRule*>::const_iterator it = theReplSet->config().rules.find(wStr); diff --git a/db/security.cpp b/db/security.cpp index ae14770..05165cb 100644 --- a/db/security.cpp +++ b/db/security.cpp @@ -83,6 +83,9 @@ namespace mongo { string systemUsers = dbname + ".system.users"; // OCCASIONALLY Helpers::ensureIndex(systemUsers.c_str(), userPattern, false, "user_1"); { + mongolock lk(false); + Client::Context c(systemUsers, dbpath, &lk, false); + BSONObjBuilder b; b << "user" << user; BSONObj query = b.done(); @@ -101,10 +104,10 @@ namespace mongo { AuthenticationInfo *ai = cc().getAuthenticationInfo(); if ( readOnly ) { - ai->authorizeReadOnly( cc().database()->name.c_str() , user ); + ai->authorizeReadOnly( dbname.c_str() , user ); } else { - ai->authorize( cc().database()->name.c_str() , user ); + ai->authorize( dbname.c_str() , user ); } } diff --git a/db/security_common.h b/db/security_common.h index 2f2565f..c9a3e3a 100644 --- a/db/security_common.h +++ b/db/security_common.h @@ -57,7 +57,7 @@ namespace mongo { virtual bool slaveOk() const { return true; } - virtual LockType locktype() const { return READ; } + virtual LockType locktype() const { return NONE; } virtual void help(stringstream& ss) const { ss << "internal"; } CmdAuthenticate() : Command("authenticate") {} bool run(const string& dbname , BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl); diff --git a/dbtests/queryoptimizertests.cpp b/dbtests/queryoptimizertests.cpp index 38d631e..59a9874 100644 --- a/dbtests/queryoptimizertests.cpp +++ b/dbtests/queryoptimizertests.cpp @@ -1921,6 +1921,33 @@ namespace QueryOptimizerTests { } }; + /** Yield and remove document with $or query. */ + class YieldRemoveOr : public Base { + public: + void run() { + _cli.insert( ns(), BSON( "_id" << 1 ) ); + _cli.insert( ns(), BSON( "_id" << 2 ) ); + + { + dblock lk; + Client::Context ctx( ns() ); + setQueryOptimizerCursor( BSON( "$or" << BSON_ARRAY( BSON( "_id" << 1 ) << BSON( "_id" << 2 ) ) ) ); + ASSERT_EQUALS( 1, current().getIntField( "_id" ) ); + ASSERT( prepareToYield() ); + } + + _cli.remove( ns(), BSON( "_id" << 1 ) ); + + { + dblock lk; + Client::Context ctx( ns() ); + recoverFromYield(); + ASSERT( ok() ); + ASSERT_EQUALS( 2, current().getIntField( "_id" ) ); + } + } + }; + /** Yield and overwrite current in capped collection. */ class YieldCappedOverwrite : public Base { public: @@ -2074,6 +2101,68 @@ namespace QueryOptimizerTests { } }; + /** Yielding with delete, multiple plans active, and $or clause. */ + class YieldMultiplePlansDeleteOr : public Base { + public: + void run() { + _cli.insert( ns(), BSON( "_id" << 1 << "a" << 2 ) ); + _cli.insert( ns(), BSON( "_id" << 2 << "a" << 1 ) ); + _cli.ensureIndex( ns(), BSON( "a" << 1 ) ); + + { + dblock lk; + Client::Context ctx( ns() ); + setQueryOptimizerCursor( BSON( "$or" << BSON_ARRAY( BSON( "_id" << 1 << "a" << 2 ) << BSON( "_id" << 2 << "a" << 1 ) ) ) ); + ASSERT_EQUALS( 1, current().getIntField( "_id" ) ); + ASSERT( prepareToYield() ); + } + + _cli.remove( ns(), BSON( "_id" << 1 ) ); + + { + dblock lk; + Client::Context ctx( ns() ); + c()->recoverFromYield(); + ASSERT( ok() ); + ASSERT_EQUALS( 2, current().getIntField( "_id" ) ); + ASSERT( !advance() ); + ASSERT( !ok() ); + } + } + }; + + /** Yielding with delete, multiple plans active with advancement to the second, and $or clause. */ + class YieldMultiplePlansDeleteOrAdvance : public Base { + public: + void run() { + _cli.insert( ns(), BSON( "_id" << 1 << "a" << 2 ) ); + _cli.insert( ns(), BSON( "_id" << 2 << "a" << 1 ) ); + _cli.ensureIndex( ns(), BSON( "a" << 1 ) ); + + { + dblock lk; + Client::Context ctx( ns() ); + setQueryOptimizerCursor( BSON( "$or" << BSON_ARRAY( BSON( "_id" << 1 << "a" << 2 ) << BSON( "_id" << 2 << "a" << 1 ) ) ) ); + ASSERT_EQUALS( 1, current().getIntField( "_id" ) ); + ASSERT( prepareToYield() ); + c()->advance(); + ASSERT_EQUALS( 1, current().getIntField( "_id" ) ); + } + + _cli.remove( ns(), BSON( "_id" << 1 ) ); + + { + dblock lk; + Client::Context ctx( ns() ); + c()->recoverFromYield(); + ASSERT( ok() ); + ASSERT_EQUALS( 2, current().getIntField( "_id" ) ); + ASSERT( !advance() ); + ASSERT( !ok() ); + } + } + }; + /** Yielding with multiple plans and capped overwrite. */ class YieldMultiplePlansCappedOverwrite : public Base { public: @@ -2703,11 +2792,14 @@ namespace QueryOptimizerTests { add<QueryOptimizerCursorTests::YieldUpdate>(); add<QueryOptimizerCursorTests::YieldDrop>(); add<QueryOptimizerCursorTests::YieldDropOr>(); + add<QueryOptimizerCursorTests::YieldRemoveOr>(); add<QueryOptimizerCursorTests::YieldCappedOverwrite>(); add<QueryOptimizerCursorTests::YieldDropIndex>(); add<QueryOptimizerCursorTests::YieldMultiplePlansNoOp>(); add<QueryOptimizerCursorTests::YieldMultiplePlansAdvanceNoOp>(); add<QueryOptimizerCursorTests::YieldMultiplePlansDelete>(); + add<QueryOptimizerCursorTests::YieldMultiplePlansDeleteOr>(); + add<QueryOptimizerCursorTests::YieldMultiplePlansDeleteOrAdvance>(); add<QueryOptimizerCursorTests::YieldMultiplePlansCappedOverwrite>(); add<QueryOptimizerCursorTests::YieldMultiplePlansCappedOverwriteManual>(); add<QueryOptimizerCursorTests::YieldMultiplePlansCappedOverwriteManual2>(); diff --git a/dbtests/queryutiltests.cpp b/dbtests/queryutiltests.cpp index e825b4f..c3dd64d 100644 --- a/dbtests/queryutiltests.cpp +++ b/dbtests/queryutiltests.cpp @@ -238,7 +238,14 @@ namespace QueryUtilTests { } }; - class QueryPatternTest { + class QueryPatternBase { + protected: + static QueryPattern p( const BSONObj &query, const BSONObj &sort = BSONObj() ) { + return FieldRangeSet( "", query, true ).pattern( sort ); + } + }; + + class QueryPatternTest : public QueryPatternBase { public: void run() { ASSERT( p( BSON( "a" << 1 ) ) == p( BSON( "a" << 1 ) ) ); @@ -258,12 +265,17 @@ namespace QueryUtilTests { ASSERT( p( BSON( "a" << 1 ), BSON( "b" << 1 << "c" << 1 ) ) != p( BSON( "a" << 4 ), BSON( "b" << 1 ) ) ); ASSERT( p( BSON( "a" << 1 ), BSON( "b" << 1 ) ) != p( BSON( "a" << 4 ), BSON( "b" << 1 << "c" << 1 ) ) ); } - private: - static QueryPattern p( const BSONObj &query, const BSONObj &sort = BSONObj() ) { - return FieldRangeSet( "", query, true ).pattern( sort ); + }; + + class QueryPatternNeConstraint : public QueryPatternBase { + public: + void run() { + ASSERT( p( BSON( "a" << NE << 5 ) ) != p( BSON( "a" << GT << 1 ) ) ); + ASSERT( p( BSON( "a" << NE << 5 ) ) != p( BSONObj() ) ); + ASSERT( p( BSON( "a" << NE << 5 ) ) == p( BSON( "a" << NE << "a" ) ) ); } }; - + class NoWhere { public: void run() { @@ -902,6 +914,7 @@ namespace QueryUtilTests { add< FieldRangeTests::Equality >(); add< FieldRangeTests::SimplifiedQuery >(); add< FieldRangeTests::QueryPatternTest >(); + add< FieldRangeTests::QueryPatternNeConstraint >(); add< FieldRangeTests::NoWhere >(); add< FieldRangeTests::Numeric >(); add< FieldRangeTests::InLowerBound >(); diff --git a/doxygenConfig b/doxygenConfig index be8ef9e..1fbcce0 100644 --- a/doxygenConfig +++ b/doxygenConfig @@ -3,7 +3,7 @@ #--------------------------------------------------------------------------- DOXYFILE_ENCODING = UTF-8 PROJECT_NAME = MongoDB -PROJECT_NUMBER = 2.0.2 +PROJECT_NUMBER = 2.0.3 OUTPUT_DIRECTORY = docs/doxygen CREATE_SUBDIRS = NO OUTPUT_LANGUAGE = English diff --git a/jstests/auth/auth1.js b/jstests/auth/auth1.js index c837085..8639202 100644 --- a/jstests/auth/auth1.js +++ b/jstests/auth/auth1.js @@ -16,6 +16,14 @@ db.addUser( "eliot" , "eliot" ); db.addUser( "guest" , "guest", true ); db.getSisterDB( "admin" ).addUser( "super", "super" ); +print("make sure we can't run certain commands w/out auth"); +var errmsg = "need to login"; +res = db.adminCommand({getLog : "global"}); +printjson( res ); +assert( ! res.log || res.log.length == 0 , "getLog should fail: " + tojson( res ) ) +assert.eq( res.errmsg , "need to login" , tojson( res ) ); + + assert.throws( function() { t.findOne() }, [], "read without login" ); assert( db.auth( "eliot" , "eliot" ) , "auth failed" ); diff --git a/jstests/distinct3.js b/jstests/distinct3.js new file mode 100644 index 0000000..f945ec9 --- /dev/null +++ b/jstests/distinct3.js @@ -0,0 +1,27 @@ +// Yield and delete test case for query optimizer cursor. + +t = db.jstests_distinct3; +t.drop(); + +t.ensureIndex({a:1}); +t.ensureIndex({b:1}); + +for( i = 0; i < 50; ++i ) { + for( j = 0; j < 20; ++j ) { + t.save({a:i,c:i,d:j}); + } +} +for( i = 0; i < 1000; ++i ) { + t.save({b:i,c:i+50}); +} +db.getLastError(); + +// The idea here is to try and remove the last match for the {a:1} index scan while distinct is yielding. +p = startParallelShell( 'for( i = 0; i < 2500; ++i ) { db.jstests_distinct3.remove({a:49}); for( j = 0; j < 20; ++j ) { db.jstests_distinct3.save({a:49,c:49,d:j}) } }' ); + +for( i = 0; i < 100; ++i ) { + count = t.distinct( 'c', {$or:[{a:{$gte:0},d:0},{b:{$gte:0}}]} ).length; + assert.gt( count, 1000 ); +} + +p(); diff --git a/jstests/queryoptimizer3.js b/jstests/queryoptimizer3.js new file mode 100644 index 0000000..76bc5b6 --- /dev/null +++ b/jstests/queryoptimizer3.js @@ -0,0 +1,33 @@ +// Check cases where index scans are aborted due to the collection being dropped. + +t = db.jstests_queryoptimizer3; +t.drop(); + +p = startParallelShell( 'for( i = 0; i < 400; ++i ) { sleep( 50 ); db.jstests_queryoptimizer3.drop(); }' ); + +for( i = 0; i < 100; ++i ) { + t.drop(); + t.ensureIndex({a:1}); + t.ensureIndex({b:1}); + for( j = 0; j < 100; ++j ) { + t.save({a:j,b:j}); + } + m = i % 5; + if ( m == 0 ) { + t.count({a:{$gte:0},b:{$gte:0}}); + } + else if ( m == 1 ) { + t.find({a:{$gte:0},b:{$gte:0}}).itcount(); + } + else if ( m == 2 ) { + t.remove({a:{$gte:0},b:{$gte:0}}); + } + else if ( m == 3 ) { + t.update({a:{$gte:0},b:{$gte:0}},{}); + } + else if ( m == 4 ) { + t.distinct('x',{a:{$gte:0},b:{$gte:0}}); + } +} + +p(); diff --git a/jstests/queryoptimizer6.js b/jstests/queryoptimizer6.js new file mode 100644 index 0000000..fce92d7 --- /dev/null +++ b/jstests/queryoptimizer6.js @@ -0,0 +1,28 @@ +// Test that $ne constraints are accounted for in QueryPattern. SERVER-4665 + +t = db.jstests_queryoptimizer6; + +function reset() { + t.drop(); + t.save( {a:1} ); + t.ensureIndex( {b:1}, {sparse:true} ); +} + +reset(); +// The sparse index will be used, and recorded for this query pattern. +assert.eq( 0, t.find( {a:1,b:{$ne:1}} ).itcount() ); +// The query pattern should be different, and the sparse index should not be used. +assert.eq( 1, t.find( {a:1} ).itcount() ); + +reset(); +// The sparse index will be used, and (for better or worse) recorded for this query pattern. +assert.eq( 0, t.find( {a:1} ).min({b:1}).itcount() ); +// The sparse index should not be used, even though the query patterns match. +assert.eq( 1, t.find( {a:1} ).itcount() ); + +reset(); +t.ensureIndex( {a:1,b:1} ); +// The sparse index will be used, and (for better or worse) recorded for this query pattern. +assert.eq( 0, t.find( {a:1,b:null} ).min({b:1}).itcount() ); +// Descriptive test - the recorded {b:1} index is used, because it is not useless. +assert.eq( 0, t.find( {a:1,b:null} ).itcount() ); diff --git a/jstests/replsets/majority.js b/jstests/replsets/majority.js index 6df1a41..5bb3cde 100644 --- a/jstests/replsets/majority.js +++ b/jstests/replsets/majority.js @@ -1,4 +1,11 @@ -var num = 5; +var testInsert = function() { + master.getDB("foo").bar.insert({x:1}); + var result = master.getDB("foo").runCommand({getLastError:1, w:"majority", wtimeout:timeout}); + printjson(result); + return result; +}; + +var num = 7; var host = getHostName(); var name = "tags"; var timeout = 10000; @@ -6,28 +13,57 @@ var timeout = 10000; var replTest = new ReplSetTest( {name: name, nodes: num, startPort:31000} ); var nodes = replTest.startSet(); var port = replTest.ports; -replTest.initiate({_id : name, members : +var config = {_id : name, members : [ {_id:0, host : host+":"+port[0], priority : 2}, - {_id:1, host : host+":"+port[1]}, + {_id:1, host : host+":"+port[1], votes : 3}, {_id:2, host : host+":"+port[2]}, {_id:3, host : host+":"+port[3], arbiterOnly : true}, {_id:4, host : host+":"+port[4], arbiterOnly : true}, + {_id:5, host : host+":"+port[5], arbiterOnly : true}, + {_id:6, host : host+":"+port[6], arbiterOnly : true}, ], - }); + }; +replTest.initiate(config); replTest.awaitReplication(); -replTest.bridge(); - -var testInsert = function() { - master.getDB("foo").bar.insert({x:1}); - var result = master.getDB("foo").runCommand({getLastError:1, w:"majority", wtimeout:timeout}); - printjson(result); - return result; -}; var master = replTest.getMaster(); +print("try taking down 4 arbiters"); +replTest.stop(3); +replTest.stop(4); + +replTest.stop(6); +replTest.remove(6); +replTest.stop(5); +replTest.remove(5); + +print("should still be able to write to a majority"); +assert.eq(testInsert().err, null); + +print("start up some of the arbiters again"); +replTest.restart(3); +replTest.restart(4); + +print("remove 2 of the arbiters"); +config.version = 2; +config.members.pop(); +config.members.pop(); + +try { + master.getDB("admin").runCommand({replSetReconfig : config}); +} +catch (e) { + print("reconfig error: "+e); +} + +replTest.awaitReplication(); + +replTest.bridge(); + +master = replTest.getMaster(); + print("get back in the groove"); testInsert(); replTest.awaitReplication(); diff --git a/jstests/sharding/inTiming.js b/jstests/sharding/inTiming.js new file mode 100644 index 0000000..51387a6 --- /dev/null +++ b/jstests/sharding/inTiming.js @@ -0,0 +1,58 @@ +// Check that shard selection does not take a really long time on $in queries: SERVER-4745 + +s = new ShardingTest( 'sharding_inqueries', 3, 0, 1, {chunksize:1}); + +db = s.getDB( 'test' ); + +s.adminCommand( { enablesharding: 'test' } ); +s.adminCommand( { shardcollection: 'test.foo', key: { a:1, b:1 } } ); + +var lst = []; +for (var i = 0; i < 500; i++) { lst.push(i); } + +/* +* Time how long it takes to do $in querys on a sharded and unsharded collection. +* There is no data in either collection, so the query time is coming almost +* entirely from the code that selects which shard(s) to send the query to. +*/ +unshardedQuery = function() {db.bar.find({a:{$in:lst}, b:{$in:lst}}).itcount()}; +shardedQuery = function() {db.foo.find({a:{$in:lst}, b:{$in:lst}}).itcount()}; +// Run queries a few times to warm memory +for (var i = 0; i < 3; i++) { + unshardedQuery(); + shardedQuery(); +} + +unshardedTime = Date.timeFunc(unshardedQuery , 5); +shardedTime = Date.timeFunc(shardedQuery, 5); + +print("Unsharded $in query ran in " + unshardedTime); +print("Sharded $in query ran in " + shardedTime); +assert(unshardedTime * 10 > shardedTime, "Sharded query is more than 10 times as slow as unsharded query"); + +s.getDB('config').settings.update( { _id: "balancer" }, { $set : { stopped: true } } , true ); + +db.adminCommand({split : "test.foo", middle : { a:1, b:10}}); +db.adminCommand({split : "test.foo", middle : { a:3, b:0}}); + +db.adminCommand({moveChunk : "test.foo", find : {a:1, b:0}, to : "shard0000"}); +db.adminCommand({moveChunk : "test.foo", find : {a:1, b:15}, to : "shard0001"}); +db.adminCommand({moveChunk : "test.foo", find : {a:3, b:15}, to : "shard0002"}); + +// Now make sure we get the same results from sharded and unsharded query. + +for (var i = 0; i < 20; i++) { + db.foo.save({a:1, b:i}); + db.foo.save({a:2, b:i}); + db.foo.save({a:3, b:i}); + db.foo.save({a:4, b:i}); +} + +db.printShardingStatus(); + +assert.eq(6, db.foo.find({a : {$in : [1, 2]}, b : {$in : [0, 3, 5]}}).itcount()); +assert.eq(14, db.foo.find({a : {$in : [1, 2]}, b : {$in : [0, 3, 5, 10, 11, 15, 19]}}).itcount()); +assert.eq(28, db.foo.find({a : {$in : [1, 2, 3, 4]}, b : {$in : [0, 3, 5, 10, 11, 15, 19]}}).itcount()); +assert.eq(14, db.foo.find({a : {$in : [3, 4]}, b : {$in : [0, 3, 5, 10, 11, 15, 19]}}).itcount()); + +s.stop(); diff --git a/jstests/sharding/sharding_with_keyfile.js b/jstests/sharding/sharding_with_keyfile.js index 94aea57..bd8d038 100644 --- a/jstests/sharding/sharding_with_keyfile.js +++ b/jstests/sharding/sharding_with_keyfile.js @@ -1,9 +1,15 @@ // Tests sharding with a key file -var st = new ShardingTest({ name : jsTestName(), +myTestName = "sharding_with_keyfile" + +keyFile = "jstests/sharding/" + myTestName + ".key"; + +run( "chmod" , "600" , keyFile ); + +var st = new ShardingTest({ name : myTestName , shards : 2, mongos : 1, - keyFile : keyFile = "jstests/sharding/" + jsTestName() + ".key" }) + keyFile : keyFile }) // Make sure all our instances got the key var configs = st._configDB.split(",") diff --git a/jstests/sharding/sharding_with_keyfile.key b/jstests/sharding/sharding_with_keyfile.key index fe3344b..fe3344b 100755..100644 --- a/jstests/sharding/sharding_with_keyfile.key +++ b/jstests/sharding/sharding_with_keyfile.key diff --git a/jstests/slowNightly/replReads.js b/jstests/slowNightly/replReads.js index 4fe9130..dadc2c6 100644 --- a/jstests/slowNightly/replReads.js +++ b/jstests/slowNightly/replReads.js @@ -43,6 +43,8 @@ function testReadLoadBalancing(numReplicas) { assert.soon( function(){ return secondaries[i].getDB("test").foo.count() > 0; } ) secondaries[i].getDB('test').setProfilingLevel(2) } + // Primary may change with reconfig + primary.getDB('test').setProfilingLevel(2) for (var i = 0; i < secondaries.length * 10; i++) { conn = new Mongo(s._mongos[0].host) @@ -75,13 +77,21 @@ function testReadLoadBalancing(numReplicas) { var x = rsStats(); printjson(x); var numOk = 0; + // Now wait until the host disappears, since now we actually update our + // replica sets via isMaster in mongos + if( x.hosts.length == rs.conf()["members"].length - 1 ) return true + /* for ( var i=0; i<x.hosts.length; i++ ) if ( x.hosts[i].hidden ) return true; + */ return false; } , "one slave not ok" , 180000 , 5000 ); - + + // Secondaries may change here + secondaries = s._rs[0].test.liveNodes.slaves + for (var i = 0; i < secondaries.length * 10; i++) { conn = new Mongo(s._mongos[0].host) conn.setSlaveOk() @@ -95,7 +105,7 @@ function testReadLoadBalancing(numReplicas) { } counts = counts.sort(); - assert.eq( 20 , counts[1] - counts[0] , "counts wrong: " + tojson( counts ) ); + assert.eq( 20 , Math.abs( counts[1] - counts[0] ), "counts wrong: " + tojson( counts ) ); s.stop() } diff --git a/rpm/init.d-mongod b/rpm/init.d-mongod index 423eed5..62b45a5 100644 --- a/rpm/init.d-mongod +++ b/rpm/init.d-mongod @@ -42,7 +42,7 @@ start() stop() { echo -n $"Stopping mongod: " - killproc -p "$DBPATH"/mongod.lock -t30 -TERM /usr/bin/mongod + killproc -p "$DBPATH"/mongod.lock -d 300 /usr/bin/mongod RETVAL=$? echo [ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/mongod diff --git a/rpm/mongo.spec b/rpm/mongo.spec index 38e2bb6..995310b 100644 --- a/rpm/mongo.spec +++ b/rpm/mongo.spec @@ -1,5 +1,5 @@ Name: mongo -Version: 2.0.2 +Version: 2.0.3 Release: mongodb_1%{?dist} Summary: mongo client shell and tools License: AGPL 3.0 diff --git a/s/client.cpp b/s/client.cpp index 9058b31..493a8fb 100644 --- a/s/client.cpp +++ b/s/client.cpp @@ -18,7 +18,7 @@ #include "pch.h" #include "server.h" - +#include "../util/scopeguard.h" #include "../db/commands.h" #include "../db/dbmessage.h" #include "../db/stats/counters.h" @@ -140,28 +140,31 @@ namespace mongo { // handle single server if ( shards->size() == 1 ) { string theShard = *(shards->begin() ); - - ShardConnection conn( theShard , "" ); + + BSONObj res; bool ok = false; - try{ - ok = conn->runCommand( "admin" , options , res ); - } - catch( std::exception &e ){ - - warning() << "could not get last error from shard " << theShard << causedBy( e ) << endl; - - // Catch everything that happens here, since we need to ensure we return our connection when we're - // finished. - conn.done(); + { + ShardConnection conn( theShard , "" ); + try { + ok = conn->runCommand( "admin" , options , res ); + } + catch( std::exception &e ) { - return false; - } + warning() << "could not get last error from shard " << theShard << causedBy( e ) << endl; + + // Catch everything that happens here, since we need to ensure we return our connection when we're + // finished. + conn.done(); + + return false; + } - res = res.getOwned(); - conn.done(); + res = res.getOwned(); + conn.done(); + } _addWriteBack( writebacks , res ); @@ -171,16 +174,16 @@ namespace mongo { if ( temp == theShard ) continue; - ShardConnection conn( temp , "" ); - try { + ShardConnection conn( temp , "" ); + ON_BLOCK_EXIT_OBJ( conn, &ShardConnection::done ); _addWriteBack( writebacks , conn->getLastErrorDetailed() ); + } catch( std::exception &e ){ warning() << "could not clear last error from shard " << temp << causedBy( e ) << endl; } - - conn.done(); + } clearSinceLastGetError(); @@ -224,11 +227,12 @@ namespace mongo { for ( set<string>::iterator i = shards->begin(); i != shards->end(); i++ ) { string theShard = *i; bbb.append( theShard ); - ShardConnection conn( theShard , "" ); + boost::scoped_ptr<ShardConnection> conn; BSONObj res; bool ok = false; try { - ok = conn->runCommand( "admin" , options , res ); + conn.reset( new ShardConnection( theShard , "" ) ); // constructor can throw if shard is down + ok = (*conn)->runCommand( "admin" , options , res ); shardRawGLE.append( theShard , res ); } catch( std::exception &e ){ @@ -237,7 +241,7 @@ namespace mongo { // responses. warning() << "could not get last error from a shard " << theShard << causedBy( e ) << endl; - conn.done(); + conn->done(); return false; } @@ -245,7 +249,7 @@ namespace mongo { _addWriteBack( writebacks, res ); string temp = DBClientWithCommands::getLastErrorString( res ); - if ( conn->type() != ConnectionString::SYNC && ( ok == false || temp.size() ) ) { + if ( (*conn)->type() != ConnectionString::SYNC && ( ok == false || temp.size() ) ) { errors.push_back( temp ); errorObjects.push_back( res ); } @@ -258,7 +262,7 @@ namespace mongo { updatedExistingStat = -1; } - conn.done(); + conn->done(); } bbb.done(); diff --git a/s/commands_public.cpp b/s/commands_public.cpp index 23dd7fe..c8914ea 100644 --- a/s/commands_public.cpp +++ b/s/commands_public.cpp @@ -83,7 +83,7 @@ namespace mongo { bool ok = conn->runCommand( db , cmdObj , res , passOptions() ? options : 0 ); if ( ! ok && res["code"].numberInt() == StaleConfigInContextCode ) { conn.done(); - throw StaleConfigException("foo","command failed because of stale config"); + throw StaleConfigException( res["ns"].toString(), "command failed because of stale config" ); } result.appendElements( res ); conn.done(); diff --git a/s/shard_version.cpp b/s/shard_version.cpp index 9c55019..3fdd243 100644 --- a/s/shard_version.cpp +++ b/s/shard_version.cpp @@ -62,6 +62,7 @@ namespace mongo { } void setInitialized( DBClientBase * conn ){ + // At this point, conn may be deleted, *do not access* scoped_lock lk( _mutex ); _init.insert( conn ); } @@ -145,7 +146,10 @@ namespace mongo { LOG(2) << "initial sharding settings : " << cmd << endl; bool ok = conn->runCommand( "admin" , cmd , result ); + + // Conn may be deleted here - *do not access again* - css is an exception, since just uses ptr address connectionShardStatus.setInitialized( conn ); + conn = NULL; // HACK for backwards compatibility with v1.8.x, v2.0.0 and v2.0.1 // Result is false, but will still initialize serverID and configdb @@ -221,6 +225,8 @@ namespace mongo { << endl; BSONObj result; + // Save the server address, cannot access if fails + string serverAddress = conn->getServerAddress(); if ( setShardVersion( *conn , ns , version , authoritative , result ) ) { // success! LOG(1) << " setShardVersion success: " << result << endl; @@ -228,13 +234,16 @@ namespace mongo { return true; } + // At this point, it is no longer safe to use the pointer to conn, we do not know its state + conn = NULL; + LOG(1) << " setShardVersion failed!\n" << result << endl; if ( result["need_authoritative"].trueValue() ) massert( 10428 , "need_authoritative set but in authoritative mode already" , ! authoritative ); if ( ! authoritative ) { - checkShardVersion( *conn , ns , 1 , tryNumber + 1 ); + checkShardVersion( conn_in , ns , 1 , tryNumber + 1 ); return true; } @@ -252,13 +261,13 @@ namespace mongo { const int maxNumTries = 7; if ( tryNumber < maxNumTries ) { LOG( tryNumber < ( maxNumTries / 2 ) ? 1 : 0 ) - << "going to retry checkShardVersion host: " << conn->getServerAddress() << " " << result << endl; + << "going to retry checkShardVersion host: " << serverAddress << " " << result << endl; sleepmillis( 10 * tryNumber ); - checkShardVersion( *conn , ns , true , tryNumber + 1 ); + checkShardVersion( conn_in , ns , true , tryNumber + 1 ); return true; } - string errmsg = str::stream() << "setShardVersion failed host: " << conn->getServerAddress() << " " << result; + string errmsg = str::stream() << "setShardVersion failed host: " << serverAddress << " " << result; log() << " " << errmsg << endl; massert( 10429 , errmsg , 0 ); return true; diff --git a/s/strategy_single.cpp b/s/strategy_single.cpp index fc206e5..a91ac2c 100644 --- a/s/strategy_single.cpp +++ b/s/strategy_single.cpp @@ -21,6 +21,7 @@ #include "cursors.h" #include "../client/connpool.h" #include "../db/commands.h" +#include "grid.h" namespace mongo { @@ -68,7 +69,15 @@ namespace mongo { throw e; loops--; - log() << "retrying command: " << q.query << endl; + log() << "retrying command: " << q.query << " (" << loops << " attempts remain)" << endl; + if( loops < 4 ){ + // In newer versions, can just use forceRemoteCheckShardVersion + DBConfigPtr conf = grid.getDBConfig( e.getns() ); + if ( conf ){ + conf->reload(); + conf->getChunkManagerIfExists( e.getns(), true, true ); + } + } ShardConnection::checkMyConnectionVersions( e.getns() ); } catch ( AssertionException& e ) { @@ -192,7 +201,7 @@ namespace mongo { for ( unsigned i=0; i<shards.size(); i++ ) { Shard shard = shards[i]; ScopedDbConnection conn( shard ); - BSONObj temp = conn->findOne( r.getns() , BSONObj() ); + BSONObj temp = conn->findOne( r.getns() , q.query ); if ( temp["inprog"].isABSONObj() ) { BSONObjIterator i( temp["inprog"].Obj() ); while ( i.more() ) { diff --git a/third_party/snappy.py b/third_party/snappy.py index e53ee63..e988110 100644 --- a/third_party/snappy.py +++ b/third_party/snappy.py @@ -11,4 +11,4 @@ def configure( env , fileLists , options ): fileLists["serverOnlyFiles"] += [ myenv.Object(f) for f in files ] def configureSystem( env , fileLists , options ): - configure( env , fileLists , options ) + env.Append( LIBS=[ "snappy" ] ) diff --git a/tools/dump.cpp b/tools/dump.cpp index a1690b2..971575d 100644 --- a/tools/dump.cpp +++ b/tools/dump.cpp @@ -37,7 +37,7 @@ class Dump : public Tool { FILE* _f; }; public: - Dump() : Tool( "dump" , ALL , "*" , "*" , false ) { + Dump() : Tool( "dump" , ALL , "*" , "*" , true ) { add_options() ("out,o", po::value<string>()->default_value("dump"), "output directory or \"-\" for stdout") ("query,q", po::value<string>() , "json query" ) @@ -47,6 +47,19 @@ public: ; } + virtual void preSetup() { + string out = getParam("out"); + if ( out == "-" ) { + // write output to standard error to avoid mangling output + // must happen early to avoid sending junk to stdout + useStandardOutput(false); + } + } + + virtual void printExtraHelp(ostream& out) { + out << "Export MongoDB data to BSON files.\n" << endl; + } + // This is a functor that writes a BSONObj to a file struct Writer { Writer(FILE* out, ProgressMeter* m) :_out(out), _m(m) {} diff --git a/tools/export.cpp b/tools/export.cpp index c3a5420..0d9f022 100644 --- a/tools/export.cpp +++ b/tools/export.cpp @@ -45,6 +45,19 @@ public: _usesstdout = false; } + virtual void preSetup() { + string out = getParam("out"); + if ( out == "-" ) { + // write output to standard error to avoid mangling output + // must happen early to avoid sending junk to stdout + useStandardOutput(false); + } + } + + virtual void printExtraHelp( ostream & out ) { + out << "Export MongoDB data to CSV, TSV or JSON files.\n" << endl; + } + // Turn every double quote character into two double quote characters // If hasSurroundingQuotes is true, doesn't escape the first and last // characters of the string, if it's false, add a double quote character diff --git a/tools/restore.cpp b/tools/restore.cpp index c08c14f..ec0e1d3 100644 --- a/tools/restore.cpp +++ b/tools/restore.cpp @@ -73,6 +73,11 @@ public: _drop = hasParam( "drop" ); _keepIndexVersion = hasParam("keepIndexVersion"); + if (isMongos() && _db == "" && exists(root / "config")) { + cout << "Cannot do a full restore on a sharded system" << endl; + return -1; + } + bool doOplog = hasParam( "oplogReplay" ); if (doOplog) { // fail early if errors diff --git a/tools/stat.cpp b/tools/stat.cpp index e1eda8d..8b1b5aa 100644 --- a/tools/stat.cpp +++ b/tools/stat.cpp @@ -365,11 +365,6 @@ namespace mongo { } int run() { - if ( !(_username.empty() || _password.empty()) && isMongos()) { - cout << "You cannot use mongostat on a mongos running with authentication enabled" << endl; - return -1; - } - _sleep = getParam( "sleep" , _sleep ); _all = hasParam( "all" ); if ( _many ) @@ -480,6 +475,9 @@ namespace mongo { string error; bool mongos; + + string username; + string password; }; static void serverThread( shared_ptr<ServerState> state ) { @@ -489,8 +487,9 @@ namespace mongo { string errmsg; if ( ! conn.connect( state->host , errmsg ) ) state->error = errmsg; - long long cycleNumber = 0; + + conn.auth("admin", state->username, state->password, errmsg); while ( ++cycleNumber ) { try { @@ -549,6 +548,9 @@ namespace mongo { state.reset( new ServerState() ); state->host = host; state->thr.reset( new boost::thread( boost::bind( serverThread , state ) ) ); + state->username = _username; + state->password = _password; + return true; } @@ -605,7 +607,7 @@ namespace mongo { int runMany() { StateMap threads; - + { string orig = getParam( "host" ); if ( orig == "" ) diff --git a/tools/tool.h b/tools/tool.h index e6694f3..e401093 100644 --- a/tools/tool.h +++ b/tools/tool.h @@ -82,9 +82,13 @@ namespace mongo { return _db + "." + _coll; } + void useStandardOutput( bool mode ) { + _usesstdout = mode; + } + bool isMaster(); bool isMongos(); - + virtual void preSetup() {} virtual int run() = 0; diff --git a/util/log.cpp b/util/log.cpp index bc48584..0dc75ed 100644 --- a/util/log.cpp +++ b/util/log.cpp @@ -25,12 +25,16 @@ using namespace std; #ifdef _WIN32 # include <io.h> +# include <fcntl.h> #else # include <cxxabi.h> # include <sys/file.h> #endif -//#include "../db/jsobj.h" +#ifdef _WIN32 +# define dup2 _dup2 // Microsoft headers use ISO C names +# define fileno _fileno +#endif namespace mongo { @@ -85,55 +89,72 @@ namespace mongo { } if ( _file ) { -#ifdef _WIN32 - cout << "log rotation net yet supported on windows" << endl; - return; -#else - struct tm t; - localtime_r( &_opened , &t ); +#ifdef POSIX_FADV_DONTNEED + posix_fadvise(fileno(_file), 0, 0, POSIX_FADV_DONTNEED); +#endif + + // Rename the (open) existing log file to a timestamped name stringstream ss; - ss << _path << "." << terseCurrentTime(false); + ss << _path << "." << terseCurrentTime( false ); string s = ss.str(); rename( _path.c_str() , s.c_str() ); -#endif } - - FILE* tmp = freopen(_path.c_str(), (_append ? "a" : "w"), stdout); - if (!tmp) { + FILE* tmp = 0; // The new file using the original logpath name + +#if _WIN32 + // We rename an open log file (above, on next rotation) and the trick to getting Windows to do that is + // to open the file with FILE_SHARE_DELETE. So, we can't use the freopen() call that non-Windows + // versions use because it would open the file without the FILE_SHARE_DELETE flag we need. + // + HANDLE newFileHandle = CreateFileA( + _path.c_str(), + GENERIC_WRITE, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + NULL, + OPEN_ALWAYS, + FILE_ATTRIBUTE_NORMAL, + NULL + ); + if ( INVALID_HANDLE_VALUE != newFileHandle ) { + int newFileDescriptor = _open_osfhandle( reinterpret_cast<intptr_t>(newFileHandle), _O_APPEND ); + tmp = _fdopen( newFileDescriptor, _append ? "a" : "w" ); + } +#else + tmp = freopen(_path.c_str(), _append ? "a" : "w", stdout); +#endif + if ( !tmp ) { cerr << "can't open: " << _path.c_str() << " for log file" << endl; dbexit( EXIT_BADOPTIONS ); - assert(0); + assert( 0 ); } -#ifdef _WIN32 // windows has these functions it just gives them a funny name -# define dup2 _dup2 -# define fileno _fileno -#endif - // redirect stderr to log file - dup2(fileno(tmp), 2); + // redirect stdout and stderr to log file + dup2( fileno( tmp ), 1 ); // stdout + dup2( fileno( tmp ), 2 ); // stderr Logstream::setLogFile(tmp); // after this point no thread will be using old file +#if _WIN32 + if ( _file ) + fclose( _file ); // In Windows, we still have the old file open, close it now +#endif + #if 0 // enable to test redirection cout << "written to cout" << endl; cerr << "written to cerr" << endl; log() << "written to log()" << endl; #endif - _file = tmp; - _opened = time(0); + _file = tmp; // Save new file for next rotation } private: - bool _enabled; string _path; bool _append; - FILE * _file; - time_t _opened; } loggingManager; @@ -507,6 +507,12 @@ namespace mongo { int x = errno; cout << "Failed to write to logfile: " << errnoWithDescription(x) << ": " << out << endl; } + +#ifdef POSIX_FADV_DONTNEED + // This only applies to pages that have already been flushed + RARELY posix_fadvise(fileno(logfile), 0, 0, POSIX_FADV_DONTNEED); +#endif + } _init(); } diff --git a/util/net/sock.cpp b/util/net/sock.cpp index 69c42f2..ac565c3 100644 --- a/util/net/sock.cpp +++ b/util/net/sock.cpp @@ -342,6 +342,67 @@ namespace mongo { // ------------ SSLManager ----------------- #ifdef MONGO_SSL + static unsigned long _ssl_id_callback(); + static void _ssl_locking_callback(int mode, int type, const char *file, int line); + + class SSLThreadInfo { + public: + + SSLThreadInfo() { + _id = ++_next; + CRYPTO_set_id_callback(_ssl_id_callback); + CRYPTO_set_locking_callback(_ssl_locking_callback); + } + + ~SSLThreadInfo() { + CRYPTO_set_id_callback(0); + } + + unsigned long id() const { return _id; } + + void lock_callback( int mode, int type, const char *file, int line ) { + if ( mode & CRYPTO_LOCK ) { + _mutex[type]->lock(); + } + else { + _mutex[type]->unlock(); + } + } + + static void init() { + while ( (int)_mutex.size() < CRYPTO_num_locks() ) + _mutex.push_back( new SimpleMutex("SSLThreadInfo") ); + } + + static SSLThreadInfo* get() { + SSLThreadInfo* me = _thread.get(); + if ( ! me ) { + me = new SSLThreadInfo(); + _thread.reset( me ); + } + return me; + } + + private: + unsigned _id; + + static AtomicUInt _next; + static vector<SimpleMutex*> _mutex; + static boost::thread_specific_ptr<SSLThreadInfo> _thread; + }; + + static unsigned long _ssl_id_callback() { + return SSLThreadInfo::get()->id(); + } + static void _ssl_locking_callback(int mode, int type, const char *file, int line) { + SSLThreadInfo::get()->lock_callback( mode , type , file , line ); + } + + AtomicUInt SSLThreadInfo::_next; + vector<SimpleMutex*> SSLThreadInfo::_mutex; + boost::thread_specific_ptr<SSLThreadInfo> SSLThreadInfo::_thread; + + SSLManager::SSLManager( bool client ) { _client = client; SSL_library_init(); @@ -352,6 +413,8 @@ namespace mongo { massert( 15864 , mongoutils::str::stream() << "can't create SSL Context: " << ERR_error_string(ERR_get_error(), NULL) , _context ); SSL_CTX_set_options( _context, SSL_OP_ALL); + SSLThreadInfo::init(); + SSLThreadInfo::get(); } void SSLManager::setupPubPriv( const string& privateKeyFile , const string& publicKeyFile ) { @@ -387,6 +450,7 @@ namespace mongo { } SSL * SSLManager::secure( int fd ) { + SSLThreadInfo::get(); SSL * ssl = SSL_new( _context ); massert( 15861 , "can't create SSL" , ssl ); SSL_set_fd( ssl , fd ); @@ -415,13 +479,18 @@ namespace mongo { _bytesOut = 0; _bytesIn = 0; #ifdef MONGO_SSL + _ssl = 0; _sslAccepted = 0; #endif } void Socket::close() { #ifdef MONGO_SSL - _ssl.reset(); + if ( _ssl ) { + SSL_shutdown( _ssl ); + SSL_free( _ssl ); + _ssl = 0; + } #endif if ( _fd >= 0 ) { closesocket( _fd ); @@ -433,8 +502,8 @@ namespace mongo { void Socket::secure( SSLManager * ssl ) { assert( ssl ); assert( _fd >= 0 ); - _ssl.reset( ssl->secure( _fd ) ); - SSL_connect( _ssl.get() ); + _ssl = ssl->secure( _fd ); + SSL_connect( _ssl ); } void Socket::secureAccepted( SSLManager * ssl ) { @@ -446,8 +515,8 @@ namespace mongo { #ifdef MONGO_SSL if ( _sslAccepted ) { assert( _fd ); - _ssl.reset( _sslAccepted->secure( _fd ) ); - SSL_accept( _ssl.get() ); + _ssl = _sslAccepted->secure( _fd ); + SSL_accept( _ssl ); _sslAccepted = 0; } #endif @@ -510,7 +579,7 @@ namespace mongo { int Socket::_send( const char * data , int len ) { #ifdef MONGO_SSL if ( _ssl ) { - return SSL_write( _ssl.get() , data , len ); + return SSL_write( _ssl , data , len ); } #endif return ::send( _fd , data , len , portSendFlags ); @@ -524,7 +593,7 @@ namespace mongo { #ifdef MONGO_SSL if ( _ssl ) { - log() << "SSL Error ret: " << ret << " err: " << SSL_get_error( _ssl.get() , ret ) + log() << "SSL Error ret: " << ret << " err: " << SSL_get_error( _ssl , ret ) << " " << ERR_error_string(ERR_get_error(), NULL) << endl; } @@ -679,7 +748,7 @@ namespace mongo { int Socket::_recv( char *buf, int max ) { #ifdef MONGO_SSL if ( _ssl ){ - return SSL_read( _ssl.get() , buf , max ); + return SSL_read( _ssl , buf , max ); } #endif return ::recv( _fd , buf , max , portRecvFlags ); diff --git a/util/net/sock.h b/util/net/sock.h index 1cd5133..f91c288 100644 --- a/util/net/sock.h +++ b/util/net/sock.h @@ -243,7 +243,7 @@ namespace mongo { long long _bytesOut; #ifdef MONGO_SSL - shared_ptr<SSL> _ssl; + SSL* _ssl; SSLManager * _sslAccepted; #endif diff --git a/util/scopeguard.h b/util/scopeguard.h new file mode 100644 index 0000000..8ced04f --- /dev/null +++ b/util/scopeguard.h @@ -0,0 +1,432 @@ +//////////////////////////////////////////////////////////////////////////////// +// The Loki Library +// Copyright (c) 2000 Andrei Alexandrescu +// Copyright (c) 2000 Petru Marginean +// Copyright (c) 2005 Joshua Lehrer +// +// Permission to use, copy, modify, distribute and sell this software for any +// purpose is hereby granted without fee, provided that the above copyright +// notice appear in all copies and that both that copyright notice and this +// permission notice appear in supporting documentation. +// The author makes no representations about the +// suitability of this software for any purpose. It is provided "as is" +// without express or implied warranty. +//////////////////////////////////////////////////////////////////////////////// +#ifndef LOKI_SCOPEGUARD_H_ +#define LOKI_SCOPEGUARD_H_ + +namespace mongo +{ + + //////////////////////////////////////////////////////////////////////////////// + /// \class RefToValue + /// + /// Transports a reference as a value + /// Serves to implement the Colvin/Gibbons trick for SmartPtr/ScopeGuard + //////////////////////////////////////////////////////////////////////////////// + + template <class T> + class RefToValue + { + public: + + RefToValue(T& ref) : ref_(ref) + {} + + RefToValue(const RefToValue& rhs) : ref_(rhs.ref_) + {} + + operator T& () const + { + return ref_; + } + + private: + // Disable - not implemented + RefToValue(); + RefToValue& operator=(const RefToValue&); + + T& ref_; + }; + + + //////////////////////////////////////////////////////////////////////////////// + /// RefToValue creator. + //////////////////////////////////////////////////////////////////////////////// + + template <class T> + inline RefToValue<T> ByRef(T& t) + { + return RefToValue<T>(t); + } + + + + + //////////////////////////////////////////// + /// ScopeGuard + /* + Trivial example for use: + + FILE* f = fopen("myfile.txt", "w+"); + if (!f) + return error; + ON_BLOCK_EXIT(fclose, f); + + + More complicated example: + + ScopeGuard guard = MakeGuard(my_rollback_func, myparam); + ... + if (successful) { + guard.Dismiss(); + return; + } + // guard is still active here and will fire at scope exit + ... + + + */ + + + class ScopeGuardImplBase + { + ScopeGuardImplBase& operator =(const ScopeGuardImplBase&); + + protected: + + ~ScopeGuardImplBase() + {} + + ScopeGuardImplBase(const ScopeGuardImplBase& other) throw() + : dismissed_(other.dismissed_) + { + other.Dismiss(); + } + + template <typename J> + static void SafeExecute(J& j) throw() + { + if (!j.dismissed_) + try + { + j.Execute(); + } + catch(...) + {} + } + + mutable bool dismissed_; + + public: + ScopeGuardImplBase() throw() : dismissed_(false) + {} + + void Dismiss() const throw() + { + dismissed_ = true; + } + }; + + //////////////////////////////////////////////////////////////// + /// + /// \typedef typedef const ScopeGuardImplBase& ScopeGuard + /// + /// See Andrei's and Petru Marginean's CUJ article + /// http://www.cuj.com/documents/s=8000/cujcexp1812alexandr/alexandr.htm + /// + /// Changes to the original code by Joshua Lehrer: + /// http://www.lehrerfamily.com/scopeguard.html + //////////////////////////////////////////////////////////////// + + typedef const ScopeGuardImplBase& ScopeGuard; + + template <typename F> + class ScopeGuardImpl0 : public ScopeGuardImplBase + { + public: + static ScopeGuardImpl0<F> MakeGuard(F fun) + { + return ScopeGuardImpl0<F>(fun); + } + + ~ScopeGuardImpl0() throw() + { + SafeExecute(*this); + } + + void Execute() + { + fun_(); + } + + protected: + ScopeGuardImpl0(F fun) : fun_(fun) + {} + + F fun_; + }; + + template <typename F> + inline ScopeGuardImpl0<F> MakeGuard(F fun) + { + return ScopeGuardImpl0<F>::MakeGuard(fun); + } + + template <typename F, typename P1> + class ScopeGuardImpl1 : public ScopeGuardImplBase + { + public: + static ScopeGuardImpl1<F, P1> MakeGuard(F fun, P1 p1) + { + return ScopeGuardImpl1<F, P1>(fun, p1); + } + + ~ScopeGuardImpl1() throw() + { + SafeExecute(*this); + } + + void Execute() + { + fun_(p1_); + } + + protected: + ScopeGuardImpl1(F fun, P1 p1) : fun_(fun), p1_(p1) + {} + + F fun_; + const P1 p1_; + }; + + template <typename F, typename P1> + inline ScopeGuardImpl1<F, P1> MakeGuard(F fun, P1 p1) + { + return ScopeGuardImpl1<F, P1>::MakeGuard(fun, p1); + } + + template <typename F, typename P1, typename P2> + class ScopeGuardImpl2: public ScopeGuardImplBase + { + public: + static ScopeGuardImpl2<F, P1, P2> MakeGuard(F fun, P1 p1, P2 p2) + { + return ScopeGuardImpl2<F, P1, P2>(fun, p1, p2); + } + + ~ScopeGuardImpl2() throw() + { + SafeExecute(*this); + } + + void Execute() + { + fun_(p1_, p2_); + } + + protected: + ScopeGuardImpl2(F fun, P1 p1, P2 p2) : fun_(fun), p1_(p1), p2_(p2) + {} + + F fun_; + const P1 p1_; + const P2 p2_; + }; + + template <typename F, typename P1, typename P2> + inline ScopeGuardImpl2<F, P1, P2> MakeGuard(F fun, P1 p1, P2 p2) + { + return ScopeGuardImpl2<F, P1, P2>::MakeGuard(fun, p1, p2); + } + + template <typename F, typename P1, typename P2, typename P3> + class ScopeGuardImpl3 : public ScopeGuardImplBase + { + public: + static ScopeGuardImpl3<F, P1, P2, P3> MakeGuard(F fun, P1 p1, P2 p2, P3 p3) + { + return ScopeGuardImpl3<F, P1, P2, P3>(fun, p1, p2, p3); + } + + ~ScopeGuardImpl3() throw() + { + SafeExecute(*this); + } + + void Execute() + { + fun_(p1_, p2_, p3_); + } + + protected: + ScopeGuardImpl3(F fun, P1 p1, P2 p2, P3 p3) : fun_(fun), p1_(p1), p2_(p2), p3_(p3) + {} + + F fun_; + const P1 p1_; + const P2 p2_; + const P3 p3_; + }; + + template <typename F, typename P1, typename P2, typename P3> + inline ScopeGuardImpl3<F, P1, P2, P3> MakeGuard(F fun, P1 p1, P2 p2, P3 p3) + { + return ScopeGuardImpl3<F, P1, P2, P3>::MakeGuard(fun, p1, p2, p3); + } + + //************************************************************ + + template <class Obj, typename MemFun> + class ObjScopeGuardImpl0 : public ScopeGuardImplBase + { + public: + static ObjScopeGuardImpl0<Obj, MemFun> MakeObjGuard(Obj& obj, MemFun memFun) + { + return ObjScopeGuardImpl0<Obj, MemFun>(obj, memFun); + } + + ~ObjScopeGuardImpl0() throw() + { + SafeExecute(*this); + } + + void Execute() + { + (obj_.*memFun_)(); + } + + protected: + ObjScopeGuardImpl0(Obj& obj, MemFun memFun) : obj_(obj), memFun_(memFun) + {} + + Obj& obj_; + MemFun memFun_; + }; + + template <class Obj, typename MemFun> + inline ObjScopeGuardImpl0<Obj, MemFun> MakeObjGuard(Obj& obj, MemFun memFun) + { + return ObjScopeGuardImpl0<Obj, MemFun>::MakeObjGuard(obj, memFun); + } + + template <typename Ret, class Obj1, class Obj2> + inline ObjScopeGuardImpl0<Obj1,Ret(Obj2::*)()> MakeGuard(Ret(Obj2::*memFun)(), Obj1 &obj) + { + return ObjScopeGuardImpl0<Obj1,Ret(Obj2::*)()>::MakeObjGuard(obj,memFun); + } + + template <typename Ret, class Obj1, class Obj2> + inline ObjScopeGuardImpl0<Obj1,Ret(Obj2::*)()> MakeGuard(Ret(Obj2::*memFun)(), Obj1 *obj) + { + return ObjScopeGuardImpl0<Obj1,Ret(Obj2::*)()>::MakeObjGuard(*obj,memFun); + } + + template <class Obj, typename MemFun, typename P1> + class ObjScopeGuardImpl1 : public ScopeGuardImplBase + { + public: + static ObjScopeGuardImpl1<Obj, MemFun, P1> MakeObjGuard(Obj& obj, MemFun memFun, P1 p1) + { + return ObjScopeGuardImpl1<Obj, MemFun, P1>(obj, memFun, p1); + } + + ~ObjScopeGuardImpl1() throw() + { + SafeExecute(*this); + } + + void Execute() + { + (obj_.*memFun_)(p1_); + } + + protected: + ObjScopeGuardImpl1(Obj& obj, MemFun memFun, P1 p1) : obj_(obj), memFun_(memFun), p1_(p1) + {} + + Obj& obj_; + MemFun memFun_; + const P1 p1_; + }; + + template <class Obj, typename MemFun, typename P1> + inline ObjScopeGuardImpl1<Obj, MemFun, P1> MakeObjGuard(Obj& obj, MemFun memFun, P1 p1) + { + return ObjScopeGuardImpl1<Obj, MemFun, P1>::MakeObjGuard(obj, memFun, p1); + } + + template <typename Ret, class Obj1, class Obj2, typename P1a, typename P1b> + inline ObjScopeGuardImpl1<Obj1,Ret(Obj2::*)(P1a),P1b> MakeGuard(Ret(Obj2::*memFun)(P1a), Obj1 &obj, P1b p1) + { + return ObjScopeGuardImpl1<Obj1,Ret(Obj2::*)(P1a),P1b>::MakeObjGuard(obj,memFun,p1); + } + + template <typename Ret, class Obj1, class Obj2, typename P1a, typename P1b> + inline ObjScopeGuardImpl1<Obj1,Ret(Obj2::*)(P1a),P1b> MakeGuard(Ret(Obj2::*memFun)(P1a), Obj1 *obj, P1b p1) + { + return ObjScopeGuardImpl1<Obj1,Ret(Obj2::*)(P1a),P1b>::MakeObjGuard(*obj,memFun,p1); + } + + template <class Obj, typename MemFun, typename P1, typename P2> + class ObjScopeGuardImpl2 : public ScopeGuardImplBase + { + public: + static ObjScopeGuardImpl2<Obj, MemFun, P1, P2> MakeObjGuard(Obj& obj, MemFun memFun, P1 p1, P2 p2) + { + return ObjScopeGuardImpl2<Obj, MemFun, P1, P2>(obj, memFun, p1, p2); + } + + ~ObjScopeGuardImpl2() throw() + { + SafeExecute(*this); + } + + void Execute() + { + (obj_.*memFun_)(p1_, p2_); + } + + protected: + ObjScopeGuardImpl2(Obj& obj, MemFun memFun, P1 p1, P2 p2) : obj_(obj), memFun_(memFun), p1_(p1), p2_(p2) + {} + + Obj& obj_; + MemFun memFun_; + const P1 p1_; + const P2 p2_; + }; + + template <class Obj, typename MemFun, typename P1, typename P2> + inline ObjScopeGuardImpl2<Obj, MemFun, P1, P2> MakeObjGuard(Obj& obj, MemFun memFun, P1 p1, P2 p2) + { + return ObjScopeGuardImpl2<Obj, MemFun, P1, P2>::MakeObjGuard(obj, memFun, p1, p2); + } + + template <typename Ret, class Obj1, class Obj2, typename P1a, typename P1b, typename P2a, typename P2b> + inline ObjScopeGuardImpl2<Obj1,Ret(Obj2::*)(P1a,P2a),P1b,P2b> MakeGuard(Ret(Obj2::*memFun)(P1a,P2a), Obj1 &obj, P1b p1, P2b p2) + { + return ObjScopeGuardImpl2<Obj1,Ret(Obj2::*)(P1a,P2a),P1b,P2b>::MakeObjGuard(obj,memFun,p1,p2); + } + + template <typename Ret, class Obj1, class Obj2, typename P1a, typename P1b, typename P2a, typename P2b> + inline ObjScopeGuardImpl2<Obj1,Ret(Obj2::*)(P1a,P2a),P1b,P2b> MakeGuard(Ret(Obj2::*memFun)(P1a,P2a), Obj1 *obj, P1b p1, P2b p2) + { + return ObjScopeGuardImpl2<Obj1,Ret(Obj2::*)(P1a,P2a),P1b,P2b>::MakeObjGuard(*obj,memFun,p1,p2); + } + +} // namespace Loki + +#define LOKI_CONCATENATE_DIRECT(s1, s2) s1##s2 +#define LOKI_CONCATENATE(s1, s2) LOKI_CONCATENATE_DIRECT(s1, s2) +#define LOKI_ANONYMOUS_VARIABLE(str) LOKI_CONCATENATE(str, __LINE__) + +#ifdef __GNUC__ +#define ON_BLOCK_EXIT ScopeGuard LOKI_ANONYMOUS_VARIABLE(scopeGuard) __attribute__ ((unused)) = MakeGuard +#define ON_BLOCK_EXIT_OBJ ScopeGuard LOKI_ANONYMOUS_VARIABLE(scopeGuard) __attribute__ ((unused)) = MakeObjGuard +#else +#define ON_BLOCK_EXIT ScopeGuard LOKI_ANONYMOUS_VARIABLE(scopeGuard) = MakeGuard +#define ON_BLOCK_EXIT_OBJ ScopeGuard LOKI_ANONYMOUS_VARIABLE(scopeGuard) = MakeObjGuard +#endif + +#endif //LOKI_SCOPEGUARD_H_ diff --git a/util/version.cpp b/util/version.cpp index c43180c..883c208 100644 --- a/util/version.cpp +++ b/util/version.cpp @@ -38,7 +38,7 @@ namespace mongo { * 1.2.3-rc4-pre- * If you really need to do something else you'll need to fix _versionArray() */ - const char versionString[] = "2.0.2"; + const char versionString[] = "2.0.3"; // See unit test for example outputs static BSONArray _versionArray(const char* version){ |