From cbe2d992e9cd1ea66af9fa91df006106775d3073 Mon Sep 17 00:00:00 2001 From: Antonin Kral Date: Tue, 23 Aug 2011 08:47:17 +0200 Subject: Imported Upstream version 1.8.3 --- client/dbclient_rs.cpp | 41 +++++++++- client/dbclient_rs.h | 6 ++ client/dbclientcursor.h | 5 ++ db/btree.cpp | 1 + db/btreecursor.cpp | 1 + db/cloner.cpp | 2 + db/db.cpp | 34 +++++++++ db/dbcommands.cpp | 15 +++- db/dbhelpers.cpp | 2 +- db/dur_commitjob.cpp | 5 +- db/geo/2d.cpp | 9 ++- db/index.h | 7 ++ db/namespace.cpp | 13 +++- db/namespace.h | 1 + db/pdfile.cpp | 1 + db/update.cpp | 3 +- doxygenConfig | 2 +- jstests/drop2.js | 6 +- jstests/geo_update.js | 37 +++++++++ jstests/index_fornew.js | 13 ++++ jstests/slowNightly/sharding_balance4.js | 4 +- rpm/init.d-mongod | 12 ++- rpm/mongo.spec | 2 +- s/chunk.cpp | 6 +- s/chunk.h | 1 + s/client.cpp | 4 +- s/config.cpp | 48 +++++++++++- s/config.h | 3 +- s/d_migrate.cpp | 126 ++++++++++++++++++------------- s/shard.h | 8 +- s/shardconnection.cpp | 72 +++++++++++++----- s/strategy_shard.cpp | 2 +- s/writeback_listener.cpp | 15 +++- scripting/engine_spidermonkey.cpp | 2 +- tools/dump.cpp | 13 +++- util/message_server_port.cpp | 33 ++++++++ util/version.cpp | 2 +- 37 files changed, 444 insertions(+), 113 deletions(-) create mode 100644 jstests/geo_update.js create mode 100644 jstests/index_fornew.js diff --git a/client/dbclient_rs.cpp b/client/dbclient_rs.cpp index ae01da3..37f6225 100644 --- a/client/dbclient_rs.cpp +++ b/client/dbclient_rs.cpp @@ -463,9 +463,12 @@ namespace mongo { if ( ! _slave->isFailed() ) return _slave.get(); _monitor->notifySlaveFailure( _slaveHost ); + _slaveHost = _monitor->getSlave(); } - - _slaveHost = _monitor->getSlave(); + else { + _slaveHost = h; + } + _slave.reset( new DBClientConnection( true , this ) ); _slave->connect( _slaveHost ); _auth( _slave.get() ); @@ -544,7 +547,7 @@ namespace mongo { // checkSlave will try a different slave automatically after a failure for ( int i=0; i<2; i++ ) { try { - return checkSlave()->query(ns,query,nToReturn,nToSkip,fieldsToReturn,queryOptions,batchSize); + return checkSlaveQueryResult( checkSlave()->query(ns,query,nToReturn,nToSkip,fieldsToReturn,queryOptions,batchSize) ); } catch ( DBException &e ) { log() << "can't query replica set slave " << i << " : " << _slaveHost << e.what() << endl; @@ -581,6 +584,38 @@ namespace mongo { assert(0); } + auto_ptr DBClientReplicaSet::checkSlaveQueryResult( auto_ptr result ){ + + bool isError = result->hasResultFlag( ResultFlag_ErrSet ); + if( ! isError ) return result; + + BSONObj error = result->peekOne(); + + BSONElement code = error["code"]; + if( code.eoo() || ! code.isNumber() ){ + warning() << "no code for error from secondary host " << _slaveHost << ", error was " << error << endl; + return result; + } + + // We only check for "not master or secondary" errors here + + // If the error code here ever changes, we need to change this code also + if( code.Int() == 13436 /* not master or secondary */ ){ + isntSecondary(); + throw DBException( str::stream() << "slave " << _slaveHost.toString() << " is no longer secondary", 14812 ); + } + + return result; + } + + void DBClientReplicaSet::isntSecondary() { + log() << "slave no longer has secondary status: " << _slaveHost << endl; + // Failover to next slave + _monitor->notifySlaveFailure( _slaveHost ); + _slave.reset(); + } + + void DBClientReplicaSet::isntMaster() { log() << "got not master for: " << _masterHost << endl; _monitor->notifyFailure( _masterHost ); diff --git a/client/dbclient_rs.h b/client/dbclient_rs.h index e942d7b..548b46a 100644 --- a/client/dbclient_rs.h +++ b/client/dbclient_rs.h @@ -215,6 +215,9 @@ namespace mongo { /* this is the callback from our underlying connections to notify us that we got a "not master" error. */ void isntMaster(); + /* this is used to indicate we got a "not master or secondary" error from a secondary. + */ + void isntSecondary(); // ----- status ------ @@ -240,6 +243,9 @@ namespace mongo { private: + // Used to simplify slave-handling logic on errors + auto_ptr checkSlaveQueryResult( auto_ptr result ); + DBClientConnection * checkMaster(); DBClientConnection * checkSlave(); diff --git a/client/dbclientcursor.h b/client/dbclientcursor.h index 5d795f4..d176b89 100644 --- a/client/dbclientcursor.h +++ b/client/dbclientcursor.h @@ -86,6 +86,11 @@ namespace mongo { WARNING: no support for _putBack yet! */ void peek(vector&, int atMost); + BSONObj peekOne(){ + vector v; + peek( v, 1 ); + return v.size() > 0 ? v[0] : BSONObj(); + } /** iterate the rest of the cursor and return the number if items diff --git a/db/btree.cpp b/db/btree.cpp index 242c534..299c212 100644 --- a/db/btree.cpp +++ b/db/btree.cpp @@ -1642,6 +1642,7 @@ namespace mongo { } DiskLoc BtreeBucket::findSingle( const IndexDetails& indexdetails , const DiskLoc& thisLoc, const BSONObj& key ) const { + indexdetails.checkVersion(); int pos; bool found; // TODO: is it really ok here that the order is a default? diff --git a/db/btreecursor.cpp b/db/btreecursor.cpp index 9cab95f..ce841ce 100644 --- a/db/btreecursor.cpp +++ b/db/btreecursor.cpp @@ -73,6 +73,7 @@ namespace mongo { } void BtreeCursor::audit() { + indexDetails.checkVersion(); dassert( d->idxNo((IndexDetails&) indexDetails) == idxNo ); if ( otherTraceLevel >= 12 ) { diff --git a/db/cloner.cpp b/db/cloner.cpp index fe57463..ec5ba99 100644 --- a/db/cloner.cpp +++ b/db/cloner.cpp @@ -624,6 +624,8 @@ namespace mongo { nsToDatabase( target.c_str(), to ); if ( strcmp( from, to ) == 0 ) { renameNamespace( source.c_str(), target.c_str() ); + // make sure we drop counters etc + Top::global.collectionDropped( source ); return true; } } diff --git a/db/db.cpp b/db/db.cpp index 579b4a1..4f4575c 100644 --- a/db/db.cpp +++ b/db/db.cpp @@ -46,6 +46,7 @@ # include "../util/ntservice.h" #else # include +# include #endif namespace mongo { @@ -108,7 +109,36 @@ namespace mongo { } try { +#ifndef __linux__ // TODO: consider making this ifdef _WIN32 boost::thread thr(boost::bind(&connThread,mp)); +#else + pthread_attr_t attrs; + pthread_attr_init(&attrs); + pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); + + static const size_t STACK_SIZE = 4*1024*1024; + + struct rlimit limits; + assert(getrlimit(RLIMIT_STACK, &limits) == 0); + if (limits.rlim_cur > STACK_SIZE) { + pthread_attr_setstacksize(&attrs, (DEBUG_BUILD + ? (STACK_SIZE / 2) + : STACK_SIZE)); + } + else if (limits.rlim_cur < 1024*1024) { + warning() << "Stack size set to " << (limits.rlim_cur/1024) << "KB. We suggest at least 1MB" << endl; + } + + pthread_t thread; + int failed = pthread_create(&thread, &attrs, (void*(*)(void*)) &connThread, mp); + + pthread_attr_destroy(&attrs); + + if (failed) { + log() << "pthread_create failed: " << errnoWithDescription(failed) << endl; + throw boost::thread_resource_error(); // for consistency with boost::thread + } +#endif } catch ( boost::thread_resource_error& ) { log() << "can't create new thread, closing connection" << endl; @@ -699,6 +729,7 @@ int main(int argc, char* argv[]) { ("pairwith", po::value(), "address of server to pair with DEPRECATED") ("arbiter", po::value(), "address of replica pair arbiter server DEPRECATED") ("nodur", "disable journaling (currently the default)") + ("nojournal", "disable journaling (currently the default)") ("appsrvpath", po::value(), "root directory for the babble app server") ("nocursors", "diagnostic/debugging option that turns off cursors DO NOT USE IN PRODUCTION") ("nohints", "ignore query hints") @@ -800,6 +831,9 @@ int main(int argc, char* argv[]) { if( params.count("nodur") ) { cmdLine.dur = false; } + if( params.count("nojournal") ) { + cmdLine.dur = false; + } if( params.count("dur") || params.count( "journal" ) ) { cmdLine.dur = true; } diff --git a/db/dbcommands.cpp b/db/dbcommands.cpp index cf0857a..59dd78c 100644 --- a/db/dbcommands.cpp +++ b/db/dbcommands.cpp @@ -851,6 +851,17 @@ namespace mongo { for ( list::iterator i=all.begin(); i!=all.end(); i++ ) { BSONObj o = *i; + if ( o.getIntField("v") > 0 ) { + BSONObjBuilder b; + BSONObjIterator i( o ); + while ( i.more() ) { + BSONElement e = i.next(); + if ( str::equals( e.fieldName() , "v" ) ) + continue; + b.append( e ); + } + o = b.obj(); + } theDataFileMgr.insertWithObjMod( Namespace( toDeleteNs.c_str() ).getSisterNS( "system.indexes" ).c_str() , o , true ); } @@ -1753,6 +1764,7 @@ namespace mongo { } if ( cmdObj["help"].trueValue() ) { + client.curop()->ensureStarted(); stringstream ss; ss << "help for: " << c->name << " "; c->help( ss ); @@ -1777,6 +1789,7 @@ namespace mongo { if ( c->locktype() == Command::NONE ) { // we also trust that this won't crash + client.curop()->ensureStarted(); string errmsg; int ok = c->run( dbname , cmdObj , errmsg , result , fromRepl ); if ( ! ok ) @@ -1791,6 +1804,7 @@ namespace mongo { } mongolock lk( needWriteLock ); + client.curop()->ensureStarted(); Client::Context ctx( dbname , dbpath , &lk , c->requiresAuth() ); try { @@ -1824,7 +1838,6 @@ namespace mongo { returns true if ran a cmd */ bool _runCommands(const char *ns, BSONObj& _cmdobj, BufBuilder &b, BSONObjBuilder& anObjBuilder, bool fromRepl, int queryOptions) { - cc().curop()->ensureStarted(); string dbname = nsToDatabase( ns ); if( logLevel >= 1 ) diff --git a/db/dbhelpers.cpp b/db/dbhelpers.cpp index 3079aad..5e49589 100644 --- a/db/dbhelpers.cpp +++ b/db/dbhelpers.cpp @@ -269,7 +269,7 @@ namespace mongo { getDur().commitIfNeeded(); - if ( yield && ! cc->yieldSometimes() ) { + if ( yield && ! cc->yield() ) { // cursor got finished by someone else, so we're done cc.release(); // if the collection/db is dropped, cc may be deleted break; diff --git a/db/dur_commitjob.cpp b/db/dur_commitjob.cpp index c67f37c..af77c4f 100644 --- a/db/dur_commitjob.cpp +++ b/db/dur_commitjob.cpp @@ -206,9 +206,10 @@ namespace mongo { // throttle logging if( ++nComplains < 100 || time(0) - lastComplain >= 60 ) { lastComplain = time(0); - log() << "replSet warning DR102 too much data written uncommitted " << _bytes/1000000.0 << "MB" << endl; + warning() << "DR102 too much data written uncommitted " << _bytes/1000000.0 << "MB" << endl; if( nComplains < 10 || nComplains % 10 == 0 ) { - wassert(!"replSet warning DR102 too much data written uncommitted"); + // wassert makes getLastError show an error, so we just print stack trace + printStackTrace(); } } } diff --git a/db/geo/2d.cpp b/db/geo/2d.cpp index d6c97f6..7b2bf17 100644 --- a/db/geo/2d.cpp +++ b/db/geo/2d.cpp @@ -1125,7 +1125,14 @@ namespace mongo { virtual Record* _current() { assert(ok()); return _cur->_loc.rec(); } virtual BSONObj current() { assert(ok()); return _cur->_o; } virtual DiskLoc currLoc() { assert(ok()); return _cur->_loc; } - virtual bool advance() { _cur++; incNscanned(); return ok(); } + virtual bool advance() { + if( ok() ){ + _cur++; + incNscanned(); + return ok(); + } + return false; + } virtual BSONObj currKey() const { return _cur->_key; } virtual string toString() { diff --git a/db/index.h b/db/index.h index 8578ed3..d13bd1d 100644 --- a/db/index.h +++ b/db/index.h @@ -145,6 +145,13 @@ namespace mongo { const IndexSpec& getSpec() const; + void checkVersion() const { + // TODO: cache? + massert( 13658 , + str::stream() << "using a newer index version: " << info.obj() << " v: " << info.obj().getIntField("v" ) , + info.obj().getIntField("v") <= 0 ); + } + string toString() const { return info.obj().toString(); } diff --git a/db/namespace.cpp b/db/namespace.cpp index fcdaee2..0cb0e74 100644 --- a/db/namespace.cpp +++ b/db/namespace.cpp @@ -598,6 +598,17 @@ namespace mongo { } } + void NamespaceDetailsTransient::eraseForPrefix(const char *prefix) { + assertInWriteLock(); + vector< string > found; + for( ouriter i = _map.begin(); i != _map.end(); ++i ) + if ( strncmp( i->first.c_str(), prefix, strlen( prefix ) ) == 0 ) + found.push_back( i->first ); + for( vector< string >::iterator i = found.begin(); i != found.end(); ++i ) { + _map.erase(*i); + } + } + void NamespaceDetailsTransient::computeIndexKeys() { _keysComputed = true; _indexKeys.clear(); @@ -648,7 +659,7 @@ namespace mongo { // index details across commands are in cursors and nsd // transient (including query cache) so clear these. ClientCursor::invalidate( from ); - NamespaceDetailsTransient::clearForPrefix( from ); + NamespaceDetailsTransient::eraseForPrefix( from ); NamespaceDetails *details = ni->details( from ); ni->add_ns( to, *details ); diff --git a/db/namespace.h b/db/namespace.h index 4ec1edd..ef3d04e 100644 --- a/db/namespace.h +++ b/db/namespace.h @@ -425,6 +425,7 @@ namespace mongo { Can be useful as index namespaces share the same start as the regular collection. SLOW - sequential scan of all NamespaceDetailsTransient objects */ static void clearForPrefix(const char *prefix); + static void eraseForPrefix(const char *prefix); /* indexKeys() cache ---------------------------------------------------- */ /* assumed to be in write lock for this */ diff --git a/db/pdfile.cpp b/db/pdfile.cpp index 663ae05..2aedfd4 100644 --- a/db/pdfile.cpp +++ b/db/pdfile.cpp @@ -806,6 +806,7 @@ namespace mongo { result.append("ns", name.c_str()); ClientCursor::invalidate(name.c_str()); Top::global.collectionDropped( name ); + NamespaceDetailsTransient::eraseForPrefix( name.c_str() ); dropNS(name); } diff --git a/db/update.cpp b/db/update.cpp index e53f2af..8dc6c85 100644 --- a/db/update.cpp +++ b/db/update.cpp @@ -1225,8 +1225,7 @@ namespace mongo { } } - if (atomic) - getDur().commitIfNeeded(); + getDur().commitIfNeeded(); continue; } diff --git a/doxygenConfig b/doxygenConfig index 041c65f..0356d10 100644 --- a/doxygenConfig +++ b/doxygenConfig @@ -3,7 +3,7 @@ #--------------------------------------------------------------------------- DOXYFILE_ENCODING = UTF-8 PROJECT_NAME = MongoDB -PROJECT_NUMBER = 1.8.2 +PROJECT_NUMBER = 1.8.3 OUTPUT_DIRECTORY = docs/doxygen CREATE_SUBDIRS = NO OUTPUT_LANGUAGE = English diff --git a/jstests/drop2.js b/jstests/drop2.js index fa239fd..a1d619d 100644 --- a/jstests/drop2.js +++ b/jstests/drop2.js @@ -2,7 +2,7 @@ t = db.jstests_drop2; t.drop(); function debug( x ) { -// printjson( x ); + //printjson( x ); } t.save( {} ); @@ -14,11 +14,11 @@ function op( drop ) { for ( var i in p ) { var o = p[ i ]; if ( drop ) { - if ( o.active && o.query && o.query.drop && o.query.drop == "jstests_drop2" ) { + if ( o.query && o.query.drop && o.query.drop == "jstests_drop2" ) { return o.opid; } } else { - if ( o.active && o.query && o.query.query && o.query.query.$where && o.ns == "test.jstests_drop2" ) { + if ( o.query && o.query.query && o.query.query.$where && o.ns == "test.jstests_drop2" ) { return o.opid; } } diff --git a/jstests/geo_update.js b/jstests/geo_update.js new file mode 100644 index 0000000..dd4b28c --- /dev/null +++ b/jstests/geo_update.js @@ -0,0 +1,37 @@ +// Tests geo queries w/ update & upsert +// from SERVER-3428 + +var coll = db.testGeoUpdate +coll.drop() + +coll.ensureIndex({ loc : "2d" }) + +// Test normal update +print( "Updating..." ) + +coll.insert({ loc : [1.0, 2.0] }) + +coll.update({ loc : { $near : [1.0, 2.0] } }, + { x : true, loc : [1.0, 2.0] }) + +// Test upsert +print( "Upserting..." ) + +coll.update({ loc : { $within : { $center : [[10, 20], 1] } } }, + { x : true }, + true) + +coll.update({ loc : { $near : [10.0, 20.0], $maxDistance : 1 } }, + { x : true }, + true) + + +coll.update({ loc : { $near : [100, 100], $maxDistance : 1 } }, + { $set : { loc : [100, 100] }, $push : { people : "chris" } }, + true) + +coll.update({ loc : { $near : [100, 100], $maxDistance : 1 } }, + { $set : { loc : [100, 100] }, $push : { people : "john" } }, + true) + +assert.eq( 4, coll.find().itcount() ) diff --git a/jstests/index_fornew.js b/jstests/index_fornew.js new file mode 100644 index 0000000..6c3c158 --- /dev/null +++ b/jstests/index_fornew.js @@ -0,0 +1,13 @@ + +t = db.index_fornew; +t.drop(); + +t.insert( { x : 1 } ) +t.ensureIndex( { x : 1 } , { v : 1 } ) +assert.eq( 1 , t.getIndexes()[1].v , tojson( t.getIndexes() ) ); + +assert.throws( function(){ t.findOne( { x : 1 } ); } ) + +t.reIndex(); +assert.eq( 0 , t.getIndexes()[1].v , tojson( t.getIndexes() ) ); +assert( t.findOne( { x : 1 } ) ); diff --git a/jstests/slowNightly/sharding_balance4.js b/jstests/slowNightly/sharding_balance4.js index 4cbbba6..c7f76dd 100644 --- a/jstests/slowNightly/sharding_balance4.js +++ b/jstests/slowNightly/sharding_balance4.js @@ -90,8 +90,8 @@ function diff(){ if ( le.err ) print( "ELIOT ELIOT : " + tojson( le ) + "\t" + myid ); - assert( le.updatedExisting , "GLE diff 1: " + tojson(le) ) - assert.eq( 1 , le.n , "GLE diff 2: " + tojson(le) ) + assert( le.updatedExisting , "GLE diff 1 myid: " + myid + " " + tojson(le) ) + assert.eq( 1 , le.n , "GLE diff 2 myid: " + myid + " " + tojson(le) ) if ( Math.random() > .99 ){ diff --git a/rpm/init.d-mongod b/rpm/init.d-mongod index b7d4567..423eed5 100644 --- a/rpm/init.d-mongod +++ b/rpm/init.d-mongod @@ -13,10 +13,16 @@ # things from mongod.conf get there by mongod reading it - -OPTIONS=" -f /etc/mongod.conf" +# NOTE: if you change any OPTIONS here, you get what you pay for: +# this script assumes all options are in the config file. +CONFIGFILE="/etc/mongod.conf" +OPTIONS=" -f $CONFIGFILE" SYSCONFIG="/etc/sysconfig/mongod" +# FIXME: 1.9.x has a --shutdown flag that parses the config file and +# shuts down the correct running pid, but that's unavailable in 1.8 +# for now. This can go away when this script stops supporting 1.8. +DBPATH=`awk -F= '/^dbpath=/{print $2}' "$CONFIGFILE"` mongod=${MONGOD-/usr/bin/mongod} MONGO_USER=mongod @@ -36,7 +42,7 @@ start() stop() { echo -n $"Stopping mongod: " - killproc -p /var/lib/mongo/mongod.lock -t30 -TERM /usr/bin/mongod + killproc -p "$DBPATH"/mongod.lock -t30 -TERM /usr/bin/mongod RETVAL=$? echo [ $RETVAL -eq 0 ] && rm -f /var/lock/subsys/mongod diff --git a/rpm/mongo.spec b/rpm/mongo.spec index 03a9bc4..94dba51 100644 --- a/rpm/mongo.spec +++ b/rpm/mongo.spec @@ -1,5 +1,5 @@ Name: mongo -Version: 1.8.2 +Version: 1.8.3 Release: mongodb_1%{?dist} Summary: mongo client shell and tools License: AGPL 3.0 diff --git a/s/chunk.cpp b/s/chunk.cpp index 1e473e2..2d0ad5d 100644 --- a/s/chunk.cpp +++ b/s/chunk.cpp @@ -749,7 +749,7 @@ namespace mongo { FieldRange range = frs->range(_key.key().firstElement().fieldName()); if ( !range.nontrivial() ) { DEV PRINT(range.nontrivial()); - getAllShards(shards); + getAllShards_inlock(shards); return; } } @@ -806,6 +806,10 @@ namespace mongo { void ChunkManager::getAllShards( set& all ) { rwlock lk( _lock , false ); + getAllShards_inlock( all ); + } + + void ChunkManager::getAllShards_inlock( set& all ){ all.insert(_shards.begin(), _shards.end()); } diff --git a/s/chunk.h b/s/chunk.h index 21e1fbf..6054afc 100644 --- a/s/chunk.h +++ b/s/chunk.h @@ -350,6 +350,7 @@ namespace mongo { void _load(); void ensureIndex_inlock(); + void getAllShards_inlock( set& all ); string _ns; ShardKeyPattern _key; diff --git a/s/client.cpp b/s/client.cpp index c0d25fb..c053289 100644 --- a/s/client.cpp +++ b/s/client.cpp @@ -141,7 +141,7 @@ namespace mongo { if ( shards->size() == 1 ) { string theShard = *(shards->begin() ); - ShardConnection conn( theShard , "" ); + ShardConnection conn( theShard , "", true ); BSONObj res; bool ok = false; @@ -211,7 +211,7 @@ namespace mongo { for ( set::iterator i = shards->begin(); i != shards->end(); i++ ) { string theShard = *i; bbb.append( theShard ); - ShardConnection conn( theShard , "" ); + ShardConnection conn( theShard , "", true ); BSONObj res; bool ok = false; try { diff --git a/s/config.cpp b/s/config.cpp index 9ed3207..0766717 100644 --- a/s/config.cpp +++ b/s/config.cpp @@ -20,6 +20,8 @@ #include "../util/message.h" #include "../util/stringutils.h" #include "../util/unittest.h" +#include "../util/timer.h" + #include "../client/connpool.h" #include "../client/model.h" #include "../db/pdfile.h" @@ -53,8 +55,15 @@ namespace mongo { DBConfig::CollectionInfo::CollectionInfo( const BSONObj& in ) { _dirty = false; _dropped = in["dropped"].trueValue(); - if ( in["key"].isABSONObj() ) + if ( in["key"].isABSONObj() ) { + Timer t; shard( in["_id"].String() , in["key"].Obj() , in["unique"].trueValue() ); + log() << "creating ChunkManager ns: " << in["_id"] + << " took: " << t.millis() << "ms" + << " sequenceNumber: " << _cm->getSequenceNumber() + << endl; + _dirty = false; + } } @@ -87,6 +96,32 @@ namespace mongo { _dirty = false; } + bool DBConfig::CollectionInfo::needsReloading( DBClientBase * conn , const BSONObj& collectionInfo ) { + if ( ! _cm ) { + return true; + } + + if ( _dirty || _dropped ) { + return true; + } + + if ( collectionInfo["dropped"].trueValue() ) { + return true; + } + + BSONObj newest = conn->findOne( ShardNS::chunk , + Query( BSON( "ns" << collectionInfo["_id"].String() ) ).sort( "lastmod" , -1 ) ); + + if ( newest.isEmpty() ) { + // either a drop or something else weird + return true; + } + + ShardChunkVersion fromdb = newest["lastmod"]; + ShardChunkVersion inmemory = _cm->getVersion(); + return fromdb != inmemory; + } + bool DBConfig::isSharded( const string& ns ) { if ( ! _shardingEnabled ) return false; @@ -232,13 +267,20 @@ namespace mongo { unserialize( o ); BSONObjBuilder b; - b.appendRegex( "_id" , (string)"^" + _name + "." ); + b.appendRegex( "_id" , (string)"^" + _name + "\\." ); auto_ptr cursor = conn->query( ShardNS::collection ,b.obj() ); assert( cursor.get() ); while ( cursor->more() ) { BSONObj o = cursor->next(); - _collections[o["_id"].String()] = CollectionInfo( o ); + string ns = o["_id"].String(); + + Collections::iterator i = _collections.find( ns ); + if ( i != _collections.end() && ! i->second.needsReloading( conn.get() , o ) ) { + continue; + } + + _collections[ns] = CollectionInfo( o ); } conn.done(); diff --git a/s/config.h b/s/config.h index 0636835..13afe23 100644 --- a/s/config.h +++ b/s/config.h @@ -88,7 +88,8 @@ namespace mongo { bool wasDropped() const { return _dropped; } void save( const string& ns , DBClientBase* conn ); - + + bool needsReloading( DBClientBase * conn , const BSONObj& collectionInfo ); private: ChunkManagerPtr _cm; diff --git a/s/d_migrate.cpp b/s/d_migrate.cpp index df12e54..6f2607d 100644 --- a/s/d_migrate.cpp +++ b/s/d_migrate.cpp @@ -165,59 +165,6 @@ namespace mongo { static const char * const cleanUpThreadName = "cleanupOldData"; - void _cleanupOldData( OldDataCleanup cleanup ) { - Client::initThread( cleanUpThreadName ); - log() << " (start) waiting to cleanup " << cleanup.ns << " from " << cleanup.min << " -> " << cleanup.max << " # cursors:" << cleanup.initial.size() << endl; - - int loops = 0; - Timer t; - while ( t.seconds() < 900 ) { // 15 minutes - assert( dbMutex.getState() == 0 ); - sleepmillis( 20 ); - - set now; - ClientCursor::find( cleanup.ns , now ); - - set left; - for ( set::iterator i=cleanup.initial.begin(); i!=cleanup.initial.end(); ++i ) { - CursorId id = *i; - if ( now.count(id) ) - left.insert( id ); - } - - if ( left.size() == 0 ) - break; - cleanup.initial = left; - - if ( ( loops++ % 200 ) == 0 ) { - log() << " (looping " << loops << ") waiting to cleanup " << cleanup.ns << " from " << cleanup.min << " -> " << cleanup.max << " # cursors:" << cleanup.initial.size() << endl; - - stringstream ss; - for ( set::iterator i=cleanup.initial.begin(); i!=cleanup.initial.end(); ++i ) { - CursorId id = *i; - ss << id << " "; - } - log() << " cursors: " << ss.str() << endl; - } - } - - cleanup.doRemove(); - - cc().shutdown(); - } - - void cleanupOldData( OldDataCleanup cleanup ) { - try { - _cleanupOldData( cleanup ); - } - catch ( std::exception& e ) { - log() << " error cleaning old data:" << e.what() << endl; - } - catch ( ... ) { - log() << " unknown error cleaning old data" << endl; - } - } - class ChunkCommandHelper : public Command { public: ChunkCommandHelper( const char * name ) @@ -243,13 +190,14 @@ namespace mongo { class MigrateFromStatus { public: - MigrateFromStatus() : _m("MigrateFromStatus") { + MigrateFromStatus() : _m("MigrateFromStatus") , _workLock( "MigrateFromStatus::WorkLock" ) { _active = false; _inCriticalSection = false; _memoryUsed = 0; } void start( string ns , const BSONObj& min , const BSONObj& max ) { + scoped_lock lk( _workLock ); scoped_lock l(_m); // reads and writes _active assert( ! _active ); @@ -568,6 +516,20 @@ namespace mongo { bool isActive() const { return _getActive(); } + + void doRemove( OldDataCleanup& cleanup ) { + while ( true ) { + { + scoped_lock lk( _workLock ); + if ( ! _active ) { + cleanup.doRemove(); + return; + } + } + sleepmillis( 100 ); + } + } + private: mutable mongo::mutex _m; // protect _inCriticalSection and _active bool _inCriticalSection; @@ -591,6 +553,9 @@ namespace mongo { list _deleted; // objects deleted during clone that should be deleted later long long _memoryUsed; // bytes in _reload + _deleted + mutable mongo::mutex _workLock; // this is used to make sure only 1 thread is doing serious work + // for now, this means migrate or removing old chunk data + bool _getActive() const { scoped_lock l(_m); return _active; } void _setActive( bool b ) { scoped_lock l(_m); _active = b; } @@ -605,6 +570,59 @@ namespace mongo { } }; + void _cleanupOldData( OldDataCleanup cleanup ) { + Client::initThread( cleanUpThreadName ); + log() << " (start) waiting to cleanup " << cleanup.ns << " from " << cleanup.min << " -> " << cleanup.max << " # cursors:" << cleanup.initial.size() << endl; + + int loops = 0; + Timer t; + while ( t.seconds() < 900 ) { // 15 minutes + assert( dbMutex.getState() == 0 ); + sleepmillis( 20 ); + + set now; + ClientCursor::find( cleanup.ns , now ); + + set left; + for ( set::iterator i=cleanup.initial.begin(); i!=cleanup.initial.end(); ++i ) { + CursorId id = *i; + if ( now.count(id) ) + left.insert( id ); + } + + if ( left.size() == 0 ) + break; + cleanup.initial = left; + + if ( ( loops++ % 200 ) == 0 ) { + log() << " (looping " << loops << ") waiting to cleanup " << cleanup.ns << " from " << cleanup.min << " -> " << cleanup.max << " # cursors:" << cleanup.initial.size() << endl; + + stringstream ss; + for ( set::iterator i=cleanup.initial.begin(); i!=cleanup.initial.end(); ++i ) { + CursorId id = *i; + ss << id << " "; + } + log() << " cursors: " << ss.str() << endl; + } + } + + migrateFromStatus.doRemove( cleanup ); + + cc().shutdown(); + } + + void cleanupOldData( OldDataCleanup cleanup ) { + try { + _cleanupOldData( cleanup ); + } + catch ( std::exception& e ) { + log() << " error cleaning old data:" << e.what() << endl; + } + catch ( ... ) { + log() << " unknown error cleaning old data" << endl; + } + } + void logOpForSharding( const char * opstr , const char * ns , const BSONObj& obj , BSONObj * patt ) { migrateFromStatus.logOp( opstr , ns , obj , patt ); } diff --git a/s/shard.h b/s/shard.h index 836ffe7..70e478c 100644 --- a/s/shard.h +++ b/s/shard.h @@ -213,9 +213,9 @@ namespace mongo { class ShardConnection : public AScopedConnection { public: - ShardConnection( const Shard * s , const string& ns ); - ShardConnection( const Shard& s , const string& ns ); - ShardConnection( const string& addr , const string& ns ); + ShardConnection( const Shard * s , const string& ns, bool ignoreDirect = false ); + ShardConnection( const Shard& s , const string& ns, bool ignoreDirect = false ); + ShardConnection( const string& addr , const string& ns, bool ignoreDirect = false ); ~ShardConnection(); @@ -265,7 +265,7 @@ namespace mongo { static void checkMyConnectionVersions( const string & ns ); private: - void _init(); + void _init( bool ignoreDirect = false ); void _finishInit(); bool _finishedInit; diff --git a/s/shardconnection.cpp b/s/shardconnection.cpp index d05f5b1..ec14139 100644 --- a/s/shardconnection.cpp +++ b/s/shardconnection.cpp @@ -41,6 +41,9 @@ namespace mongo { boost::function4 checkShardVersionCB = defaultCheckShardVersion; boost::function1 resetShardVersionCB = defaultResetShardVersion; + // Only print the non-top-level-shard-conn warning once if not verbose + volatile bool printedShardConnWarning = false; + /** * holds all the actual db connections for a client to various servers * 1 pre thread, so don't have to worry about thread safety @@ -76,9 +79,35 @@ namespace mongo { _hosts.clear(); } - DBClientBase * get( const string& addr , const string& ns ) { + DBClientBase * get( const string& addr , const string& ns, bool ignoreDirect = false ) { _check( ns ); + // Determine if non-shard conn is RS member for warning + // All shards added to _hosts if not present in _check() + if( ( logLevel >= 1 || ! printedShardConnWarning ) && ! ignoreDirect && _hosts.find( addr ) == _hosts.end() ){ + + vector all; + Shard::getAllShards( all ); + + bool isRSMember = false; + string parentShard; + for ( unsigned i = 0; i < all.size(); i++ ) { + string connString = all[i].getConnString(); + if( connString.find( addr ) != string::npos && connString.find( '/' ) != string::npos ){ + isRSMember = true; + parentShard = connString; + break; + } + } + + if( isRSMember ){ + printedShardConnWarning = true; + warning() << "adding shard sub-connection " << addr << " (parent " << parentShard << ") as sharded, this is safe but unexpected" << endl; + printStackTrace(); + } + } + + Status* &s = _hosts[addr]; if ( ! s ) s = new Status(); @@ -120,22 +149,25 @@ namespace mongo { } void checkVersions( const string& ns ) { + vector all; Shard::getAllShards( all ); + + // Now only check top-level shard connections for ( unsigned i=0; iavail ) + s->avail = pool.get( sconnString ); + + checkShardVersionCB( *s->avail, ns, false, 1 ); - for ( HostMap::iterator i=_hosts.begin(); i!=_hosts.end(); ++i ) { - if ( ! Shard::isAShardNode( i->first ) ) - continue; - Status* ss = i->second; - assert( ss ); - if ( ! ss->avail ) - ss->avail = pool.get( i->first ); - checkShardVersionCB( *ss->avail , ns , false , 1 ); } } @@ -189,24 +221,24 @@ namespace mongo { thread_specific_ptr ClientConnections::_perThread; - ShardConnection::ShardConnection( const Shard * s , const string& ns ) + ShardConnection::ShardConnection( const Shard * s , const string& ns, bool ignoreDirect ) : _addr( s->getConnString() ) , _ns( ns ) { - _init(); + _init( ignoreDirect ); } - ShardConnection::ShardConnection( const Shard& s , const string& ns ) + ShardConnection::ShardConnection( const Shard& s , const string& ns, bool ignoreDirect ) : _addr( s.getConnString() ) , _ns( ns ) { - _init(); + _init( ignoreDirect ); } - ShardConnection::ShardConnection( const string& addr , const string& ns ) + ShardConnection::ShardConnection( const string& addr , const string& ns, bool ignoreDirect ) : _addr( addr ) , _ns( ns ) { - _init(); + _init( ignoreDirect ); } - void ShardConnection::_init() { + void ShardConnection::_init( bool ignoreDirect ) { assert( _addr.size() ); - _conn = ClientConnections::threadInstance()->get( _addr , _ns ); + _conn = ClientConnections::threadInstance()->get( _addr , _ns, ignoreDirect ); _finishedInit = false; } diff --git a/s/strategy_shard.cpp b/s/strategy_shard.cpp index 26ea79a..337fa58 100644 --- a/s/strategy_shard.cpp +++ b/s/strategy_shard.cpp @@ -151,7 +151,7 @@ namespace mongo { // Many operations benefit from having the shard key early in the object o = manager->getShardKey().moveToFront(o); - const int maxTries = 10; + const int maxTries = 30; bool gotThrough = false; for ( int i=0; igetChunkManager( ns )->getVersion() ) { + ShardChunkVersion start = db->getChunkManager( ns )->getVersion(); + + if ( needVersion.isSet() && needVersion <= start ) { // this means when the write went originally, the version was old // if we're here, it means we've already updated the config, so don't need to do again //db->getChunkManager( ns , true ); // SERVER-1349 @@ -176,7 +178,16 @@ namespace mongo { // we received a writeback object that was sent to a previous version of a shard // the actual shard may not have the object the writeback operation is for // we need to reload the chunk manager and get the new shard versions - db->getChunkManager( ns , true ); + bool good = false; + for ( int i=0; i<100; i++ ) { + if ( db->getChunkManager( ns , true )->getVersion() >= needVersion ) { + good = true; + break; + } + log() << "writeback getChunkManager didn't update?" << endl; + sleepmillis(10); + } + assert( good ); } // do request and then call getLastError diff --git a/scripting/engine_spidermonkey.cpp b/scripting/engine_spidermonkey.cpp index 73ebfaa..aed7b13 100644 --- a/scripting/engine_spidermonkey.cpp +++ b/scripting/engine_spidermonkey.cpp @@ -1073,7 +1073,7 @@ namespace mongo { JS_SetCStringsAreUTF8(); #endif - _runtime = JS_NewRuntime(8L * 1024L * 1024L); + _runtime = JS_NewRuntime(64L * 1024L * 1024L); uassert( 10221 , "JS_NewRuntime failed" , _runtime ); if ( ! utf8Ok() ) { diff --git a/tools/dump.cpp b/tools/dump.cpp index 155f84b..29553f4 100644 --- a/tools/dump.cpp +++ b/tools/dump.cpp @@ -333,13 +333,22 @@ public: auth( "admin" ); BSONObj res = conn( true ).findOne( "admin.$cmd" , BSON( "listDatabases" << 1 ) ); - BSONObj dbs = res.getField( "databases" ).embeddedObjectUserCheck(); + if ( ! res["databases"].isABSONObj() ) { + error() << "output of listDatabases isn't what we expected, no 'databases' field:\n" << res << endl; + return -2; + } + BSONObj dbs = res["databases"].embeddedObjectUserCheck(); set keys; dbs.getFieldNames( keys ); for ( set::iterator i = keys.begin() ; i != keys.end() ; i++ ) { string key = *i; + + if ( ! dbs[key].isABSONObj() ) { + error() << "database field not an object key: " << key << " value: " << dbs[key] << endl; + return -3; + } - BSONObj dbobj = dbs.getField( key ).embeddedObjectUserCheck(); + BSONObj dbobj = dbs[key].embeddedObjectUserCheck(); const char * dbName = dbobj.getField( "name" ).valuestr(); if ( (string)dbName == "local" ) diff --git a/util/message_server_port.cpp b/util/message_server_port.cpp index 76bd78d..409b0c7 100644 --- a/util/message_server_port.cpp +++ b/util/message_server_port.cpp @@ -26,6 +26,10 @@ #include "../db/lasterror.h" #include "../db/stats/counters.h" +#ifdef __linux__ +# include +#endif + namespace mongo { namespace pms { @@ -105,7 +109,36 @@ namespace mongo { } try { +#ifndef __linux__ // TODO: consider making this ifdef _WIN32 boost::thread thr( boost::bind( &pms::threadRun , p ) ); +#else + pthread_attr_t attrs; + pthread_attr_init(&attrs); + pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); + + static const size_t STACK_SIZE = 4*1024*1024; + + struct rlimit limits; + assert(getrlimit(RLIMIT_STACK, &limits) == 0); + if (limits.rlim_cur > STACK_SIZE) { + pthread_attr_setstacksize(&attrs, (DEBUG_BUILD + ? (STACK_SIZE / 2) + : STACK_SIZE)); + } + else if (limits.rlim_cur < 1024*1024) { + warning() << "Stack size set to " << (limits.rlim_cur/1024) << "KB. We suggest at least 1MB" << endl; + } + + pthread_t thread; + int failed = pthread_create(&thread, &attrs, (void*(*)(void*)) &pms::threadRun, p); + + pthread_attr_destroy(&attrs); + + if (failed) { + log() << "pthread_create failed: " << errnoWithDescription(failed) << endl; + throw boost::thread_resource_error(); // for consistency with boost::thread + } +#endif } catch ( boost::thread_resource_error& ) { connTicketHolder.release(); diff --git a/util/version.cpp b/util/version.cpp index 78a31be..4045cb5 100644 --- a/util/version.cpp +++ b/util/version.cpp @@ -27,7 +27,7 @@ namespace mongo { - const char versionString[] = "1.8.2"; + const char versionString[] = "1.8.3"; string mongodVersion() { stringstream ss; -- cgit v1.2.3