Diffstat (limited to 's/chunk.cpp')
-rw-r--r--  s/chunk.cpp | 114
1 file changed, 68 insertions(+), 46 deletions(-)
diff --git a/s/chunk.cpp b/s/chunk.cpp
index 09dc994..066265e 100644
--- a/s/chunk.cpp
+++ b/s/chunk.cpp
@@ -208,7 +208,7 @@ namespace mongo {
// no split points means there isn't enough data to split on
// 1 split point means we have between half the chunk size to full chunk size
// so we shouldn't split
- LOG(1) << "chunk not full enough to trigger auto-split" << endl;
+ LOG(1) << "chunk not full enough to trigger auto-split " << ( candidates.size() == 0 ? "no split entry" : candidates[0].toString() ) << endl;
return BSONObj();
}
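
The comment above gives the rule this log line enforces: zero split candidates means there is not enough data, and a single candidate means the chunk is somewhere between half and full size, so splitting would be premature. A minimal sketch of that guard, reconstructed from the visible context (the exact condition is an assumption):

    // Reconstruction of the guard around the new log line; `candidates`
    // holds the proposed split keys computed earlier in this function.
    if ( candidates.size() <= 1 ) {
        // 0 candidates: too little data; 1 candidate: between half and
        // full chunk size, so a split would leave one undersized chunk
        LOG(1) << "chunk not full enough to trigger auto-split "
               << ( candidates.size() == 0 ? "no split entry" : candidates[0].toString() )
               << endl;
        return BSONObj();
    }
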
@@ -342,6 +342,12 @@ namespace mongo {
if ( _dataWritten < splitThreshold / 5 )
return false;
+
+ if ( ! getManager()->_splitTickets.tryAcquire() ) {
+ LOG(1) << "won't auto split becaue not enough tickets: " << getManager()->getns() << endl;
+ return false;
+ }
+ TicketHolderReleaser releaser( &getManager()->_splitTickets );
// this is a bit ugly
// we need it so that mongos blocks for the writes to actually be committed
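
This hunk throttles auto-splits with a ticket pool: tryAcquire() takes a ticket without blocking, bails out if none are free, and the RAII releaser returns the ticket on every exit path of the function. A minimal sketch of that pattern, assuming a plain mutex-guarded counter (MongoDB's actual TicketHolder and TicketHolderReleaser live in its concurrency utilities; this is an illustration, not the real implementation):

    #include <mutex>

    // Counting ticket pool: tryAcquire() never blocks, release() returns
    // a ticket, and the releaser guarantees release on scope exit.
    class TicketHolder {
    public:
        explicit TicketHolder( int n ) : _available( n ) {}
        bool tryAcquire() {
            std::lock_guard<std::mutex> lk( _m );
            if ( _available == 0 )
                return false;
            --_available;
            return true;
        }
        void release() {
            std::lock_guard<std::mutex> lk( _m );
            ++_available;
        }
    private:
        std::mutex _m;
        int _available;
    };

    class TicketHolderReleaser {
    public:
        explicit TicketHolderReleaser( TicketHolder* h ) : _h( h ) {}
        ~TicketHolderReleaser() { _h->release(); }
    private:
        TicketHolder* _h;
    };

The pool itself is sized later in this diff, in the ChunkManager constructor (_splitTickets( 5 )), so at most five auto-split attempts per collection run concurrently.
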
@@ -352,8 +358,6 @@ namespace mongo {
LOG(1) << "about to initiate autosplit: " << *this << " dataWritten: " << _dataWritten << " splitThreshold: " << splitThreshold << endl;
- _dataWritten = 0; // reset so we check often enough
-
BSONObj res;
BSONObj splitPoint = singleSplit( false /* does not force a split if not enough data */ , res );
if ( splitPoint.isEmpty() ) {
@@ -361,6 +365,15 @@ namespace mongo {
_dataWritten = 0; // this means there wasn't enough data to split, so don't want to try again until considerable more data
return false;
}
+
+ if ( maxIsInf() || minIsInf() ) {
+ // we don't want to reset _dataWritten since we kind of want to check the other side right away
+ }
+ else {
+ _dataWritten = 0; // we're splitting, so should wait a bit
+ }
+
+
log() << "autosplitted " << _manager->getns() << " shard: " << toString()
<< " on: " << splitPoint << "(splitThreshold " << splitThreshold << ")"
@@ -497,7 +510,9 @@ namespace mongo {
// The shard versioning mechanism hinges on keeping track of the number of times we reloaded ChunkManager's.
// Increasing this number here will prompt checkShardVersion() to refresh the connection-level versions to
// the most up to date value.
- _sequenceNumber(++NextSequenceNumber)
+ _sequenceNumber(++NextSequenceNumber),
+
+ _splitTickets( 5 )
{
int tries = 3;
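
Besides the sequence number, the constructor now seeds the per-collection ticket pool. A sketch of how the two members fit together, reusing the TicketHolder sketch above and substituting std::atomic for mongo's own atomic counter type (an assumption):

    #include <atomic>

    // Every ChunkManager reload takes the next global stamp; connections
    // remember the stamp they last saw, and checkShardVersion() refreshes
    // a connection whose stamp lags behind.
    static std::atomic<unsigned long long> NextSequenceNumber{ 0 };

    class ChunkManagerSketch {
    public:
        ChunkManagerSketch()
            : _sequenceNumber( ++NextSequenceNumber ),
              _splitTickets( 5 ) {}  // cap: five concurrent autosplits per collection
    private:
        unsigned long long _sequenceNumber;
        TicketHolder _splitTickets;
    };
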
@@ -611,44 +626,65 @@ namespace mongo {
return _key.hasShardKey( obj );
}
- void ChunkManager::createFirstChunk( const Shard& shard ) const {
+ void ChunkManager::createFirstChunks( const Shard& shard ) const {
// TODO distlock?
assert( _chunkMap.size() == 0 );
- Chunk c (this, _key.globalMin(), _key.globalMax(), shard);
+ unsigned long long numObjects = 0;
+ {
+ // get stats to see if there is any data
+ ScopedDbConnection shardConn( shard.getConnString() );
+ numObjects = shardConn->count( getns() );
+ shardConn.done();
+ }
// this is the first chunk; start the versioning from scratch
ShardChunkVersion version;
version.incMajor();
- // build update for the chunk collection
- BSONObjBuilder chunkBuilder;
- c.serialize( chunkBuilder , version );
- BSONObj chunkCmd = chunkBuilder.obj();
+ Chunk c(this, _key.globalMin(), _key.globalMax(), shard);
- log() << "about to create first chunk for: " << _ns << endl;
+ vector<BSONObj> splitPoints;
+ if ( numObjects > 0 )
+ c.pickSplitVector( splitPoints , Chunk::MaxChunkSize );
+
+ log() << "going to create " << splitPoints.size() + 1 << " chunk(s) for: " << _ns << endl;
+
- ScopedDbConnection conn( configServer.modelServer() );
- BSONObj res;
- conn->update( Chunk::chunkMetadataNS, QUERY( "_id" << c.genID() ), chunkCmd, true, false );
+ ScopedDbConnection conn( configServer.modelServer() );
+
+ for ( unsigned i=0; i<=splitPoints.size(); i++ ) {
+ BSONObj min = i == 0 ? _key.globalMin() : splitPoints[i-1];
+ BSONObj max = i < splitPoints.size() ? splitPoints[i] : _key.globalMax();
+
+ Chunk temp( this , min , max , shard );
+
+ BSONObjBuilder chunkBuilder;
+ temp.serialize( chunkBuilder , version );
+ BSONObj chunkObj = chunkBuilder.obj();
+
+ conn->update( Chunk::chunkMetadataNS, QUERY( "_id" << temp.genID() ), chunkObj, true, false );
+
+ version.incMinor();
+ }
string errmsg = conn->getLastError();
if ( errmsg.size() ) {
- stringstream ss;
- ss << "saving first chunk failed. cmd: " << chunkCmd << " result: " << errmsg;
- log( LL_ERROR ) << ss.str() << endl;
- msgasserted( 13592 , ss.str() );
+ string ss = str::stream() << "creating first chunks failed. result: " << errmsg;
+ error() << ss << endl;
+ msgasserted( 15903 , ss );
}
-
+
conn.done();
- // the ensure index will have the (desired) indirect effect of creating the collection on the
- // assigned shard, as it sets up the index over the sharding keys.
- ScopedDbConnection shardConn( c.getShard().getConnString() );
- shardConn->ensureIndex( getns() , getShardKey().key() , _unique , "" , false /* do not cache ensureIndex SERVER-1691 */ );
- shardConn.done();
+ if ( numObjects == 0 ) {
+ // the ensure index will have the (desired) indirect effect of creating the collection on the
+ // assigned shard, as it sets up the index over the sharding keys.
+ ScopedDbConnection shardConn( c.getShard().getConnString() );
+ shardConn->ensureIndex( getns() , getShardKey().key() , _unique , "" , false ); // do not cache ensureIndex SERVER-1691
+ shardConn.done();
+ }
- log() << "successfully created first chunk for " << c.toString() << endl;
}
ChunkPtr ChunkManager::findChunk( const BSONObj & obj ) const {
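
The rewritten createFirstChunks turns k split points into k+1 chunks: the first range starts at the global minimum, the last ends at the global maximum, and each interior boundary is shared by adjacent chunks; every chunk is upserted under the same major version with an incremented minor version. A self-contained sketch of the boundary walk (std::string stands in for BSONObj so the example compiles on its own):

    #include <string>
    #include <utility>
    #include <vector>

    // Produce k+1 [min, max) ranges from k split points, mirroring the
    // loop in the hunk: i == 0 starts at globalMin, i == k ends at globalMax.
    std::vector< std::pair<std::string, std::string> > chunkRanges(
            const std::vector<std::string>& splitPoints,
            const std::string& globalMin,
            const std::string& globalMax ) {
        std::vector< std::pair<std::string, std::string> > ranges;
        for ( size_t i = 0; i <= splitPoints.size(); i++ ) {
            const std::string& min = ( i == 0 ) ? globalMin : splitPoints[i-1];
            const std::string& max = ( i < splitPoints.size() ) ? splitPoints[i] : globalMax;
            ranges.push_back( std::make_pair( min, max ) );
        }
        return ranges;
    }

Note also the behavioral split on numObjects: split points are only picked when the collection already holds data, while the ensureIndex path (which implicitly creates the collection on the shard) is now taken only when it is empty.
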
@@ -836,26 +872,6 @@ namespace mongo {
configServer.logChange( "dropCollection" , _ns , BSONObj() );
}
- void ChunkManager::maybeChunkCollection() const {
- uassert( 13346 , "can't pre-split already splitted collection" , (_chunkMap.size() == 1) );
-
- ChunkPtr soleChunk = _chunkMap.begin()->second;
- vector<BSONObj> splitPoints;
- soleChunk->pickSplitVector( splitPoints , Chunk::MaxChunkSize );
- if ( splitPoints.empty() ) {
- LOG(1) << "not enough data to warrant chunking " << getns() << endl;
- return;
- }
-
- BSONObj res;
- ChunkPtr p;
- bool worked = soleChunk->multiSplit( splitPoints , res );
- if (!worked) {
- log( LL_WARNING ) << "could not split '" << getns() << "': " << res << endl;
- return;
- }
- }
-
ShardChunkVersion ChunkManager::getVersion( const Shard& shard ) const {
ShardVersionMap::const_iterator i = _shardVersions.find( shard );
if ( i == _shardVersions.end() )
@@ -956,7 +972,13 @@ namespace mongo {
int nc = numChunks();
- if ( nc < 10 ) {
+ if ( nc <= 1 ) {
+ return 1024;
+ }
+ else if ( nc < 3 ) {
+ return minChunkSize / 2;
+ }
+ else if ( nc < 10 ) {
splitThreshold = max( splitThreshold / 4 , minChunkSize );
}
else if ( nc < 20 ) {
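
The new early branches make young collections split aggressively: a collection with a single chunk splits after only 1024 bytes of writes, one with two chunks at half the minimum chunk size, and the gentler /4 scaling resumes below ten chunks. A sketch of the schedule as a pure function; the branches past nc < 10 are cut off by the diff, so the final fallback here is an assumption:

    #include <algorithm>

    int splitThresholdFor( int nc, int splitThreshold, int minChunkSize ) {
        if ( nc <= 1 )
            return 1024;              // brand new collection: split almost immediately
        if ( nc < 3 )
            return minChunkSize / 2;  // still tiny: half the configured minimum
        if ( nc < 10 )
            return std::max( splitThreshold / 4, minChunkSize );
        return splitThreshold;        // assumed: larger collections keep the full threshold
    }
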