summaryrefslogtreecommitdiff
path: root/s/d_split.cpp
diff options
context:
space:
mode:
Diffstat (limited to 's/d_split.cpp')
-rw-r--r--s/d_split.cpp197
1 files changed, 197 insertions, 0 deletions
diff --git a/s/d_split.cpp b/s/d_split.cpp
new file mode 100644
index 0000000..7ae8384
--- /dev/null
+++ b/s/d_split.cpp
@@ -0,0 +1,197 @@
+// d_split.cpp
+
+/**
+* Copyright (C) 2008 10gen Inc.
+*
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU Affero General Public License, version 3,
+* as published by the Free Software Foundation.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU Affero General Public License for more details.
+*
+* You should have received a copy of the GNU Affero General Public License
+* along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#include "pch.h"
+#include <map>
+#include <string>
+
+#include "../db/btree.h"
+#include "../db/commands.h"
+#include "../db/dbmessage.h"
+#include "../db/jsobj.h"
+#include "../db/query.h"
+#include "../db/queryoptimizer.h"
+
+namespace mongo {
+
+ // TODO: Fold these checks into each command.
+ static IndexDetails *cmdIndexDetailsForRange( const char *ns, string &errmsg, BSONObj &min, BSONObj &max, BSONObj &keyPattern ) {
+ if ( ns[ 0 ] == '\0' || min.isEmpty() || max.isEmpty() ) {
+ errmsg = "invalid command syntax (note: min and max are required)";
+ return 0;
+ }
+ return indexDetailsForRange( ns, errmsg, min, max, keyPattern );
+ }
+
+
+ class CmdMedianKey : public Command {
+ public:
+ CmdMedianKey() : Command( "medianKey" ) {}
+ virtual bool slaveOk() const { return true; }
+ virtual LockType locktype() const { return READ; }
+ virtual void help( stringstream &help ) const {
+ help <<
+ "Internal command.\n"
+ "example: { medianKey:\"blog.posts\", keyPattern:{x:1}, min:{x:10}, max:{x:55} }\n"
+ "NOTE: This command may take a while to run";
+ }
+ bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){
+ const char *ns = jsobj.getStringField( "medianKey" );
+ BSONObj min = jsobj.getObjectField( "min" );
+ BSONObj max = jsobj.getObjectField( "max" );
+ BSONObj keyPattern = jsobj.getObjectField( "keyPattern" );
+
+ Client::Context ctx( ns );
+
+ IndexDetails *id = cmdIndexDetailsForRange( ns, errmsg, min, max, keyPattern );
+ if ( id == 0 )
+ return false;
+
+ Timer timer;
+ int num = 0;
+ NamespaceDetails *d = nsdetails(ns);
+ int idxNo = d->idxNo(*id);
+ for( BtreeCursor c( d, idxNo, *id, min, max, false, 1 ); c.ok(); c.advance(), ++num );
+ num /= 2;
+ BtreeCursor c( d, idxNo, *id, min, max, false, 1 );
+ for( ; num; c.advance(), --num );
+
+ ostringstream os;
+ os << "Finding median for index: " << keyPattern << " between " << min << " and " << max;
+ logIfSlow( timer , os.str() );
+
+ if ( !c.ok() ) {
+ errmsg = "no index entries in the specified range";
+ return false;
+ }
+
+ BSONObj median = c.prettyKey( c.currKey() );
+ result.append( "median", median );
+
+ int x = median.woCompare( min , BSONObj() , false );
+ int y = median.woCompare( max , BSONObj() , false );
+ if ( x == 0 || y == 0 ){
+ // its on an edge, ok
+ }
+ else if ( x < 0 && y < 0 ){
+ log( LL_ERROR ) << "median error (1) min: " << min << " max: " << max << " median: " << median << endl;
+ errmsg = "median error 1";
+ return false;
+ }
+ else if ( x > 0 && y > 0 ){
+ log( LL_ERROR ) << "median error (2) min: " << min << " max: " << max << " median: " << median << endl;
+ errmsg = "median error 2";
+ return false;
+ }
+
+ return true;
+ }
+ } cmdMedianKey;
+
+ class SplitVector : public Command {
+ public:
+ SplitVector() : Command( "splitVector" , false ){}
+ virtual bool slaveOk() const { return false; }
+ virtual LockType locktype() const { return READ; }
+ virtual void help( stringstream &help ) const {
+ help <<
+ "Internal command.\n"
+ "example: { splitVector : \"myLargeCollection\" , keyPattern : {x:1} , maxChunkSize : 200 }\n"
+ "maxChunkSize unit in MBs\n"
+ "NOTE: This command may take a while to run";
+ }
+ bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ){
+ const char* ns = jsobj.getStringField( "splitVector" );
+ BSONObj keyPattern = jsobj.getObjectField( "keyPattern" );
+
+ long long maxChunkSize = 0;
+ BSONElement maxSizeElem = jsobj[ "maxChunkSize" ];
+ if ( ! maxSizeElem.eoo() ){
+ maxChunkSize = maxSizeElem.numberLong() * 1<<20;
+ } else {
+ errmsg = "need to specify the desired max chunk size";
+ return false;
+ }
+
+ Client::Context ctx( ns );
+
+ BSONObjBuilder minBuilder;
+ BSONObjBuilder maxBuilder;
+ BSONForEach(key, keyPattern){
+ minBuilder.appendMinKey( key.fieldName() );
+ maxBuilder.appendMaxKey( key.fieldName() );
+ }
+ BSONObj min = minBuilder.obj();
+ BSONObj max = maxBuilder.obj();
+
+ IndexDetails *idx = cmdIndexDetailsForRange( ns , errmsg , min , max , keyPattern );
+ if ( idx == NULL ){
+ errmsg = "couldn't find index over splitting key";
+ return false;
+ }
+
+ NamespaceDetails *d = nsdetails( ns );
+ BtreeCursor c( d , d->idxNo(*idx) , *idx , min , max , false , 1 );
+
+ // We'll use the average object size and number of object to find approximately how many keys
+ // each chunk should have. We'll split a little smaller than the specificied by 'maxSize'
+ // assuming a recently sharded collectio is still going to grow.
+
+ const long long dataSize = d->datasize;
+ const long long recCount = d->nrecords;
+ long long keyCount = 0;
+ if (( dataSize > 0 ) && ( recCount > 0 )){
+ const long long avgRecSize = dataSize / recCount;
+ keyCount = 90 * maxChunkSize / (100 * avgRecSize);
+ }
+
+ // We traverse the index and add the keyCount-th key to the result vector. If that key
+ // appeared in the vector before, we omit it. The assumption here is that all the
+ // instances of a key value live in the same chunk.
+
+ Timer timer;
+ long long currCount = 0;
+ vector<BSONObj> splitKeys;
+ BSONObj currKey;
+ while ( c.ok() ){
+ currCount++;
+ if ( currCount > keyCount ){
+ if ( ! currKey.isEmpty() && (currKey.woCompare( c.currKey() ) == 0 ) )
+ continue;
+
+ currKey = c.currKey();
+ splitKeys.push_back( c.prettyKey( currKey ) );
+ currCount = 0;
+ }
+ c.advance();
+ }
+
+ ostringstream os;
+ os << "Finding the split vector for " << ns << " over "<< keyPattern;
+ logIfSlow( timer , os.str() );
+
+ // Warning: we are sending back an array of keys but are currently limited to
+ // 4MB work of 'result' size. This should be okay for now.
+
+ result.append( "splitKeys" , splitKeys );
+ return true;
+
+ }
+ } cmdSplitVector;
+
+} // namespace mongo