diff options
Diffstat (limited to 'db/queryoptimizer.cpp')
-rw-r--r-- | db/queryoptimizer.cpp | 657 |
1 files changed, 351 insertions, 306 deletions
diff --git a/db/queryoptimizer.cpp b/db/queryoptimizer.cpp index e7068c2..0b9dce7 100644 --- a/db/queryoptimizer.cpp +++ b/db/queryoptimizer.cpp @@ -24,24 +24,25 @@ #include "queryoptimizer.h" #include "cmdline.h" #include "clientcursor.h" +#include <queue> //#define DEBUGQO(x) cout << x << endl; #define DEBUGQO(x) namespace mongo { - void checkTableScanAllowed( const char * ns ){ - if ( ! cmdLine.notablescan ) + void checkTableScanAllowed( const char * ns ) { + if ( ! cmdLine.noTableScan ) return; - + if ( strstr( ns , ".system." ) || - strstr( ns , "local." ) ) + strstr( ns , "local." ) ) return; - + if ( ! nsdetails( ns ) ) return; - uassert( 10111 , (string)"table scans not allowed:" + ns , ! cmdLine.notablescan ); + uassert( 10111 , (string)"table scans not allowed:" + ns , ! cmdLine.noTableScan ); } double elementDirection( const BSONElement &e ) { @@ -49,58 +50,59 @@ namespace mongo { return e.number(); return 1; } - - QueryPlan::QueryPlan( - NamespaceDetails *_d, int _idxNo, - const FieldRangeSet &fbs, const BSONObj &originalQuery, const BSONObj &order, const BSONObj &startKey, const BSONObj &endKey , string special ) : - d(_d), idxNo(_idxNo), - fbs_( fbs ), - _originalQuery( originalQuery ), - order_( order ), - index_( 0 ), - optimal_( false ), - scanAndOrderRequired_( true ), - exactKeyMatch_( false ), - direction_( 0 ), - endKeyInclusive_( endKey.isEmpty() ), - unhelpful_( false ), - _special( special ), - _type(0), - _startOrEndSpec( !startKey.isEmpty() || !endKey.isEmpty() ){ - - if ( !fbs_.matchPossible() ) { - unhelpful_ = true; - scanAndOrderRequired_ = false; + + QueryPlan::QueryPlan( + NamespaceDetails *d, int idxNo, + const FieldRangeSet &fbs, const FieldRangeSet &originalFrs, const BSONObj &originalQuery, const BSONObj &order, const BSONObj &startKey, const BSONObj &endKey , string special ) : + _d(d), _idxNo(idxNo), + _fbs( fbs ), + _originalQuery( originalQuery ), + _order( order ), + _index( 0 ), + _optimal( false ), + _scanAndOrderRequired( true ), + _exactKeyMatch( false ), + _direction( 0 ), + _endKeyInclusive( endKey.isEmpty() ), + _unhelpful( false ), + _special( special ), + _type(0), + _startOrEndSpec( !startKey.isEmpty() || !endKey.isEmpty() ) { + + if ( !_fbs.matchPossible() ) { + _unhelpful = true; + _scanAndOrderRequired = false; return; } - if( idxNo >= 0 ) { - index_ = &d->idx(idxNo); - } else { + if( _idxNo >= 0 ) { + _index = &d->idx(_idxNo); + } + else { // full table scan case - if ( order_.isEmpty() || !strcmp( order_.firstElement().fieldName(), "$natural" ) ) - scanAndOrderRequired_ = false; + if ( _order.isEmpty() || !strcmp( _order.firstElement().fieldName(), "$natural" ) ) + _scanAndOrderRequired = false; return; } - if ( _special.size() ){ - optimal_ = true; - _type = index_->getSpec().getType(); + if ( _special.size() ) { + _optimal = true; + _type = _index->getSpec().getType(); massert( 13040 , (string)"no type for special: " + _special , _type ); // hopefully safe to use original query in these contexts - don't think we can mix special with $or clause separation yet - scanAndOrderRequired_ = _type->scanAndOrderRequired( _originalQuery , order ); + _scanAndOrderRequired = _type->scanAndOrderRequired( _originalQuery , order ); return; } - BSONObj idxKey = index_->keyPattern(); + BSONObj idxKey = _index->keyPattern(); BSONObjIterator o( order ); BSONObjIterator k( idxKey ); if ( !o.moreWithEOO() ) - scanAndOrderRequired_ = false; + _scanAndOrderRequired = false; while( o.moreWithEOO() ) { BSONElement oe = o.next(); if ( oe.eoo() ) { - scanAndOrderRequired_ = false; + _scanAndOrderRequired = false; break; } if ( !k.moreWithEOO() ) @@ -116,14 +118,14 @@ namespace mongo { goto doneCheckOrder; } int d = elementDirection( oe ) == elementDirection( ke ) ? 1 : -1; - if ( direction_ == 0 ) - direction_ = d; - else if ( direction_ != d ) + if ( _direction == 0 ) + _direction = d; + else if ( _direction != d ) break; } - doneCheckOrder: - if ( scanAndOrderRequired_ ) - direction_ = 0; +doneCheckOrder: + if ( _scanAndOrderRequired ) + _direction = 0; BSONObjIterator i( idxKey ); int exactIndexedQueryCount = 0; int optimalIndexedQueryCount = 0; @@ -140,7 +142,8 @@ namespace mongo { ++optimalIndexedQueryCount; if ( !fb.equality() ) stillOptimalIndexedQueryCount = false; - } else { + } + else { if ( fb.nontrivial() ) optimalIndexedQueryCount = -1; } @@ -151,16 +154,17 @@ namespace mongo { } orderFieldsUnindexed.erase( e.fieldName() ); } - if ( !scanAndOrderRequired_ && - ( optimalIndexedQueryCount == fbs.nNontrivialRanges() ) ) - optimal_ = true; + if ( !_scanAndOrderRequired && + ( optimalIndexedQueryCount == fbs.nNontrivialRanges() ) ) + _optimal = true; if ( exactIndexedQueryCount == fbs.nNontrivialRanges() && - orderFieldsUnindexed.size() == 0 && - exactIndexedQueryCount == index_->keyPattern().nFields() && - exactIndexedQueryCount == _originalQuery.nFields() ) { - exactKeyMatch_ = true; + orderFieldsUnindexed.size() == 0 && + exactIndexedQueryCount == _index->keyPattern().nFields() && + exactIndexedQueryCount == _originalQuery.nFields() ) { + _exactKeyMatch = true; } - _frv.reset( new FieldRangeVector( fbs, idxKey, direction_ ) ); + _frv.reset( new FieldRangeVector( fbs, idxKey, _direction ) ); + _originalFrv.reset( new FieldRangeVector( originalFrs, idxKey, _direction ) ); if ( _startOrEndSpec ) { BSONObj newStart, newEnd; if ( !startKey.isEmpty() ) @@ -173,100 +177,124 @@ namespace mongo { _endKey = _frv->endKey(); } - if ( ( scanAndOrderRequired_ || order_.isEmpty() ) && - !fbs.range( idxKey.firstElement().fieldName() ).nontrivial() ) { - unhelpful_ = true; + if ( ( _scanAndOrderRequired || _order.isEmpty() ) && + !fbs.range( idxKey.firstElement().fieldName() ).nontrivial() ) { + _unhelpful = true; } } - + shared_ptr<Cursor> QueryPlan::newCursor( const DiskLoc &startLoc , int numWanted ) const { if ( _type ) { - // hopefully safe to use original query in these contexts - don't think we can mix type with $or clause separation yet - return _type->newCursor( _originalQuery , order_ , numWanted ); + // hopefully safe to use original query in these contexts - don't think we can mix type with $or clause separation yet + return _type->newCursor( _originalQuery , _order , numWanted ); } - - if ( !fbs_.matchPossible() ){ - if ( fbs_.nNontrivialRanges() ) - checkTableScanAllowed( fbs_.ns() ); + + if ( !_fbs.matchPossible() ) { + if ( _fbs.nNontrivialRanges() ) + checkTableScanAllowed( _fbs.ns() ); return shared_ptr<Cursor>( new BasicCursor( DiskLoc() ) ); } - if ( !index_ ){ - if ( fbs_.nNontrivialRanges() ) - checkTableScanAllowed( fbs_.ns() ); - return findTableScan( fbs_.ns(), order_, startLoc ); + if ( !_index ) { + if ( _fbs.nNontrivialRanges() ) + checkTableScanAllowed( _fbs.ns() ); + return findTableScan( _fbs.ns(), _order, startLoc ); } massert( 10363 , "newCursor() with start location not implemented for indexed plans", startLoc.isNull() ); - + if ( _startOrEndSpec ) { - // we are sure to spec endKeyInclusive_ - return shared_ptr<Cursor>( new BtreeCursor( d, idxNo, *index_, _startKey, _endKey, endKeyInclusive_, direction_ >= 0 ? 1 : -1 ) ); - } else if ( index_->getSpec().getType() ) { - return shared_ptr<Cursor>( new BtreeCursor( d, idxNo, *index_, _frv->startKey(), _frv->endKey(), true, direction_ >= 0 ? 1 : -1 ) ); - } else { - return shared_ptr<Cursor>( new BtreeCursor( d, idxNo, *index_, _frv, direction_ >= 0 ? 1 : -1 ) ); + // we are sure to spec _endKeyInclusive + return shared_ptr<Cursor>( new BtreeCursor( _d, _idxNo, *_index, _startKey, _endKey, _endKeyInclusive, _direction >= 0 ? 1 : -1 ) ); + } + else if ( _index->getSpec().getType() ) { + return shared_ptr<Cursor>( new BtreeCursor( _d, _idxNo, *_index, _frv->startKey(), _frv->endKey(), true, _direction >= 0 ? 1 : -1 ) ); + } + else { + return shared_ptr<Cursor>( new BtreeCursor( _d, _idxNo, *_index, _frv, _direction >= 0 ? 1 : -1 ) ); } } - + shared_ptr<Cursor> QueryPlan::newReverseCursor() const { - if ( !fbs_.matchPossible() ) + if ( !_fbs.matchPossible() ) return shared_ptr<Cursor>( new BasicCursor( DiskLoc() ) ); - if ( !index_ ) { - int orderSpec = order_.getIntField( "$natural" ); + if ( !_index ) { + int orderSpec = _order.getIntField( "$natural" ); if ( orderSpec == INT_MIN ) orderSpec = 1; - return findTableScan( fbs_.ns(), BSON( "$natural" << -orderSpec ) ); + return findTableScan( _fbs.ns(), BSON( "$natural" << -orderSpec ) ); } massert( 10364 , "newReverseCursor() not implemented for indexed plans", false ); return shared_ptr<Cursor>(); } - + BSONObj QueryPlan::indexKey() const { - if ( !index_ ) + if ( !_index ) return BSON( "$natural" << 1 ); - return index_->keyPattern(); + return _index->keyPattern(); } - + void QueryPlan::registerSelf( long long nScanned ) const { - if ( fbs_.matchPossible() ) { + if ( _fbs.matchPossible() ) { scoped_lock lk(NamespaceDetailsTransient::_qcMutex); - NamespaceDetailsTransient::get_inlock( ns() ).registerIndexForPattern( fbs_.pattern( order_ ), indexKey(), nScanned ); - } - } - - QueryPlanSet::QueryPlanSet( const char *_ns, auto_ptr< FieldRangeSet > frs, const BSONObj &originalQuery, const BSONObj &order, const BSONElement *hint, bool honorRecordedPlan, const BSONObj &min, const BSONObj &max, bool bestGuessOnly, bool mayYield ) : - ns(_ns), - _originalQuery( originalQuery ), - fbs_( frs ), - mayRecordPlan_( true ), - usingPrerecordedPlan_( false ), - hint_( BSONObj() ), - order_( order.getOwned() ), - oldNScanned_( 0 ), - honorRecordedPlan_( honorRecordedPlan ), - min_( min.getOwned() ), - max_( max.getOwned() ), - _bestGuessOnly( bestGuessOnly ), - _mayYield( mayYield ), - _yieldSometimesTracker( 256, 20 ){ + NamespaceDetailsTransient::get_inlock( ns() ).registerIndexForPattern( _fbs.pattern( _order ), indexKey(), nScanned ); + } + } + + bool QueryPlan::isMultiKey() const { + if ( _idxNo < 0 ) + return false; + return _d->isMultikey( _idxNo ); + } + + QueryPlanSet::QueryPlanSet( const char *ns, auto_ptr< FieldRangeSet > frs, auto_ptr< FieldRangeSet > originalFrs, const BSONObj &originalQuery, const BSONObj &order, const BSONElement *hint, bool honorRecordedPlan, const BSONObj &min, const BSONObj &max, bool bestGuessOnly, bool mayYield ) : + _ns(ns), + _originalQuery( originalQuery ), + _fbs( frs ), + _originalFrs( originalFrs ), + _mayRecordPlan( true ), + _usingPrerecordedPlan( false ), + _hint( BSONObj() ), + _order( order.getOwned() ), + _oldNScanned( 0 ), + _honorRecordedPlan( honorRecordedPlan ), + _min( min.getOwned() ), + _max( max.getOwned() ), + _bestGuessOnly( bestGuessOnly ), + _mayYield( mayYield ), + _yieldSometimesTracker( 256, 20 ) { if ( hint && !hint->eoo() ) { - hint_ = hint->wrap(); + _hint = hint->wrap(); } init(); } - + + bool QueryPlanSet::modifiedKeys() const { + for( PlanSet::const_iterator i = _plans.begin(); i != _plans.end(); ++i ) + if ( (*i)->isMultiKey() ) + return true; + return false; + } + + bool QueryPlanSet::hasMultiKey() const { + for( PlanSet::const_iterator i = _plans.begin(); i != _plans.end(); ++i ) + if ( (*i)->isMultiKey() ) + return true; + return false; + } + + void QueryPlanSet::addHint( IndexDetails &id ) { - if ( !min_.isEmpty() || !max_.isEmpty() ) { + if ( !_min.isEmpty() || !_max.isEmpty() ) { string errmsg; BSONObj keyPattern = id.keyPattern(); - // This reformats min_ and max_ to be used for index lookup. - massert( 10365 , errmsg, indexDetailsForRange( fbs_->ns(), errmsg, min_, max_, keyPattern ) ); + // This reformats _min and _max to be used for index lookup. + massert( 10365 , errmsg, indexDetailsForRange( _fbs->ns(), errmsg, _min, _max, keyPattern ) ); } - NamespaceDetails *d = nsdetails(ns); - plans_.push_back( PlanPtr( new QueryPlan( d, d->idxNo(id), *fbs_, _originalQuery, order_, min_, max_ ) ) ); + NamespaceDetails *d = nsdetails(_ns); + _plans.push_back( QueryPlanPtr( new QueryPlan( d, d->idxNo(id), *_fbs, *_originalFrs, _originalQuery, _order, _min, _max ) ) ); } - + // returns an IndexDetails * for a hint, 0 if hint is $natural. // hint must not be eoo() IndexDetails *parseHint( const BSONElement &hint, NamespaceDetails *d ) { @@ -281,7 +309,7 @@ namespace mongo { } } } - else if( hint.type() == Object ) { + else if( hint.type() == Object ) { BSONObj hintobj = hint.embeddedObject(); uassert( 10112 , "bad hint", !hintobj.isEmpty() ); if ( !strcmp( hintobj.firstElement().fieldName(), "$natural" ) ) { @@ -294,92 +322,93 @@ namespace mongo { return ⅈ } } - } + } uassert( 10113 , "bad hint", false ); return 0; } - + void QueryPlanSet::init() { DEBUGQO( "QueryPlanSet::init " << ns << "\t" << _originalQuery ); - plans_.clear(); - mayRecordPlan_ = true; - usingPrerecordedPlan_ = false; - - const char *ns = fbs_->ns(); + _plans.clear(); + _mayRecordPlan = true; + _usingPrerecordedPlan = false; + + const char *ns = _fbs->ns(); NamespaceDetails *d = nsdetails( ns ); - if ( !d || !fbs_->matchPossible() ) { + if ( !d || !_fbs->matchPossible() ) { // Table scan plan, when no matches are possible - plans_.push_back( PlanPtr( new QueryPlan( d, -1, *fbs_, _originalQuery, order_ ) ) ); + _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_fbs, *_originalFrs, _originalQuery, _order ) ) ); return; } - - BSONElement hint = hint_.firstElement(); + + BSONElement hint = _hint.firstElement(); if ( !hint.eoo() ) { - mayRecordPlan_ = false; + _mayRecordPlan = false; IndexDetails *id = parseHint( hint, d ); if ( id ) { addHint( *id ); - } else { - massert( 10366 , "natural order cannot be specified with $min/$max", min_.isEmpty() && max_.isEmpty() ); + } + else { + massert( 10366 , "natural order cannot be specified with $min/$max", _min.isEmpty() && _max.isEmpty() ); // Table scan plan - plans_.push_back( PlanPtr( new QueryPlan( d, -1, *fbs_, _originalQuery, order_ ) ) ); + _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_fbs, *_originalFrs, _originalQuery, _order ) ) ); } return; } - - if ( !min_.isEmpty() || !max_.isEmpty() ) { + + if ( !_min.isEmpty() || !_max.isEmpty() ) { string errmsg; BSONObj keyPattern; - IndexDetails *idx = indexDetailsForRange( ns, errmsg, min_, max_, keyPattern ); + IndexDetails *idx = indexDetailsForRange( ns, errmsg, _min, _max, keyPattern ); massert( 10367 , errmsg, idx ); - plans_.push_back( PlanPtr( new QueryPlan( d, d->idxNo(*idx), *fbs_, _originalQuery, order_, min_, max_ ) ) ); + _plans.push_back( QueryPlanPtr( new QueryPlan( d, d->idxNo(*idx), *_fbs, *_originalFrs, _originalQuery, _order, _min, _max ) ) ); return; } - if ( isSimpleIdQuery( _originalQuery ) ){ + if ( isSimpleIdQuery( _originalQuery ) ) { int idx = d->findIdIndex(); - if ( idx >= 0 ){ - usingPrerecordedPlan_ = true; - mayRecordPlan_ = false; - plans_.push_back( PlanPtr( new QueryPlan( d , idx , *fbs_ , _originalQuery, order_ ) ) ); + if ( idx >= 0 ) { + _usingPrerecordedPlan = true; + _mayRecordPlan = false; + _plans.push_back( QueryPlanPtr( new QueryPlan( d , idx , *_fbs , *_fbs , _originalQuery, _order ) ) ); return; } } - if ( _originalQuery.isEmpty() && order_.isEmpty() ){ - plans_.push_back( PlanPtr( new QueryPlan( d, -1, *fbs_, _originalQuery, order_ ) ) ); + if ( _originalQuery.isEmpty() && _order.isEmpty() ) { + _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_fbs, *_originalFrs, _originalQuery, _order ) ) ); return; } - DEBUGQO( "\t special : " << fbs_->getSpecial() ); - if ( fbs_->getSpecial().size() ){ - _special = fbs_->getSpecial(); + DEBUGQO( "\t special : " << _fbs->getSpecial() ); + if ( _fbs->getSpecial().size() ) { + _special = _fbs->getSpecial(); NamespaceDetails::IndexIterator i = d->ii(); while( i.more() ) { int j = i.pos(); IndexDetails& ii = i.next(); const IndexSpec& spec = ii.getSpec(); - if ( spec.getTypeName() == _special && spec.suitability( _originalQuery , order_ ) ){ - usingPrerecordedPlan_ = true; - mayRecordPlan_ = false; - plans_.push_back( PlanPtr( new QueryPlan( d , j , *fbs_ , _originalQuery, order_ , - BSONObj() , BSONObj() , _special ) ) ); + if ( spec.getTypeName() == _special && spec.suitability( _originalQuery , _order ) ) { + _usingPrerecordedPlan = true; + _mayRecordPlan = false; + _plans.push_back( QueryPlanPtr( new QueryPlan( d , j , *_fbs , *_fbs , _originalQuery, _order , + BSONObj() , BSONObj() , _special ) ) ); return; } } uassert( 13038 , (string)"can't find special index: " + _special + " for: " + _originalQuery.toString() , 0 ); } - if ( honorRecordedPlan_ ) { + if ( _honorRecordedPlan ) { scoped_lock lk(NamespaceDetailsTransient::_qcMutex); NamespaceDetailsTransient& nsd = NamespaceDetailsTransient::get_inlock( ns ); - BSONObj bestIndex = nsd.indexForPattern( fbs_->pattern( order_ ) ); + BSONObj bestIndex = nsd.indexForPattern( _fbs->pattern( _order ) ); if ( !bestIndex.isEmpty() ) { - PlanPtr p; - oldNScanned_ = nsd.nScannedForPattern( fbs_->pattern( order_ ) ); + QueryPlanPtr p; + _oldNScanned = nsd.nScannedForPattern( _fbs->pattern( _order ) ); if ( !strcmp( bestIndex.firstElement().fieldName(), "$natural" ) ) { // Table scan plan - p.reset( new QueryPlan( d, -1, *fbs_, _originalQuery, order_ ) ); + p.reset( new QueryPlan( d, -1, *_fbs, *_originalFrs, _originalQuery, _order ) ); } NamespaceDetails::IndexIterator i = d->ii(); @@ -387,55 +416,56 @@ namespace mongo { int j = i.pos(); IndexDetails& ii = i.next(); if( ii.keyPattern().woCompare(bestIndex) == 0 ) { - p.reset( new QueryPlan( d, j, *fbs_, _originalQuery, order_ ) ); + p.reset( new QueryPlan( d, j, *_fbs, *_originalFrs, _originalQuery, _order ) ); } } massert( 10368 , "Unable to locate previously recorded index", p.get() ); if ( !( _bestGuessOnly && p->scanAndOrderRequired() ) ) { - usingPrerecordedPlan_ = true; - mayRecordPlan_ = false; - plans_.push_back( p ); + _usingPrerecordedPlan = true; + _mayRecordPlan = false; + _plans.push_back( p ); return; } } } - + addOtherPlans( false ); } - + void QueryPlanSet::addOtherPlans( bool checkFirst ) { - const char *ns = fbs_->ns(); + const char *ns = _fbs->ns(); NamespaceDetails *d = nsdetails( ns ); if ( !d ) return; // If table scan is optimal or natural order requested or tailable cursor requested - if ( !fbs_->matchPossible() || ( fbs_->nNontrivialRanges() == 0 && order_.isEmpty() ) || - ( !order_.isEmpty() && !strcmp( order_.firstElement().fieldName(), "$natural" ) ) ) { + if ( !_fbs->matchPossible() || ( _fbs->nNontrivialRanges() == 0 && _order.isEmpty() ) || + ( !_order.isEmpty() && !strcmp( _order.firstElement().fieldName(), "$natural" ) ) ) { // Table scan plan - addPlan( PlanPtr( new QueryPlan( d, -1, *fbs_, _originalQuery, order_ ) ), checkFirst ); + addPlan( QueryPlanPtr( new QueryPlan( d, -1, *_fbs, *_originalFrs, _originalQuery, _order ) ), checkFirst ); return; } - - bool normalQuery = hint_.isEmpty() && min_.isEmpty() && max_.isEmpty(); + + bool normalQuery = _hint.isEmpty() && _min.isEmpty() && _max.isEmpty(); PlanSet plans; for( int i = 0; i < d->nIndexes; ++i ) { IndexDetails& id = d->idx(i); const IndexSpec& spec = id.getSpec(); IndexSuitability suitability = HELPFUL; - if ( normalQuery ){ - suitability = spec.suitability( fbs_->simplifiedQuery() , order_ ); + if ( normalQuery ) { + suitability = spec.suitability( _fbs->simplifiedQuery() , _order ); if ( suitability == USELESS ) continue; } - PlanPtr p( new QueryPlan( d, i, *fbs_, _originalQuery, order_ ) ); + QueryPlanPtr p( new QueryPlan( d, i, *_fbs, *_originalFrs, _originalQuery, _order ) ); if ( p->optimal() ) { addPlan( p, checkFirst ); return; - } else if ( !p->unhelpful() ) { + } + else if ( !p->unhelpful() ) { plans.push_back( p ); } } @@ -443,29 +473,29 @@ namespace mongo { addPlan( *i, checkFirst ); // Table scan plan - addPlan( PlanPtr( new QueryPlan( d, -1, *fbs_, _originalQuery, order_ ) ), checkFirst ); + addPlan( QueryPlanPtr( new QueryPlan( d, -1, *_fbs, *_originalFrs, _originalQuery, _order ) ), checkFirst ); } - + shared_ptr< QueryOp > QueryPlanSet::runOp( QueryOp &op ) { - if ( usingPrerecordedPlan_ ) { + if ( _usingPrerecordedPlan ) { Runner r( *this, op ); shared_ptr< QueryOp > res = r.run(); - // plans_.size() > 1 if addOtherPlans was called in Runner::run(). - if ( _bestGuessOnly || res->complete() || plans_.size() > 1 ) + // _plans.size() > 1 if addOtherPlans was called in Runner::run(). + if ( _bestGuessOnly || res->complete() || _plans.size() > 1 ) return res; { scoped_lock lk(NamespaceDetailsTransient::_qcMutex); - NamespaceDetailsTransient::get_inlock( fbs_->ns() ).registerIndexForPattern( fbs_->pattern( order_ ), BSONObj(), 0 ); + NamespaceDetailsTransient::get_inlock( _fbs->ns() ).registerIndexForPattern( _fbs->pattern( _order ), BSONObj(), 0 ); } init(); } Runner r( *this, op ); return r.run(); } - + BSONObj QueryPlanSet::explain() const { vector< BSONObj > arr; - for( PlanSet::const_iterator i = plans_.begin(); i != plans_.end(); ++i ) { + for( PlanSet::const_iterator i = _plans.begin(); i != _plans.end(); ++i ) { shared_ptr<Cursor> c = (*i)->newCursor(); BSONObjBuilder explain; explain.append( "cursor", c->toString() ); @@ -477,37 +507,37 @@ namespace mongo { return b.obj(); } - QueryPlanSet::PlanPtr QueryPlanSet::getBestGuess() const { - assert( plans_.size() ); - if ( plans_[ 0 ]->scanAndOrderRequired() ){ - for ( unsigned i=1; i<plans_.size(); i++ ){ - if ( ! plans_[i]->scanAndOrderRequired() ) - return plans_[i]; + QueryPlanSet::QueryPlanPtr QueryPlanSet::getBestGuess() const { + assert( _plans.size() ); + if ( _plans[ 0 ]->scanAndOrderRequired() ) { + for ( unsigned i=1; i<_plans.size(); i++ ) { + if ( ! _plans[i]->scanAndOrderRequired() ) + return _plans[i]; } - + stringstream ss; ss << "best guess plan requested, but scan and order required:"; - ss << " query: " << fbs_->simplifiedQuery(); - ss << " order: " << order_; + ss << " query: " << _fbs->simplifiedQuery(); + ss << " order: " << _order; ss << " choices: "; - for ( unsigned i=0; i<plans_.size(); i++ ){ - ss << plans_[i]->indexKey() << " "; + for ( unsigned i=0; i<_plans.size(); i++ ) { + ss << _plans[i]->indexKey() << " "; } string s = ss.str(); msgassertedNoTrace( 13284, s.c_str() ); } - return plans_[0]; + return _plans[0]; } - + QueryPlanSet::Runner::Runner( QueryPlanSet &plans, QueryOp &op ) : - op_( op ), - plans_( plans ) { + _op( op ), + _plans( plans ) { } - + void QueryPlanSet::Runner::mayYield( const vector< shared_ptr< QueryOp > > &ops ) { - if ( plans_._mayYield ) { - if ( plans_._yieldSometimesTracker.ping() ) { + if ( _plans._mayYield ) { + if ( _plans._yieldSometimesTracker.ping() ) { int micros = ClientCursor::yieldSuggest(); if ( micros > 0 ) { for( vector< shared_ptr< QueryOp > >::const_iterator i = ops.begin(); i != ops.end(); ++i ) { @@ -515,28 +545,38 @@ namespace mongo { return; } } - ClientCursor::staticYield( micros ); + ClientCursor::staticYield( micros , _plans._ns ); for( vector< shared_ptr< QueryOp > >::const_iterator i = ops.begin(); i != ops.end(); ++i ) { recoverFromYield( **i ); - } + } } } - } + } } - + + struct OpHolder { + OpHolder( const shared_ptr< QueryOp > &op ) : _op( op ), _offset() {} + shared_ptr< QueryOp > _op; + long long _offset; + bool operator<( const OpHolder &other ) const { + return _op->nscanned() + _offset > other._op->nscanned() + other._offset; + } + }; + shared_ptr< QueryOp > QueryPlanSet::Runner::run() { - massert( 10369 , "no plans", plans_.plans_.size() > 0 ); - + massert( 10369 , "no plans", _plans._plans.size() > 0 ); + vector< shared_ptr< QueryOp > > ops; - if ( plans_._bestGuessOnly ) { - shared_ptr< QueryOp > op( op_.createChild() ); - op->setQueryPlan( plans_.getBestGuess().get() ); - ops.push_back( op ); - } else { - if ( plans_.plans_.size() > 1 ) - log(1) << " running multiple plans" << endl; - for( PlanSet::iterator i = plans_.plans_.begin(); i != plans_.plans_.end(); ++i ) { - shared_ptr< QueryOp > op( op_.createChild() ); + if ( _plans._bestGuessOnly ) { + shared_ptr< QueryOp > op( _op.createChild() ); + op->setQueryPlan( _plans.getBestGuess().get() ); + ops.push_back( op ); + } + else { + if ( _plans._plans.size() > 1 ) + log(1) << " running multiple plans" << endl; + for( PlanSet::iterator i = _plans._plans.begin(); i != _plans._plans.end(); ++i ) { + shared_ptr< QueryOp > op( _op.createChild() ); op->setQueryPlan( i->get() ); ops.push_back( op ); } @@ -547,53 +587,51 @@ namespace mongo { if ( (*i)->complete() ) return *i; } - - long long nScanned = 0; - long long nScannedBackup = 0; - while( 1 ) { - ++nScanned; - unsigned errCount = 0; - bool first = true; - for( vector< shared_ptr< QueryOp > >::iterator i = ops.begin(); i != ops.end(); ++i ) { - mayYield( ops ); - QueryOp &op = **i; - nextOp( op ); - if ( op.complete() ) { - if ( first ) { - nScanned += nScannedBackup; - } - if ( plans_.mayRecordPlan_ && op.mayRecordPlan() ) { - op.qp().registerSelf( nScanned ); - } - return *i; + + std::priority_queue< OpHolder > queue; + for( vector< shared_ptr< QueryOp > >::iterator i = ops.begin(); i != ops.end(); ++i ) { + if ( !(*i)->error() ) { + queue.push( *i ); + } + } + + while( !queue.empty() ) { + mayYield( ops ); + OpHolder holder = queue.top(); + queue.pop(); + QueryOp &op = *holder._op; + nextOp( op ); + if ( op.complete() ) { + if ( _plans._mayRecordPlan && op.mayRecordPlan() ) { + op.qp().registerSelf( op.nscanned() ); } - if ( op.error() ) - ++errCount; - first = false; + return holder._op; } - if ( errCount == ops.size() ) - break; - if ( !plans_._bestGuessOnly && plans_.usingPrerecordedPlan_ && nScanned > plans_.oldNScanned_ * 10 && plans_._special.empty() ) { - plans_.addOtherPlans( true ); - PlanSet::iterator i = plans_.plans_.begin(); + if ( op.error() ) { + continue; + } + queue.push( holder ); + if ( !_plans._bestGuessOnly && _plans._usingPrerecordedPlan && op.nscanned() > _plans._oldNScanned * 10 && _plans._special.empty() ) { + holder._offset = -op.nscanned(); + _plans.addOtherPlans( true ); + PlanSet::iterator i = _plans._plans.begin(); ++i; - for( ; i != plans_.plans_.end(); ++i ) { - shared_ptr< QueryOp > op( op_.createChild() ); + for( ; i != _plans._plans.end(); ++i ) { + shared_ptr< QueryOp > op( _op.createChild() ); op->setQueryPlan( i->get() ); ops.push_back( op ); initOp( *op ); if ( op->complete() ) return op; - } - plans_.mayRecordPlan_ = true; - plans_.usingPrerecordedPlan_ = false; - nScannedBackup = nScanned; - nScanned = 0; + queue.push( op ); + } + _plans._mayRecordPlan = true; + _plans._usingPrerecordedPlan = false; } } return ops[ 0 ]; } - + #define GUARD_OP_EXCEPTION( op, expression ) \ try { \ expression; \ @@ -607,8 +645,8 @@ namespace mongo { catch ( ... ) { \ op.setException( ExceptionInfo( "Caught unknown exception" , 0 ) ); \ } - - + + void QueryPlanSet::Runner::initOp( QueryOp &op ) { GUARD_OP_EXCEPTION( op, op.init() ); } @@ -619,39 +657,39 @@ namespace mongo { bool QueryPlanSet::Runner::prepareToYield( QueryOp &op ) { GUARD_OP_EXCEPTION( op, - if ( op.error() ) { - return true; - } else { - return op.prepareToYield(); - } ); + if ( op.error() ) { + return true; + } + else { + return op.prepareToYield(); + } ); return true; } void QueryPlanSet::Runner::recoverFromYield( QueryOp &op ) { GUARD_OP_EXCEPTION( op, if ( !op.error() ) { op.recoverFromYield(); } ); } - - + + MultiPlanScanner::MultiPlanScanner( const char *ns, - const BSONObj &query, - const BSONObj &order, - const BSONElement *hint, - bool honorRecordedPlan, - const BSONObj &min, - const BSONObj &max, - bool bestGuessOnly, - bool mayYield ) : - _ns( ns ), - _or( !query.getField( "$or" ).eoo() ), - _query( query.getOwned() ), - _fros( ns, _query ), - _i(), - _honorRecordedPlan( honorRecordedPlan ), - _bestGuessOnly( bestGuessOnly ), - _hint( ( hint && !hint->eoo() ) ? hint->wrap() : BSONObj() ), - _mayYield( mayYield ), - _tableScanned() - { + const BSONObj &query, + const BSONObj &order, + const BSONElement *hint, + bool honorRecordedPlan, + const BSONObj &min, + const BSONObj &max, + bool bestGuessOnly, + bool mayYield ) : + _ns( ns ), + _or( !query.getField( "$or" ).eoo() ), + _query( query.getOwned() ), + _fros( ns, _query ), + _i(), + _honorRecordedPlan( honorRecordedPlan ), + _bestGuessOnly( bestGuessOnly ), + _hint( ( hint && !hint->eoo() ) ? hint->wrap() : BSONObj() ), + _mayYield( mayYield ), + _tableScanned() { if ( !order.isEmpty() || !min.isEmpty() || !max.isEmpty() || !_fros.getSpecial().empty() ) { _or = false; } @@ -661,8 +699,10 @@ namespace mongo { // if _or == false, don't use or clauses for index selection if ( !_or ) { auto_ptr< FieldRangeSet > frs( new FieldRangeSet( ns, _query ) ); - _currentQps.reset( new QueryPlanSet( ns, frs, _query, order, hint, honorRecordedPlan, min, max, _bestGuessOnly, _mayYield ) ); - } else { + auto_ptr< FieldRangeSet > oldFrs( new FieldRangeSet( *frs ) ); + _currentQps.reset( new QueryPlanSet( ns, frs, oldFrs, _query, order, hint, honorRecordedPlan, min, max, _bestGuessOnly, _mayYield ) ); + } + else { BSONElement e = _query.getField( "$or" ); massert( 13268, "invalid $or spec", e.type() == Array && e.embeddedObject().nFields() > 0 ); } @@ -676,16 +716,17 @@ namespace mongo { } ++_i; auto_ptr< FieldRangeSet > frs( _fros.topFrs() ); + auto_ptr< FieldRangeSet > originalFrs( _fros.topFrsOriginal() ); BSONElement hintElt = _hint.firstElement(); - _currentQps.reset( new QueryPlanSet( _ns, frs, _query, BSONObj(), &hintElt, _honorRecordedPlan, BSONObj(), BSONObj(), _bestGuessOnly, _mayYield ) ); + _currentQps.reset( new QueryPlanSet( _ns, frs, originalFrs, _query, BSONObj(), &hintElt, _honorRecordedPlan, BSONObj(), BSONObj(), _bestGuessOnly, _mayYield ) ); shared_ptr< QueryOp > ret( _currentQps->runOp( op ) ); if ( ret->qp().willScanTable() ) { _tableScanned = true; } - _fros.popOrClause(); + _fros.popOrClause( ret->qp().indexed() ? ret->qp().indexKey() : BSONObj() ); return ret; } - + shared_ptr< QueryOp > MultiPlanScanner::runOp( QueryOp &op ) { shared_ptr< QueryOp > ret = runOpOnce( op ); while( !ret->stopRequested() && mayRunMore() ) { @@ -693,7 +734,7 @@ namespace mongo { } return ret; } - + bool MultiPlanScanner::uselessOr( const BSONElement &hint ) const { NamespaceDetails *nsd = nsdetails( _ns ); if ( !nsd ) { @@ -713,7 +754,8 @@ namespace mongo { if ( id->getSpec().suitability( *i, BSONObj() ) == USELESS ) { return true; } - } else { + } + else { bool useful = false; NamespaceDetails::IndexIterator j = nsd->ii(); while( j.more() ) { @@ -725,12 +767,12 @@ namespace mongo { } if ( !useful ) { return true; - } + } } } return false; } - + bool indexWorks( const BSONObj &idxPattern, const BSONObj &sampleKey, int direction, int firstSignificantField ) { BSONObjIterator p( idxPattern ); BSONObjIterator k( sampleKey ); @@ -761,19 +803,19 @@ namespace mongo { int idxDirection = e.number() >= 0 ? 1 : -1; int direction = idxDirection * baseDirection; switch( direction ) { - case 1: - b.appendMaxKey( e.fieldName() ); - break; - case -1: - b.appendMinKey( e.fieldName() ); - break; - default: - assert( false ); + case 1: + b.appendMaxKey( e.fieldName() ); + break; + case -1: + b.appendMinKey( e.fieldName() ); + break; + default: + assert( false ); } } - return b.obj(); + return b.obj(); } - + pair< int, int > keyAudit( const BSONObj &min, const BSONObj &max ) { int direction = 0; int firstSignificantField = 0; @@ -802,18 +844,19 @@ namespace mongo { pair< int, int > flexibleKeyAudit( const BSONObj &min, const BSONObj &max ) { if ( min.isEmpty() || max.isEmpty() ) { return make_pair( 1, -1 ); - } else { + } + else { return keyAudit( min, max ); } } - + // NOTE min, max, and keyPattern will be updated to be consistent with the selected index. IndexDetails *indexDetailsForRange( const char *ns, string &errmsg, BSONObj &min, BSONObj &max, BSONObj &keyPattern ) { if ( min.isEmpty() && max.isEmpty() ) { errmsg = "one of min or max must be specified"; return 0; } - + Client::Context ctx( ns ); IndexDetails *id = 0; NamespaceDetails *d = nsdetails( ns ); @@ -821,7 +864,7 @@ namespace mongo { errmsg = "ns not found"; return 0; } - + pair< int, int > ret = flexibleKeyAudit( min, max ); if ( ret == make_pair( -1, -1 ) ) { errmsg = "min and max keys do not share pattern"; @@ -832,15 +875,16 @@ namespace mongo { while( i.more() ) { IndexDetails& ii = i.next(); if ( indexWorks( ii.keyPattern(), min.isEmpty() ? max : min, ret.first, ret.second ) ) { - if ( ii.getSpec().getType() == 0 ){ + if ( ii.getSpec().getType() == 0 ) { id = ⅈ keyPattern = ii.keyPattern(); break; } } } - - } else { + + } + else { if ( !indexWorks( keyPattern, min.isEmpty() ? max : min, ret.first, ret.second ) ) { errmsg = "requested keyPattern does not match specified keys"; return 0; @@ -853,30 +897,31 @@ namespace mongo { break; } if ( keyPattern.nFields() == 1 && ii.keyPattern().nFields() == 1 && - IndexDetails::isIdIndexPattern( keyPattern ) && - ii.isIdIndex() ){ + IndexDetails::isIdIndexPattern( keyPattern ) && + ii.isIdIndex() ) { id = ⅈ break; } - + } } if ( min.isEmpty() ) { min = extremeKeyForIndex( keyPattern, -1 ); - } else if ( max.isEmpty() ) { + } + else if ( max.isEmpty() ) { max = extremeKeyForIndex( keyPattern, 1 ); } - + if ( !id ) { errmsg = (string)"no index found for specified keyPattern: " + keyPattern.toString(); return 0; } - + min = min.extractFieldsUnDotted( keyPattern ); max = max.extractFieldsUnDotted( keyPattern ); return id; } - + } // namespace mongo |