// @file queryoptimizer.h /** * Copyright (C) 2008 10gen Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License, version 3, * as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ #pragma once #include "cursor.h" #include "jsobj.h" #include "queryutil.h" #include "matcher.h" #include "../util/net/listen.h" #include namespace mongo { class IndexDetails; class IndexType; class ElapsedTracker; /** A plan for executing a query using the given index spec and FieldRangeSet. */ class QueryPlan : boost::noncopyable { public: /** * @param originalFrsp - original constraints for this query clause. If null, frsp will be used instead. */ QueryPlan(NamespaceDetails *d, int idxNo, // -1 = no index const FieldRangeSetPair &frsp, const FieldRangeSetPair *originalFrsp, const BSONObj &originalQuery, const BSONObj &order, bool mustAssertOnYieldFailure = true, const BSONObj &startKey = BSONObj(), const BSONObj &endKey = BSONObj(), string special="" ); /** @return true iff no other plans should be considered. */ bool optimal() const { return _optimal; } /* @return true iff this plan should not be considered at all. */ bool unhelpful() const { return _unhelpful; } /** @return true iff ScanAndOrder processing will be required for result set. */ bool scanAndOrderRequired() const { return _scanAndOrderRequired; } /** * @return true iff the index we are using has keys such that it can completely resolve the * query expression to match by itself without ever checking the main object. */ bool exactKeyMatch() const { return _exactKeyMatch; } /** @return true iff this QueryPlan would perform an unindexed scan. */ bool willScanTable() const { return _idxNo < 0 && !_impossible; } /** @return a new cursor based on this QueryPlan's index and FieldRangeSet. */ shared_ptr newCursor( const DiskLoc &startLoc = DiskLoc() , int numWanted=0 ) const; /** @return a new reverse cursor if this is an unindexed plan. */ shared_ptr newReverseCursor() const; /** Register this plan as a winner for its QueryPattern, with specified 'nscanned'. */ void registerSelf( long long nScanned ) const; int direction() const { return _direction; } BSONObj indexKey() const; bool indexed() const { return _index; } int idxNo() const { return _idxNo; } const char *ns() const { return _frs.ns(); } NamespaceDetails *nsd() const { return _d; } BSONObj originalQuery() const { return _originalQuery; } BSONObj simplifiedQuery( const BSONObj& fields = BSONObj() ) const { return _frs.simplifiedQuery( fields ); } const FieldRange &range( const char *fieldName ) const { return _frs.range( fieldName ); } shared_ptr originalFrv() const { return _originalFrv; } const FieldRangeSet &multikeyFrs() const { return _frsMulti; } bool mustAssertOnYieldFailure() const { return _mustAssertOnYieldFailure; } /** just for testing */ shared_ptr frv() const { return _frv; } bool isMultiKey() const; private: NamespaceDetails * _d; int _idxNo; const FieldRangeSet &_frs; const FieldRangeSet &_frsMulti; const BSONObj &_originalQuery; const BSONObj &_order; const IndexDetails * _index; bool _optimal; bool _scanAndOrderRequired; bool _exactKeyMatch; int _direction; shared_ptr _frv; shared_ptr _originalFrv; BSONObj _startKey; BSONObj _endKey; bool _endKeyInclusive; bool _unhelpful; bool _impossible; string _special; IndexType * _type; bool _startOrEndSpec; bool _mustAssertOnYieldFailure; }; /** * Inherit from this interface to implement a new query operation. * The query optimizer will clone the QueryOp that is provided, giving * each clone its own query plan. * * Normal sequence of events: * 1) A new QueryOp is generated using createChild(). * 2) A QueryPlan is assigned to this QueryOp with setQueryPlan(). * 3) _init() is called on the QueryPlan. * 4) next() is called repeatedly, with nscanned() checked after each call. * 5) In one of these calls to next(), setComplete() is called. * 6) The QueryPattern for the QueryPlan may be recorded as a winner. */ class QueryOp { public: QueryOp() : _complete(), _stopRequested(), _qp(), _error() {} /** Used when handing off from one QueryOp to another. */ QueryOp( const QueryOp &other ) : _complete(), _stopRequested(), _qp(), _error(), _matcher( other._matcher ), _orConstraint( other._orConstraint ) {} virtual ~QueryOp() {} /** @return QueryPlan assigned to this QueryOp by the query optimizer. */ const QueryPlan &qp() const { return *_qp; } /** Advance to next potential matching document (eg using a cursor). */ virtual void next() = 0; /** * @return current 'nscanned' metric for this QueryOp. Used to compare * cost to other QueryOps. */ virtual long long nscanned() = 0; /** Take any steps necessary before the db mutex is yielded. */ virtual bool prepareToYield() { massert( 13335, "yield not supported", false ); return false; } /** Recover once the db mutex is regained. */ virtual void recoverFromYield() { massert( 13336, "yield not supported", false ); } /** * @return true iff the QueryPlan for this QueryOp may be registered * as a winning plan. */ virtual bool mayRecordPlan() const = 0; /** @return true iff the implementation called setComplete() or setStop(). */ bool complete() const { return _complete; } /** @return true iff the implementation called steStop(). */ bool stopRequested() const { return _stopRequested; } /** @return true iff the implementation threw an exception. */ bool error() const { return _error; } /** @return the exception thrown by implementation if one was thrown. */ ExceptionInfo exception() const { return _exception; } /** To be called by QueryPlanSet::Runner only. */ QueryOp *createChild(); void setQueryPlan( const QueryPlan *qp ) { _qp = qp; assert( _qp != NULL ); } void init(); void setException( const DBException &e ) { _error = true; _exception = e.getInfo(); } shared_ptr matcher( const shared_ptr& c ) const { return matcher( c.get() ); } shared_ptr matcher( Cursor* c ) const { if( ! c ) return _matcher; return c->matcher() ? c->matcherPtr() : _matcher; } protected: /** Call if all results have been found. */ void setComplete() { _orConstraint = qp().originalFrv(); _complete = true; } /** Call if the scan is complete even if not all results have been found. */ void setStop() { setComplete(); _stopRequested = true; } /** Handle initialization after a QueryPlan has been set. */ virtual void _init() = 0; /** @return a copy of the inheriting class, which will be run with its own query plan. */ virtual QueryOp *_createChild() const = 0; virtual bool alwaysUseRecord() const { return false; } private: bool _complete; bool _stopRequested; ExceptionInfo _exception; const QueryPlan *_qp; bool _error; shared_ptr _matcher; shared_ptr _oldMatcher; shared_ptr _orConstraint; }; // temp. this class works if T::operator< is variant unlike a regular stl priority queue. // but it's very slow. however if v.size() is always very small, it would be fine, // maybe even faster than a smart impl that does more memory allocations. template class our_priority_queue : boost::noncopyable { vector v; public: our_priority_queue() { v.reserve(4); } int size() const { return v.size(); } bool empty() const { return v.empty(); } void push(const T & x) { v.push_back(x); } T pop() { size_t t = 0; for( size_t i = 1; i < v.size(); i++ ) { if( v[t] < v[i] ) t = i; } T ret = v[t]; v.erase(v.begin()+t); return ret; } }; /** * A set of candidate query plans for a query. This class can return a best buess plan or run a * QueryOp on all the plans. */ class QueryPlanSet { public: typedef boost::shared_ptr QueryPlanPtr; typedef vector PlanSet; /** * @param originalFrsp - original constraints for this query clause; if null, frsp will be used. */ QueryPlanSet( const char *ns, auto_ptr frsp, auto_ptr originalFrsp, const BSONObj &originalQuery, const BSONObj &order, bool mustAssertOnYieldFailure = true, const BSONElement *hint = 0, bool honorRecordedPlan = true, const BSONObj &min = BSONObj(), const BSONObj &max = BSONObj(), bool bestGuessOnly = false, bool mayYield = false); /** @return number of candidate plans. */ int nPlans() const { return _plans.size(); } /** * Clone op for each query plan, and @return the first cloned op to call * setComplete() or setStop(). */ shared_ptr runOp( QueryOp &op ); template shared_ptr runOp( T &op ) { return dynamic_pointer_cast( runOp( static_cast( op ) ) ); } /** Initialize or iterate a runner generated from @param originalOp. */ shared_ptr nextOp( QueryOp &originalOp, bool retried = false ); /** Yield the runner member. */ bool prepareToYield(); void recoverFromYield(); QueryPlanPtr firstPlan() const { return _plans[ 0 ]; } /** @return metadata about cursors and index bounds for all plans, suitable for explain output. */ BSONObj explain() const; /** @return true iff a plan is selected based on previous success of this plan. */ bool usingPrerecordedPlan() const { return _usingPrerecordedPlan; } /** @return a single plan that may work well for the specified query. */ QueryPlanPtr getBestGuess() const; //for testing const FieldRangeSetPair &frsp() const { return *_frsp; } const FieldRangeSetPair *originalFrsp() const { return _originalFrsp.get(); } bool modifiedKeys() const; bool hasMultiKey() const; private: void addOtherPlans( bool checkFirst ); void addPlan( QueryPlanPtr plan, bool checkFirst ) { if ( checkFirst && plan->indexKey().woCompare( _plans[ 0 ]->indexKey() ) == 0 ) return; _plans.push_back( plan ); } void init(); void addHint( IndexDetails &id ); void warnOnCappedIdTableScan() const; class Runner { public: Runner( QueryPlanSet &plans, QueryOp &op ); /** * Iterate interactively through candidate documents on all plans. * QueryOp objects are returned at each interleaved step. */ /** @return a plan that has completed, otherwise an arbitrary plan. */ shared_ptr init(); /** * Move the Runner forward one iteration, and @return the plan for * this iteration. */ shared_ptr next(); /** @return next non error op if there is one, otherwise an error op. */ shared_ptr nextNonError(); bool prepareToYield(); void recoverFromYield(); /** Run until first op completes. */ shared_ptr runUntilFirstCompletes(); void mayYield(); QueryOp &_op; QueryPlanSet &_plans; static void initOp( QueryOp &op ); static void nextOp( QueryOp &op ); static bool prepareToYieldOp( QueryOp &op ); static void recoverFromYieldOp( QueryOp &op ); private: vector > _ops; struct OpHolder { OpHolder( const shared_ptr &op ) : _op( op ), _offset() {} shared_ptr _op; long long _offset; bool operator<( const OpHolder &other ) const { return _op->nscanned() + _offset > other._op->nscanned() + other._offset; } }; our_priority_queue _queue; }; const char *_ns; BSONObj _originalQuery; auto_ptr _frsp; auto_ptr _originalFrsp; PlanSet _plans; bool _mayRecordPlan; bool _usingPrerecordedPlan; BSONObj _hint; BSONObj _order; long long _oldNScanned; bool _honorRecordedPlan; BSONObj _min; BSONObj _max; string _special; bool _bestGuessOnly; bool _mayYield; ElapsedTracker _yieldSometimesTracker; shared_ptr _runner; bool _mustAssertOnYieldFailure; }; /** Handles $or type queries by generating a QueryPlanSet for each $or clause. */ class MultiPlanScanner { public: MultiPlanScanner( const char *ns, const BSONObj &query, const BSONObj &order, const BSONElement *hint = 0, bool honorRecordedPlan = true, const BSONObj &min = BSONObj(), const BSONObj &max = BSONObj(), bool bestGuessOnly = false, bool mayYield = false); /** * Clone op for each query plan of a single $or clause, and @return the first cloned op * to call setComplete() or setStop(). */ shared_ptr runOpOnce( QueryOp &op ); template shared_ptr runOpOnce( T &op ) { return dynamic_pointer_cast( runOpOnce( static_cast( op ) ) ); } /** * For each $or clause, calls runOpOnce on the child QueryOp cloned from the winning QueryOp * of the previous $or clause (or from the supplied 'op' for the first $or clause). */ shared_ptr runOp( QueryOp &op ); template shared_ptr runOp( T &op ) { return dynamic_pointer_cast( runOp( static_cast( op ) ) ); } /** Initialize or iterate a runner generated from @param originalOp. */ void initialOp( const shared_ptr &originalOp ) { _baseOp = originalOp; } shared_ptr nextOp(); /** Yield the runner member. */ bool prepareToYield(); void recoverFromYield(); /** * @return a single simple cursor if the scanner would run a single cursor * for this query, otherwise return an empty shared_ptr. */ shared_ptr singleCursor() const; /** @return true iff more $or clauses need to be scanned. */ bool mayRunMore() const { return _or ? ( !_tableScanned && !_org->orFinished() ) : _i == 0; } /** @return non-$or version of explain output. */ BSONObj oldExplain() const { assertNotOr(); return _currentQps->explain(); } /** @return true iff this is not a $or query and a plan is selected based on previous success of this plan. */ bool usingPrerecordedPlan() const { return !_or && _currentQps->usingPrerecordedPlan(); } /** Don't attempt to scan multiple plans, just use the best guess. */ void setBestGuessOnly() { _bestGuessOnly = true; } /** Yielding is allowed while running each QueryPlan. */ void mayYield( bool val ) { _mayYield = val; } bool modifiedKeys() const { return _currentQps->modifiedKeys(); } bool hasMultiKey() const { return _currentQps->hasMultiKey(); } private: void assertNotOr() const { massert( 13266, "not implemented for $or query", !_or ); } void assertMayRunMore() const { massert( 13271, "can't run more ops", mayRunMore() ); } shared_ptr nextOpBeginningClause(); shared_ptr nextOpHandleEndOfClause(); bool uselessOr( const BSONElement &hint ) const; const char * _ns; bool _or; BSONObj _query; shared_ptr _org; // May be null in certain non $or query cases. auto_ptr _currentQps; int _i; bool _honorRecordedPlan; bool _bestGuessOnly; BSONObj _hint; bool _mayYield; bool _tableScanned; shared_ptr _baseOp; }; /** Provides a cursor interface for certain limited uses of a MultiPlanScanner. */ class MultiCursor : public Cursor { public: class CursorOp : public QueryOp { public: CursorOp() {} CursorOp( const QueryOp &other ) : QueryOp( other ) {} virtual shared_ptr newCursor() const = 0; }; /** takes ownership of 'op' */ MultiCursor( const char *ns, const BSONObj &pattern, const BSONObj &order, shared_ptr op = shared_ptr(), bool mayYield = false ); /** * Used * 1. To handoff a query to a getMore() * 2. To handoff a QueryOptimizerCursor * @param nscanned is an optional initial value, if not supplied nscanned() * will always return -1 */ MultiCursor( auto_ptr mps, const shared_ptr &c, const shared_ptr &matcher, const QueryOp &op, long long nscanned = -1 ); virtual bool ok() { return _c->ok(); } virtual Record* _current() { return _c->_current(); } virtual BSONObj current() { return _c->current(); } virtual DiskLoc currLoc() { return _c->currLoc(); } virtual bool advance() { _c->advance(); while( !ok() && _mps->mayRunMore() ) { nextClause(); } return ok(); } virtual BSONObj currKey() const { return _c->currKey(); } virtual DiskLoc refLoc() { return _c->refLoc(); } virtual void noteLocation() { _c->noteLocation(); } virtual void checkLocation() { _c->checkLocation(); } virtual bool supportGetMore() { return true; } virtual bool supportYields() { return _c->supportYields(); } virtual BSONObj indexKeyPattern() { return _c->indexKeyPattern(); } /** * with update we could potentially get the same document on multiple * indexes, but update appears to already handle this with seenObjects * so we don't have to do anything special here. */ virtual bool getsetdup(DiskLoc loc) { return _c->getsetdup( loc ); } virtual bool modifiedKeys() const { return _mps->modifiedKeys(); } virtual bool isMultiKey() const { return _mps->hasMultiKey(); } virtual shared_ptr< CoveredIndexMatcher > matcherPtr() const { return _matcher; } virtual CoveredIndexMatcher* matcher() const { return _matcher.get(); } /** return -1 if we're a getmore handoff */ virtual long long nscanned() { return _nscanned >= 0 ? _nscanned + _c->nscanned() : _nscanned; } /** just for testing */ shared_ptr sub_c() const { return _c; } private: class NoOp : public CursorOp { public: NoOp() {} NoOp( const QueryOp &other ) : CursorOp( other ) {} virtual void _init() { setComplete(); } virtual void next() {} virtual bool mayRecordPlan() const { return false; } virtual QueryOp *_createChild() const { return new NoOp(); } virtual shared_ptr newCursor() const { return qp().newCursor(); } virtual long long nscanned() { assert( false ); return 0; } }; void nextClause(); shared_ptr _op; shared_ptr _c; auto_ptr _mps; shared_ptr _matcher; long long _nscanned; }; /** NOTE min, max, and keyPattern will be updated to be consistent with the selected index. */ IndexDetails *indexDetailsForRange( const char *ns, string &errmsg, BSONObj &min, BSONObj &max, BSONObj &keyPattern ); bool isSimpleIdQuery( const BSONObj& query ); /** * @return a single cursor that may work well for the given query. * It is possible no cursor is returned if the sort is not supported by an index. Clients are responsible * for checking this if they are not sure an index for a sort exists, and defaulting to a non-sort if * no suitable indices exist. */ shared_ptr bestGuessCursor( const char *ns, const BSONObj &query, const BSONObj &sort ); /** * Add-on functionality for queryutil classes requiring access to indexing * functionality not currently linked to mongos. * TODO Clean this up a bit, possibly with separate sharded and non sharded * implementations for the appropriate queryutil classes or by pulling index * related functionality into separate wrapper classes. */ struct QueryUtilIndexed { /** @return true if the index may be useful according to its KeySpec. */ static bool indexUseful( const FieldRangeSetPair &frsp, NamespaceDetails *d, int idxNo, const BSONObj &order ); /** Clear any indexes recorded as the best for either the single or multi key pattern. */ static void clearIndexesForPatterns( const FieldRangeSetPair &frsp, const BSONObj &order ); /** Return a recorded best index for the single or multi key pattern. */ static pair< BSONObj, long long > bestIndexForPatterns( const FieldRangeSetPair &frsp, const BSONObj &order ); static bool uselessOr( const OrRangeGenerator& org, NamespaceDetails *d, int hintIdx ); }; } // namespace mongo