summaryrefslogtreecommitdiff
path: root/db/query.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'db/query.cpp')
-rw-r--r--db/query.cpp696
1 files changed, 470 insertions, 226 deletions
diff --git a/db/query.cpp b/db/query.cpp
index 761a312..3d251a0 100644
--- a/db/query.cpp
+++ b/db/query.cpp
@@ -16,24 +16,26 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-#include "stdafx.h"
+#include "pch.h"
#include "query.h"
#include "pdfile.h"
#include "jsobjmanipulator.h"
-#include "../util/builder.h"
+#include "../bson/util/builder.h"
#include <time.h>
#include "introspect.h"
#include "btree.h"
#include "../util/lruishmap.h"
#include "json.h"
#include "repl.h"
-#include "replset.h"
+#include "replpair.h"
#include "scanandorder.h"
#include "security.h"
#include "curop.h"
#include "commands.h"
#include "queryoptimizer.h"
#include "lasterror.h"
+#include "../s/d_logic.h"
+#include "repl_block.h"
namespace mongo {
@@ -49,7 +51,7 @@ namespace mongo {
extern bool useHints;
// Just try to identify best plan.
- class DeleteOp : public QueryOp {
+ class DeleteOp : public MultiCursor::CursorOp {
public:
DeleteOp( bool justOne, int& bestCount ) :
justOne_( justOne ),
@@ -57,9 +59,21 @@ namespace mongo {
bestCount_( bestCount ),
_nscanned() {
}
- virtual void init() {
+ virtual void _init() {
c_ = qp().newCursor();
- _matcher.reset( new CoveredIndexMatcher( qp().query(), qp().indexKey() ) );
+ }
+ virtual bool prepareToYield() {
+ if ( ! _cc ) {
+ _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , c_ , qp().ns() ) );
+ }
+ return _cc->prepareToYield( _yieldData );
+ }
+ virtual void recoverFromYield() {
+ if ( !ClientCursor::recoverFromYield( _yieldData ) ) {
+ _cc.reset();
+ c_.reset();
+ massert( 13340, "cursor dropped during delete", false );
+ }
}
virtual void next() {
if ( !c_->ok() ) {
@@ -69,7 +83,7 @@ namespace mongo {
DiskLoc rloc = c_->currLoc();
- if ( _matcher->matches(c_->currKey(), rloc ) ) {
+ if ( matcher()->matches(c_->currKey(), rloc ) ) {
if ( !c_->getsetdup(rloc) )
++count_;
}
@@ -87,17 +101,19 @@ namespace mongo {
}
}
virtual bool mayRecordPlan() const { return !justOne_; }
- virtual QueryOp *clone() const {
+ virtual QueryOp *_createChild() const {
+ bestCount_ = 0; // should be safe to reset this in contexts where createChild() is called
return new DeleteOp( justOne_, bestCount_ );
}
- auto_ptr< Cursor > newCursor() const { return qp().newCursor(); }
+ virtual shared_ptr<Cursor> newCursor() const { return qp().newCursor(); }
private:
bool justOne_;
int count_;
int &bestCount_;
long long _nscanned;
- auto_ptr< Cursor > c_;
- auto_ptr< CoveredIndexMatcher > _matcher;
+ shared_ptr<Cursor> c_;
+ ClientCursor::CleanupPointer _cc;
+ ClientCursor::YieldData _yieldData;
};
/* ns: namespace, e.g. <database>.<collection>
@@ -105,7 +121,7 @@ namespace mongo {
justOne: stop after 1 match
god: allow access to system namespaces, and don't yield
*/
- long long deleteObjects(const char *ns, BSONObj pattern, bool justOne, bool logop, bool god) {
+ long long deleteObjects(const char *ns, BSONObj pattern, bool justOneOrig, bool logop, bool god, RemoveSaver * rs ) {
if( !god ) {
if ( strstr(ns, ".system.") ) {
/* note a delete from system.indexes would corrupt the db
@@ -126,52 +142,58 @@ namespace mongo {
uassert( 10101 , "can't remove from a capped collection" , ! d->capped );
long long nDeleted = 0;
- QueryPlanSet s( ns, pattern, BSONObj() );
+
int best = 0;
- DeleteOp original( justOne, best );
- shared_ptr< DeleteOp > bestOp = s.runOp( original );
- auto_ptr< Cursor > creal = bestOp->newCursor();
+ shared_ptr< MultiCursor::CursorOp > opPtr( new DeleteOp( justOneOrig, best ) );
+ shared_ptr< MultiCursor > creal( new MultiCursor( ns, pattern, BSONObj(), opPtr, true ) );
if( !creal->ok() )
return nDeleted;
-
- CoveredIndexMatcher matcher(pattern, creal->indexKeyPattern());
-
- auto_ptr<ClientCursor> cc( new ClientCursor(creal, ns, false) );
+
+ shared_ptr< Cursor > cPtr = creal;
+ auto_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout, cPtr, ns) );
cc->setDoingDeletes( true );
-
+
CursorId id = cc->cursorid;
-
- unsigned long long nScanned = 0;
+
+ bool justOne = justOneOrig;
+ bool canYield = !god && !creal->matcher()->docMatcher().atomic();
do {
- if ( ++nScanned % 128 == 0 && !god && !matcher.docMatcher().atomic() ) {
- if ( ! cc->yield() ){
- cc.release(); // has already been deleted elsewhere
- break;
- }
+ if ( canYield && ! cc->yieldSometimes() ){
+ cc.release(); // has already been deleted elsewhere
+ // TODO should we assert or something?
+ break;
}
-
+ if ( !cc->c->ok() ) {
+ break; // if we yielded, could have hit the end
+ }
+
// this way we can avoid calling updateLocation() every time (expensive)
// as well as some other nuances handled
cc->setDoingDeletes( true );
-
+
DiskLoc rloc = cc->c->currLoc();
BSONObj key = cc->c->currKey();
+
+ // NOTE Calling advance() may change the matcher, so it's important
+ // to try to match first.
+ bool match = creal->matcher()->matches( key , rloc );
- cc->c->advance();
-
- if ( ! matcher.matches( key , rloc ) )
+ if ( ! cc->c->advance() )
+ justOne = true;
+
+ if ( ! match )
continue;
-
+
assert( !cc->c->getsetdup(rloc) ); // can't be a dup, we deleted it!
-
+
if ( !justOne ) {
/* NOTE: this is SLOW. this is not good, noteLocation() was designed to be called across getMore
- blocks. here we might call millions of times which would be bad.
- */
+ blocks. here we might call millions of times which would be bad.
+ */
cc->c->noteLocation();
}
-
+
if ( logop ) {
BSONElement e;
if( BSONObj( rloc.rec() ).getObjectID( e ) ) {
@@ -184,18 +206,22 @@ namespace mongo {
}
}
+ if ( rs )
+ rs->goingToDelete( rloc.obj() /*cc->c->current()*/ );
+
theDataFileMgr.deleteRecord(ns, rloc.rec(), rloc);
nDeleted++;
- if ( justOne )
+ if ( justOne ) {
break;
+ }
cc->c->checkLocation();
-
+
} while ( cc->c->ok() );
if ( cc.get() && ClientCursor::find( id , false ) == 0 ){
cc.release();
}
-
+
return nDeleted;
}
@@ -208,14 +234,13 @@ namespace mongo {
return _runCommands(ns, jsobj, b, anObjBuilder, fromRepl, queryOptions);
}
catch ( AssertionException& e ) {
- if ( !e.msg.empty() )
- anObjBuilder.append("assertion", e.msg);
+ e.getInfo().append( anObjBuilder , "assertion" , "assertionCode" );
}
curop.debug().str << " assertion ";
anObjBuilder.append("errmsg", "db assertion failure");
anObjBuilder.append("ok", 0.0);
BSONObj x = anObjBuilder.done();
- b.append((void*) x.objdata(), x.objsize());
+ b.appendBuf((void*) x.objdata(), x.objsize());
return true;
}
@@ -227,7 +252,9 @@ namespace mongo {
if ( ClientCursor::erase(ids[i]) )
k++;
}
- log( k == n ) << "killcursors: found " << k << " of " << n << '\n';
+ if ( logLevel > 0 || k != n ){
+ log( k == n ) << "killcursors: found " << k << " of " << n << endl;
+ }
}
BSONObj id_obj = fromjson("{\"_id\":1}");
@@ -250,8 +277,9 @@ namespace mongo {
return qr;
}
- QueryResult* getMore(const char *ns, int ntoreturn, long long cursorid , CurOp& curop ) {
- StringBuilder& ss = curop.debug().str;
+ QueryResult* processGetMore(const char *ns, int ntoreturn, long long cursorid , CurOp& curop, int pass, bool& exhaust ) {
+// log() << "TEMP GETMORE " << ns << ' ' << cursorid << ' ' << pass << endl;
+ exhaust = false;
ClientCursor::Pointer p(cursorid);
ClientCursor *cc = p._c;
@@ -260,30 +288,51 @@ namespace mongo {
bufSize += sizeof( QueryResult );
bufSize += ( ntoreturn ? 4 : 1 ) * 1024 * 1024;
}
+
BufBuilder b( bufSize );
b.skip(sizeof(QueryResult));
-
- int resultFlags = 0; //QueryResult::ResultFlag_AwaitCapable;
+
+ int resultFlags = ResultFlag_AwaitCapable;
int start = 0;
int n = 0;
if ( !cc ) {
log() << "getMore: cursorid not found " << ns << " " << cursorid << endl;
cursorid = 0;
- resultFlags = QueryResult::ResultFlag_CursorNotFound;
+ resultFlags = ResultFlag_CursorNotFound;
}
else {
- ss << " query: " << cc->query << " ";
+ if ( pass == 0 )
+ cc->updateSlaveLocation( curop );
+
+ int queryOptions = cc->_queryOptions;
+
+ if( pass == 0 ) {
+ StringBuilder& ss = curop.debug().str;
+ ss << " getMore: " << cc->query.toString() << " ";
+ }
+
start = cc->pos;
Cursor *c = cc->c.get();
c->checkLocation();
+ DiskLoc last;
+
while ( 1 ) {
if ( !c->ok() ) {
+// log() << "TEMP Tailable : " << c->tailable() << ' ' << (queryOptions & QueryOption_AwaitData) << endl;
if ( c->tailable() ) {
- if ( c->advance() ) {
+ /* when a tailable cursor hits "EOF", ok() goes false, and current() is null. however
+ advance() can still be retries as a reactivation attempt. when there is new data, it will
+ return true. that's what we are doing here.
+ */
+ if ( c->advance() )
continue;
+
+ if( n == 0 && (queryOptions & QueryOption_AwaitData) && pass < 1000 ) {
+ throw GetMoreWaitException();
}
+
break;
}
p.release();
@@ -293,31 +342,42 @@ namespace mongo {
cc = 0;
break;
}
- if ( !cc->matcher->matches(c->currKey(), c->currLoc() ) ) {
+ // in some cases (clone collection) there won't be a matcher
+ if ( c->matcher() && !c->matcher()->matches(c->currKey(), c->currLoc() ) ) {
}
+ /*
+ TODO
+ else if ( _chunkMatcher && ! _chunkMatcher->belongsToMe( c->currKey(), c->currLoc() ) ){
+ cout << "TEMP skipping un-owned chunk: " << c->current() << endl;
+ }
+ */
else {
- //out() << "matches " << c->currLoc().toString() << '\n';
if( c->getsetdup(c->currLoc()) ) {
//out() << " but it's a dup \n";
}
else {
+ last = c->currLoc();
BSONObj js = c->current();
- fillQueryResultFromObj(b, cc->fields.get(), js);
+
+ // show disk loc should be part of the main query, not in an $or clause, so this should be ok
+ fillQueryResultFromObj(b, cc->fields.get(), js, ( cc->pq.get() && cc->pq->showDiskLoc() ? &last : 0));
n++;
if ( (ntoreturn>0 && (n >= ntoreturn || b.len() > MaxBytesToReturnToClientAtOnce)) ||
(ntoreturn==0 && b.len()>1*1024*1024) ) {
c->advance();
cc->pos += n;
- //cc->updateLocation();
break;
}
}
}
c->advance();
}
+
if ( cc ) {
cc->updateLocation();
cc->mayUpgradeStorage();
+ cc->storeOpForSlave( last );
+ exhaust = cc->_queryOptions & QueryOption_Exhaust;
}
}
@@ -335,26 +395,45 @@ namespace mongo {
class CountOp : public QueryOp {
public:
- CountOp( const BSONObj &spec ) : spec_( spec ), count_(), bc_() {}
- virtual void init() {
- query_ = spec_.getObjectField( "query" );
+ CountOp( const string& ns , const BSONObj &spec ) :
+ _ns(ns), count_(),
+ skip_( spec["skip"].numberLong() ),
+ limit_( spec["limit"].numberLong() ),
+ bc_(){
+ }
+
+ virtual void _init() {
c_ = qp().newCursor();
- _matcher.reset( new CoveredIndexMatcher( query_, c_->indexKeyPattern() ) );
- if ( qp().exactKeyMatch() && ! _matcher->needRecord() ) {
+
+ if ( qp().exactKeyMatch() && ! matcher()->needRecord() ) {
query_ = qp().simplifiedQuery( qp().indexKey() );
bc_ = dynamic_cast< BtreeCursor* >( c_.get() );
bc_->forgetEndKey();
}
-
- skip_ = spec_["skip"].numberLong();
- limit_ = spec_["limit"].numberLong();
}
+ virtual bool prepareToYield() {
+ if ( ! _cc ) {
+ _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , c_ , _ns.c_str() ) );
+ }
+ return _cc->prepareToYield( _yieldData );
+ }
+
+ virtual void recoverFromYield() {
+ if ( !ClientCursor::recoverFromYield( _yieldData ) ) {
+ c_.reset();
+ _cc.reset();
+ massert( 13337, "cursor dropped during count", false );
+ // TODO maybe we want to prevent recording the winning plan as well?
+ }
+ }
+
virtual void next() {
if ( !c_->ok() ) {
setComplete();
return;
}
+
if ( bc_ ) {
if ( firstMatch_.isEmpty() ) {
firstMatch_ = bc_->currKeyNode().key;
@@ -371,8 +450,9 @@ namespace mongo {
}
_gotOne();
}
- } else {
- if ( !_matcher->matches(c_->currKey(), c_->currLoc() ) ) {
+ }
+ else {
+ if ( !matcher()->matches(c_->currKey(), c_->currLoc() ) ) {
}
else if( !c_->getsetdup(c_->currLoc()) ) {
_gotOne();
@@ -380,8 +460,12 @@ namespace mongo {
}
c_->advance();
}
- virtual QueryOp *clone() const {
- return new CountOp( spec_ );
+ virtual QueryOp *_createChild() const {
+ CountOp *ret = new CountOp( _ns , BSONObj() );
+ ret->count_ = count_;
+ ret->skip_ = skip_;
+ ret->limit_ = limit_;
+ return ret;
}
long long count() const { return count_; }
virtual bool mayRecordPlan() const { return true; }
@@ -394,95 +478,188 @@ namespace mongo {
}
if ( limit_ > 0 && count_ >= limit_ ){
- setComplete();
+ setStop();
return;
}
count_++;
}
- BSONObj spec_;
+ string _ns;
+
long long count_;
long long skip_;
long long limit_;
- auto_ptr< Cursor > c_;
+ shared_ptr<Cursor> c_;
BSONObj query_;
BtreeCursor *bc_;
- auto_ptr< CoveredIndexMatcher > _matcher;
BSONObj firstMatch_;
+
+ ClientCursor::CleanupPointer _cc;
+ ClientCursor::YieldData _yieldData;
};
-
+
/* { count: "collectionname"[, query: <query>] }
returns -1 on ns does not exist error.
*/
long long runCount( const char *ns, const BSONObj &cmd, string &err ) {
+ Client::Context cx(ns);
NamespaceDetails *d = nsdetails( ns );
if ( !d ) {
err = "ns missing";
return -1;
}
BSONObj query = cmd.getObjectField("query");
-
+
// count of all objects
if ( query.isEmpty() ){
- long long num = d->nrecords;
- num = num - cmd["skip"].numberLong();
- if ( num < 0 ) {
- num = 0;
- }
- if ( cmd["limit"].isNumber() ){
- long long limit = cmd["limit"].numberLong();
- if ( limit < num ){
- num = limit;
- }
- }
- return num;
+ return applySkipLimit( d->nrecords , cmd );
}
- QueryPlanSet qps( ns, query, BSONObj() );
- CountOp original( cmd );
- shared_ptr< CountOp > res = qps.runOp( original );
+ MultiPlanScanner mps( ns, query, BSONObj(), 0, true, BSONObj(), BSONObj(), false, true );
+ CountOp original( ns , cmd );
+ shared_ptr< CountOp > res = mps.runOp( original );
if ( !res->complete() ) {
log() << "Count with ns: " << ns << " and query: " << query
- << " failed with exception: " << res->exceptionMessage()
+ << " failed with exception: " << res->exception()
<< endl;
return 0;
}
return res->count();
}
+
+ class ExplainBuilder {
+ public:
+ ExplainBuilder() : _i() {}
+ void ensureStartScan() {
+ if ( !_a.get() ) {
+ _a.reset( new BSONArrayBuilder() );
+ }
+ }
+ void noteCursor( Cursor *c ) {
+ BSONObjBuilder b( _a->subobjStart() );
+ b << "cursor" << c->toString() << "indexBounds" << c->prettyIndexBounds();
+ b.done();
+ }
+ void noteScan( Cursor *c, long long nscanned, long long nscannedObjects, int n, bool scanAndOrder, int millis, bool hint ) {
+ if ( _i == 1 ) {
+ _c.reset( new BSONArrayBuilder() );
+ *_c << _b->obj();
+ }
+ if ( _i == 0 ) {
+ _b.reset( new BSONObjBuilder() );
+ } else {
+ _b.reset( new BSONObjBuilder( _c->subobjStart() ) );
+ }
+ *_b << "cursor" << c->toString();
+ _b->appendNumber( "nscanned", nscanned );
+ _b->appendNumber( "nscannedObjects", nscannedObjects );
+ *_b << "n" << n;
+
+ if ( scanAndOrder )
+ *_b << "scanAndOrder" << true;
+ *_b << "millis" << millis;
+
+ *_b << "indexBounds" << c->prettyIndexBounds();
+
+ if ( !hint ) {
+ *_b << "allPlans" << _a->arr();
+ }
+ if ( _i != 0 ) {
+ _b->done();
+ }
+ _a.reset( 0 );
+ ++_i;
+ }
+ BSONObj finishWithSuffix( long long nscanned, long long nscannedObjects, int n, int millis, const BSONObj &suffix ) {
+ if ( _i > 1 ) {
+ BSONObjBuilder b;
+ b << "clauses" << _c->arr();
+ b.appendNumber( "nscanned", nscanned );
+ b.appendNumber( "nscanneObjects", nscannedObjects );
+ b << "n" << n;
+ b << "millis" << millis;
+ b.appendElements( suffix );
+ return b.obj();
+ } else {
+ _b->appendElements( suffix );
+ return _b->obj();
+ }
+ }
+ private:
+ auto_ptr< BSONArrayBuilder > _a;
+ auto_ptr< BSONObjBuilder > _b;
+ auto_ptr< BSONArrayBuilder > _c;
+ int _i;
+ };
+
// Implements database 'query' requests using the query optimizer's QueryOp interface
class UserQueryOp : public QueryOp {
public:
- UserQueryOp( const ParsedQuery& pq ) :
- //int ntoskip, int ntoreturn, const BSONObj &order, bool wantMore,
- // bool explain, FieldMatcher *filter, int queryOptions ) :
+ UserQueryOp( const ParsedQuery& pq, Message &response, ExplainBuilder &eb, CurOp &curop ) :
_buf( 32768 ) , // TODO be smarter here
_pq( pq ) ,
_ntoskip( pq.getSkip() ) ,
- _nscanned(0), _nscannedObjects(0),
+ _nscanned(0), _oldNscanned(0), _nscannedObjects(0), _oldNscannedObjects(0),
_n(0),
+ _oldN(0),
+ _chunkMatcher(shardingState.getChunkMatcher(pq.ns())),
_inMemSort(false),
_saveClientCursor(false),
- _oplogReplay( pq.hasOption( QueryOption_OplogReplay) )
+ _wouldSaveClientCursor(false),
+ _oplogReplay( pq.hasOption( QueryOption_OplogReplay) ),
+ _response( response ),
+ _eb( eb ),
+ _curop( curop )
{}
- virtual void init() {
- _buf.skip( sizeof( QueryResult ) );
+ virtual void _init() {
+ // only need to put the QueryResult fields there if we're building the first buffer in the message.
+ if ( _response.empty() ) {
+ _buf.skip( sizeof( QueryResult ) );
+ }
if ( _oplogReplay ) {
_findingStartCursor.reset( new FindingStartCursor( qp() ) );
} else {
_c = qp().newCursor( DiskLoc() , _pq.getNumToReturn() + _pq.getSkip() );
}
- _matcher.reset(new CoveredIndexMatcher( qp().query() , qp().indexKey()));
if ( qp().scanAndOrderRequired() ) {
_inMemSort = true;
_so.reset( new ScanAndOrder( _pq.getSkip() , _pq.getNumToReturn() , _pq.getOrder() ) );
}
+
+ if ( _pq.isExplain() ) {
+ _eb.noteCursor( _c.get() );
+ }
}
+ virtual bool prepareToYield() {
+ if ( _findingStartCursor.get() ) {
+ return _findingStartCursor->prepareToYield();
+ } else {
+ if ( ! _cc ) {
+ _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , _c , _pq.ns() ) );
+ }
+ return _cc->prepareToYield( _yieldData );
+ }
+ }
+
+ virtual void recoverFromYield() {
+ if ( _findingStartCursor.get() ) {
+ _findingStartCursor->recoverFromYield();
+ } else {
+ if ( !ClientCursor::recoverFromYield( _yieldData ) ) {
+ _c.reset();
+ _cc.reset();
+ massert( 13338, "cursor dropped during query", false );
+ // TODO maybe we want to prevent recording the winning plan as well?
+ }
+ }
+ }
+
virtual void next() {
if ( _findingStartCursor.get() ) {
if ( _findingStartCursor->done() ) {
@@ -495,18 +672,23 @@ namespace mongo {
}
if ( !_c->ok() ) {
- finish();
+ finish( false );
return;
}
-
+
bool mayCreateCursor1 = _pq.wantMore() && ! _inMemSort && _pq.getNumToReturn() != 1 && useCursors;
if( 0 ) {
cout << "SCANNING this: " << this << " key: " << _c->currKey() << " obj: " << _c->current() << endl;
}
+
+ if ( _pq.getMaxScan() && _nscanned >= _pq.getMaxScan() ){
+ finish( true ); //?
+ return;
+ }
_nscanned++;
- if ( !_matcher->matches(_c->currKey(), _c->currLoc() , &_details ) ) {
+ if ( !matcher()->matches(_c->currKey(), _c->currLoc() , &_details ) ) {
// not a match, continue onward
if ( _details.loadedObject )
_nscannedObjects++;
@@ -514,15 +696,18 @@ namespace mongo {
else {
_nscannedObjects++;
DiskLoc cl = _c->currLoc();
- if( !_c->getsetdup(cl) ) {
+ if ( _chunkMatcher && ! _chunkMatcher->belongsToMe( _c->currKey(), _c->currLoc() ) ){
+ // cout << "TEMP skipping un-owned chunk: " << _c->current() << endl;
+ }
+ else if( _c->getsetdup(cl) ) {
+ // dup
+ }
+ else {
// got a match.
- BSONObj js = _pq.returnKey() ? _c->currKey() : _c->current();
- assert( js.objsize() >= 0 ); //defensive for segfaults
-
if ( _inMemSort ) {
// note: no cursors for non-indexed, ordered results. results must be fairly small.
- _so->add(js);
+ _so->add( _pq.returnKey() ? _c->currKey() : _c->current(), _pq.showDiskLoc() ? &cl : 0 );
}
else if ( _ntoskip > 0 ) {
_ntoskip--;
@@ -530,38 +715,48 @@ namespace mongo {
else {
if ( _pq.isExplain() ) {
_n++;
- if ( _n >= _pq.getNumToReturn() && !_pq.wantMore() ) {
+ if ( n() >= _pq.getNumToReturn() && !_pq.wantMore() ) {
// .limit() was used, show just that much.
- finish();
+ finish( true ); //?
return;
}
}
else {
+
if ( _pq.returnKey() ){
BSONObjBuilder bb( _buf );
- bb.appendKeys( _c->indexKeyPattern() , js );
+ bb.appendKeys( _c->indexKeyPattern() , _c->currKey() );
bb.done();
}
else {
- fillQueryResultFromObj( _buf , _pq.getFields() , js );
+ BSONObj js = _c->current();
+ assert( js.isValid() );
+
+ if ( _oplogReplay ){
+ BSONElement e = js["ts"];
+ if ( e.type() == Date || e.type() == Timestamp )
+ _slaveReadTill = e._opTime();
+ }
+
+ fillQueryResultFromObj( _buf , _pq.getFields() , js , (_pq.showDiskLoc() ? &cl : 0));
}
_n++;
if ( ! _c->supportGetMore() ){
- if ( _pq.enough( _n ) || _buf.len() >= MaxBytesToReturnToClientAtOnce ){
- finish();
+ if ( _pq.enough( n() ) || _buf.len() >= MaxBytesToReturnToClientAtOnce ){
+ finish( true );
return;
}
}
- else if ( _pq.enoughForFirstBatch( _n , _buf.len() ) ){
+ else if ( _pq.enoughForFirstBatch( n() , _buf.len() ) ){
/* if only 1 requested, no cursor saved for efficiency...we assume it is findOne() */
if ( mayCreateCursor1 ) {
- _c->advance();
- if ( _c->ok() ) {
+ _wouldSaveClientCursor = true;
+ if ( _c->advance() ) {
// more...so save a cursor
_saveClientCursor = true;
}
}
- finish();
+ finish( true );
return;
}
}
@@ -571,7 +766,8 @@ namespace mongo {
_c->advance();
}
- void finish() {
+ // this plan won, so set data for response broadly
+ void finish( bool stop ) {
if ( _pq.isExplain() ) {
_n = _inMemSort ? _so->size() : _n;
}
@@ -586,121 +782,174 @@ namespace mongo {
if ( _c->tailable() )
_saveClientCursor = true;
- setComplete();
+ if ( _pq.isExplain()) {
+ _eb.noteScan( _c.get(), _nscanned, _nscannedObjects, _n, scanAndOrderRequired(), _curop.elapsedMillis(), useHints && !_pq.getHint().eoo() );
+ } else {
+ _response.appendData( _buf.buf(), _buf.len() );
+ _buf.decouple();
+ }
+ if ( stop ) {
+ setStop();
+ } else {
+ setComplete();
+ }
+
+ }
+
+ void finishExplain( const BSONObj &suffix ) {
+ BSONObj obj = _eb.finishWithSuffix( nscanned(), nscannedObjects(), n(), _curop.elapsedMillis(), suffix);
+ fillQueryResultFromObj(_buf, 0, obj);
+ _n = 1;
+ _oldN = 0;
+ _response.appendData( _buf.buf(), _buf.len() );
+ _buf.decouple();
}
virtual bool mayRecordPlan() const { return _pq.getNumToReturn() != 1; }
- virtual QueryOp *clone() const {
- return new UserQueryOp( _pq );
+ virtual QueryOp *_createChild() const {
+ if ( _pq.isExplain() ) {
+ _eb.ensureStartScan();
+ }
+ UserQueryOp *ret = new UserQueryOp( _pq, _response, _eb, _curop );
+ ret->_oldN = n();
+ ret->_oldNscanned = nscanned();
+ ret->_oldNscannedObjects = nscannedObjects();
+ ret->_ntoskip = _ntoskip;
+ return ret;
}
- BufBuilder &builder() { return _buf; }
bool scanAndOrderRequired() const { return _inMemSort; }
- auto_ptr< Cursor > cursor() { return _c; }
- auto_ptr< CoveredIndexMatcher > matcher() { return _matcher; }
- int n() const { return _n; }
- long long nscanned() const { return _nscanned; }
- long long nscannedObjects() const { return _nscannedObjects; }
+ shared_ptr<Cursor> cursor() { return _c; }
+ int n() const { return _oldN + _n; }
+ long long nscanned() const { return _nscanned + _oldNscanned; }
+ long long nscannedObjects() const { return _nscannedObjects + _oldNscannedObjects; }
bool saveClientCursor() const { return _saveClientCursor; }
+ bool wouldSaveClientCursor() const { return _wouldSaveClientCursor; }
+
+ void finishForOplogReplay( ClientCursor * cc ){
+ if ( _oplogReplay && ! _slaveReadTill.isNull() )
+ cc->_slaveReadTill = _slaveReadTill;
+ }
private:
BufBuilder _buf;
const ParsedQuery& _pq;
long long _ntoskip;
long long _nscanned;
+ long long _oldNscanned;
long long _nscannedObjects;
+ long long _oldNscannedObjects;
int _n; // found so far
+ int _oldN;
MatchDetails _details;
+ ChunkMatcherPtr _chunkMatcher;
+
bool _inMemSort;
auto_ptr< ScanAndOrder > _so;
- auto_ptr< Cursor > _c;
-
- auto_ptr< CoveredIndexMatcher > _matcher;
+ shared_ptr<Cursor> _c;
+ ClientCursor::CleanupPointer _cc;
+ ClientCursor::YieldData _yieldData;
bool _saveClientCursor;
+ bool _wouldSaveClientCursor;
bool _oplogReplay;
auto_ptr< FindingStartCursor > _findingStartCursor;
+
+ Message &_response;
+ ExplainBuilder &_eb;
+ CurOp &_curop;
+ OpTime _slaveReadTill;
};
/* run a query -- includes checking for and running a Command */
- auto_ptr< QueryResult > runQuery(Message& m, QueryMessage& q, CurOp& curop ) {
+ const char *runQuery(Message& m, QueryMessage& q, CurOp& curop, Message &result) {
StringBuilder& ss = curop.debug().str;
- ParsedQuery pq( q );
- const char *ns = q.ns;
+ shared_ptr<ParsedQuery> pq_shared( new ParsedQuery(q) );
+ ParsedQuery& pq( *pq_shared );
int ntoskip = q.ntoskip;
BSONObj jsobj = q.query;
int queryOptions = q.queryOptions;
- BSONObj snapshotHint;
+ const char *ns = q.ns;
if( logLevel >= 2 )
- log() << "runQuery: " << ns << jsobj << endl;
+ log() << "query: " << ns << jsobj << endl;
- long long nscanned = 0;
- ss << ns << " ntoreturn:" << pq.getNumToReturn();
+ ss << ns;
+ {
+ // only say ntoreturn if nonzero.
+ int n = pq.getNumToReturn();
+ if( n )
+ ss << " ntoreturn:" << n;
+ }
curop.setQuery(jsobj);
- BSONObjBuilder cmdResBuf;
- long long cursorid = 0;
-
- auto_ptr< QueryResult > qr;
- int n = 0;
-
- Client& c = cc();
-
- if ( pq.couldBeCommand() ){
+ if ( pq.couldBeCommand() ) {
BufBuilder bb;
bb.skip(sizeof(QueryResult));
-
+ BSONObjBuilder cmdResBuf;
if ( runCommands(ns, jsobj, curop, bb, cmdResBuf, false, queryOptions) ) {
- ss << " command ";
+ ss << " command: " << jsobj.toString();
curop.markCommand();
- n = 1;
+ auto_ptr< QueryResult > qr;
qr.reset( (QueryResult *) bb.buf() );
bb.decouple();
qr->setResultFlagsToOk();
qr->len = bb.len();
ss << " reslen:" << bb.len();
- // qr->channel = 0;
qr->setOperation(opReply);
- qr->cursorId = cursorid;
+ qr->cursorId = 0;
qr->startingFrom = 0;
- qr->nReturned = n;
+ qr->nReturned = 1;
+ result.setData( qr.release(), true );
}
- return qr;
+ return false;
}
- // regular query
-
- mongolock lk(false); // read lock
- Client::Context ctx( ns , dbpath , &lk );
-
- /* we allow queries to SimpleSlave's -- but not to the slave (nonmaster) member of a replica pair
- so that queries to a pair are realtime consistent as much as possible. use setSlaveOk() to
- query the nonmaster member of a replica pair.
- */
- uassert( 10107 , "not master" , isMaster() || pq.hasOption( QueryOption_SlaveOk ) || replSettings.slave == SimpleSlave );
+ /* --- regular query --- */
+ int n = 0;
BSONElement hint = useHints ? pq.getHint() : BSONElement();
bool explain = pq.isExplain();
bool snapshot = pq.isSnapshot();
- BSONObj query = pq.getFilter();
BSONObj order = pq.getOrder();
+ BSONObj query = pq.getFilter();
+
+ /* The ElemIter will not be happy if this isn't really an object. So throw exception
+ here when that is true.
+ (Which may indicate bad data from client.)
+ */
+ if ( query.objsize() == 0 ) {
+ out() << "Bad query object?\n jsobj:";
+ out() << jsobj.toString() << "\n query:";
+ out() << query.toString() << endl;
+ uassert( 10110 , "bad query object", false);
+ }
+
+ /* --- read lock --- */
+
+ mongolock lk(false);
+
+ Client::Context ctx( ns , dbpath , &lk );
+
+ replVerifyReadsOk(pq);
if ( pq.hasOption( QueryOption_CursorTailable ) ) {
NamespaceDetails *d = nsdetails( ns );
uassert( 13051, "tailable cursor requested on non capped collection", d && d->capped );
+ const BSONObj nat1 = BSON( "$natural" << 1 );
if ( order.isEmpty() ) {
- order = BSON( "$natural" << 1 );
+ order = nat1;
} else {
- uassert( 13052, "only {$natural:1} order allowed for tailable cursor", order == BSON( "$natural" << 1 ) );
+ uassert( 13052, "only {$natural:1} order allowed for tailable cursor", order == nat1 );
}
}
+ BSONObj snapshotHint; // put here to keep the data in scope
if( snapshot ) {
NamespaceDetails *d = nsdetails(ns);
if ( d ){
@@ -722,25 +971,12 @@ namespace mongo {
}
}
- /* The ElemIter will not be happy if this isn't really an object. So throw exception
- here when that is true.
- (Which may indicate bad data from client.)
- */
- if ( query.objsize() == 0 ) {
- out() << "Bad query object?\n jsobj:";
- out() << jsobj.toString() << "\n query:";
- out() << query.toString() << endl;
- uassert( 10110 , "bad query object", false);
- }
-
-
- if ( ! explain && isSimpleIdQuery( query ) && !pq.hasOption( QueryOption_CursorTailable ) ) {
- nscanned = 1;
-
+ if ( ! (explain || pq.showDiskLoc()) && isSimpleIdQuery( query ) && !pq.hasOption( QueryOption_CursorTailable ) ) {
bool nsFound = false;
bool indexFound = false;
BSONObj resObject;
+ Client& c = cc();
bool found = Helpers::findById( c, ns , query , resObject , &nsFound , &indexFound );
if ( nsFound == false || indexFound == true ){
BufBuilder bb(sizeof(QueryResult)+resObject.objsize()+32);
@@ -751,16 +987,18 @@ namespace mongo {
n = 1;
fillQueryResultFromObj( bb , pq.getFields() , resObject );
}
+ auto_ptr< QueryResult > qr;
qr.reset( (QueryResult *) bb.buf() );
bb.decouple();
qr->setResultFlagsToOk();
qr->len = bb.len();
ss << " reslen:" << bb.len();
qr->setOperation(opReply);
- qr->cursorId = cursorid;
+ qr->cursorId = 0;
qr->startingFrom = 0;
- qr->nReturned = n;
- return qr;
+ qr->nReturned = n;
+ result.setData( qr.release(), true );
+ return false;
}
}
@@ -768,67 +1006,73 @@ namespace mongo {
BSONObj oldPlan;
if ( explain && ! pq.hasIndexSpecifier() ){
- QueryPlanSet qps( ns, query, order );
- if ( qps.usingPrerecordedPlan() )
- oldPlan = qps.explain();
+ MultiPlanScanner mps( ns, query, order );
+ if ( mps.usingPrerecordedPlan() )
+ oldPlan = mps.oldExplain();
+ }
+ auto_ptr< MultiPlanScanner > mps( new MultiPlanScanner( ns, query, order, &hint, !explain, pq.getMin(), pq.getMax(), false, true ) );
+ BSONObj explainSuffix;
+ if ( explain ) {
+ BSONObjBuilder bb;
+ if ( !oldPlan.isEmpty() )
+ bb.append( "oldPlan", oldPlan.firstElement().embeddedObject().firstElement().embeddedObject() );
+ explainSuffix = bb.obj();
}
- QueryPlanSet qps( ns, query, order, &hint, !explain, pq.getMin(), pq.getMax() );
- UserQueryOp original( pq );
- shared_ptr< UserQueryOp > o = qps.runOp( original );
+ ExplainBuilder eb;
+ UserQueryOp original( pq, result, eb, curop );
+ shared_ptr< UserQueryOp > o = mps->runOp( original );
UserQueryOp &dqo = *o;
- massert( 10362 , dqo.exceptionMessage(), dqo.complete() );
+ if ( ! dqo.complete() )
+ throw MsgAssertionException( dqo.exception() );
+ if ( explain ) {
+ dqo.finishExplain( explainSuffix );
+ }
n = dqo.n();
- nscanned = dqo.nscanned();
+ long long nscanned = dqo.nscanned();
if ( dqo.scanAndOrderRequired() )
ss << " scanAndOrder ";
- auto_ptr<Cursor> cursor = dqo.cursor();
- log( 5 ) << " used cursor: " << cursor.get() << endl;
- if ( dqo.saveClientCursor() ) {
- // the clientcursor now owns the Cursor* and 'c' is released:
- ClientCursor *cc = new ClientCursor(cursor, ns, !(queryOptions & QueryOption_NoCursorTimeout));
+ shared_ptr<Cursor> cursor = dqo.cursor();
+ if( logLevel >= 5 )
+ log() << " used cursor: " << cursor.get() << endl;
+ long long cursorid = 0;
+ const char * exhaust = 0;
+ if ( dqo.saveClientCursor() || ( dqo.wouldSaveClientCursor() && mps->mayRunMore() ) ) {
+ ClientCursor *cc;
+ bool moreClauses = mps->mayRunMore();
+ if ( moreClauses ) {
+ // this MultiCursor will use a dumb NoOp to advance(), so no need to specify mayYield
+ shared_ptr< Cursor > multi( new MultiCursor( mps, cursor, dqo.matcher(), dqo ) );
+ cc = new ClientCursor(queryOptions, multi, ns);
+ } else {
+ cursor->setMatcher( dqo.matcher() );
+ cc = new ClientCursor( queryOptions, cursor, ns );
+ }
cursorid = cc->cursorid;
cc->query = jsobj.getOwned();
- DEV out() << " query has more, cursorid: " << cursorid << endl;
- cc->matcher = dqo.matcher();
+ DEV tlog() << "query has more, cursorid: " << cursorid << endl;
cc->pos = n;
+ cc->pq = pq_shared;
cc->fields = pq.getFieldPtr();
cc->originalMessage = m;
cc->updateLocation();
- if ( !cc->c->ok() && cc->c->tailable() ) {
- DEV out() << " query has no more but tailable, cursorid: " << cursorid << endl;
- } else {
- DEV out() << " query has more, cursorid: " << cursorid << endl;
+ if ( !cc->c->ok() && cc->c->tailable() )
+ DEV tlog() << "query has no more but tailable, cursorid: " << cursorid << endl;
+ if( queryOptions & QueryOption_Exhaust ) {
+ exhaust = ns;
+ ss << " exhaust ";
}
+ dqo.finishForOplogReplay(cc);
}
- if ( explain ) {
- BSONObjBuilder builder;
- builder.append("cursor", cursor->toString());
- builder.appendArray("indexBounds", cursor->prettyIndexBounds());
- builder.appendNumber("nscanned", dqo.nscanned() );
- builder.appendNumber("nscannedObjects", dqo.nscannedObjects() );
- builder.append("n", n);
- if ( dqo.scanAndOrderRequired() )
- builder.append("scanAndOrder", true);
- builder.append("millis", curop.elapsedMillis());
- if ( !oldPlan.isEmpty() )
- builder.append( "oldPlan", oldPlan.firstElement().embeddedObject().firstElement().embeddedObject() );
- if ( hint.eoo() )
- builder.appendElements(qps.explain());
- BSONObj obj = builder.done();
- fillQueryResultFromObj(dqo.builder(), 0, obj);
- n = 1;
- }
- qr.reset( (QueryResult *) dqo.builder().buf() );
- dqo.builder().decouple();
+
+ QueryResult *qr = (QueryResult *) result.header();
qr->cursorId = cursorid;
qr->setResultFlagsToOk();
- qr->len = dqo.builder().len();
+ // qr->len is updated automatically by appendData()
ss << " reslen:" << qr->len;
qr->setOperation(opReply);
qr->startingFrom = 0;
qr->nReturned = n;
-
int duration = curop.elapsedMillis();
bool dbprofile = curop.shouldDBProfile( duration );
if ( dbprofile || duration >= cmdLine.slowMS ) {
@@ -837,10 +1081,10 @@ namespace mongo {
ss << " ntoskip:" << ntoskip;
if ( dbprofile )
ss << " \nquery: ";
- ss << jsobj << ' ';
+ ss << jsobj.toString() << ' ';
}
ss << " nreturned:" << n;
- return qr;
+ return exhaust;
}
} // namespace mongo