diff options
author | Antonin Kral <a.kral@bobek.cz> | 2011-09-14 17:08:06 +0200 |
---|---|---|
committer | Antonin Kral <a.kral@bobek.cz> | 2011-09-14 17:08:06 +0200 |
commit | 5d342a758c6095b4d30aba0750b54f13b8916f51 (patch) | |
tree | 762e9aa84781f5e3b96db2c02d356c29cf0217c0 /db/ops | |
parent | cbe2d992e9cd1ea66af9fa91df006106775d3073 (diff) | |
download | mongodb-5d342a758c6095b4d30aba0750b54f13b8916f51.tar.gz |
Imported Upstream version 2.0.0
Diffstat (limited to 'db/ops')
-rw-r--r-- | db/ops/delete.cpp | 242 | ||||
-rw-r--r-- | db/ops/delete.h | 33 | ||||
-rw-r--r-- | db/ops/query.cpp | 1020 | ||||
-rw-r--r-- | db/ops/query.h | 250 | ||||
-rw-r--r-- | db/ops/update.cpp | 1369 | ||||
-rw-r--r-- | db/ops/update.h | 681 |
6 files changed, 3595 insertions, 0 deletions
diff --git a/db/ops/delete.cpp b/db/ops/delete.cpp new file mode 100644 index 0000000..3009047 --- /dev/null +++ b/db/ops/delete.cpp @@ -0,0 +1,242 @@ +// delete.cpp + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "pch.h" +#include "delete.h" +#include "../queryoptimizer.h" +#include "../oplog.h" + +namespace mongo { + + // Just try to identify best plan. + class DeleteOp : public MultiCursor::CursorOp { + public: + DeleteOp( bool justOne, int& bestCount, int orClauseIndex = -1 ) : + justOne_( justOne ), + count_(), + bestCount_( bestCount ), + _nscanned(), + _orClauseIndex( orClauseIndex ) { + } + virtual void _init() { + c_ = qp().newCursor(); + } + virtual bool prepareToYield() { + if ( _orClauseIndex > 0 ) { + return false; + } + if ( ! _cc ) { + _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , c_ , qp().ns() ) ); + } + return _cc->prepareToYield( _yieldData ); + } + virtual void recoverFromYield() { + if ( !ClientCursor::recoverFromYield( _yieldData ) ) { + _cc.reset(); + c_.reset(); + massert( 13340, "cursor dropped during delete", false ); + } + } + virtual long long nscanned() { + return c_.get() ? 
c_->nscanned() : _nscanned; + } + virtual void next() { + if ( !c_->ok() ) { + setComplete(); + return; + } + + DiskLoc rloc = c_->currLoc(); + + if ( matcher( c_ )->matchesCurrent(c_.get()) ) { + if ( !c_->getsetdup(rloc) ) + ++count_; + } + + c_->advance(); + _nscanned = c_->nscanned(); + + if ( _orClauseIndex > 0 && _nscanned >= 100 ) { + setComplete(); + return; + } + + if ( count_ > bestCount_ ) + bestCount_ = count_; + + if ( count_ > 0 ) { + if ( justOne_ ) + setComplete(); + else if ( _nscanned >= 100 && count_ == bestCount_ ) + setComplete(); + } + } + virtual bool mayRecordPlan() const { return !justOne_; } + virtual QueryOp *_createChild() const { + bestCount_ = 0; // should be safe to reset this in contexts where createChild() is called + return new DeleteOp( justOne_, bestCount_, _orClauseIndex + 1 ); + } + virtual shared_ptr<Cursor> newCursor() const { return qp().newCursor(); } + private: + bool justOne_; + int count_; + int &bestCount_; + long long _nscanned; + shared_ptr<Cursor> c_; + ClientCursor::CleanupPointer _cc; + ClientCursor::YieldData _yieldData; + // Avoid yielding in the MultiPlanScanner when not the first $or clause - just a temporary implementaiton for now. SERVER-3555 + int _orClauseIndex; + }; + + /* ns: namespace, e.g. <database>.<collection> + pattern: the "where" clause / criteria + justOne: stop after 1 match + god: allow access to system namespaces, and don't yield + */ + long long deleteObjects(const char *ns, BSONObj pattern, bool justOneOrig, bool logop, bool god, RemoveSaver * rs ) { + if( !god ) { + if ( strstr(ns, ".system.") ) { + /* note a delete from system.indexes would corrupt the db + if done here, as there are pointers into those objects in + NamespaceDetails. 
+ */ + uassert(12050, "cannot delete from system namespace", legalClientSystemNS( ns , true ) ); + } + if ( strchr( ns , '$' ) ) { + log() << "cannot delete from collection with reserved $ in name: " << ns << endl; + uassert( 10100 , "cannot delete from collection with reserved $ in name", strchr(ns, '$') == 0 ); + } + } + + { + NamespaceDetails *d = nsdetails( ns ); + if ( ! d ) + return 0; + uassert( 10101 , "can't remove from a capped collection" , ! d->capped ); + } + + long long nDeleted = 0; + + int best = 0; + shared_ptr< MultiCursor::CursorOp > opPtr( new DeleteOp( justOneOrig, best ) ); + shared_ptr< MultiCursor > creal( new MultiCursor( ns, pattern, BSONObj(), opPtr, !god ) ); + + if( !creal->ok() ) + return nDeleted; + + shared_ptr< Cursor > cPtr = creal; + auto_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout, cPtr, ns) ); + cc->setDoingDeletes( true ); + + CursorId id = cc->cursorid(); + + bool justOne = justOneOrig; + bool canYield = !god && !creal->matcher()->docMatcher().atomic(); + + do { + // TODO: we can generalize this I believe + // + bool willNeedRecord = creal->matcher()->needRecord() || pattern.isEmpty() || isSimpleIdQuery( pattern ); + if ( ! willNeedRecord ) { + // TODO: this is a total hack right now + // check if the index full encompasses query + + if ( pattern.nFields() == 1 && + str::equals( pattern.firstElement().fieldName() , creal->indexKeyPattern().firstElement().fieldName() ) ) + willNeedRecord = true; + } + + if ( canYield && ! cc->yieldSometimes( willNeedRecord ? ClientCursor::WillNeed : ClientCursor::MaybeCovered ) ) { + cc.release(); // has already been deleted elsewhere + // TODO should we assert or something? 
+ break; + } + if ( !cc->ok() ) { + break; // if we yielded, could have hit the end + } + + // this way we can avoid calling updateLocation() every time (expensive) + // as well as some other nuances handled + cc->setDoingDeletes( true ); + + DiskLoc rloc = cc->currLoc(); + BSONObj key = cc->currKey(); + + // NOTE Calling advance() may change the matcher, so it's important + // to try to match first. + bool match = creal->matcher()->matchesCurrent(creal.get()); + + if ( ! cc->advance() ) + justOne = true; + + if ( ! match ) + continue; + + assert( !cc->c()->getsetdup(rloc) ); // can't be a dup, we deleted it! + + if ( !justOne ) { + /* NOTE: this is SLOW. this is not good, noteLocation() was designed to be called across getMore + blocks. here we might call millions of times which would be bad. + */ + cc->c()->noteLocation(); + } + + if ( logop ) { + BSONElement e; + if( BSONObj( rloc.rec() ).getObjectID( e ) ) { + BSONObjBuilder b; + b.append( e ); + bool replJustOne = true; + logOp( "d", ns, b.done(), 0, &replJustOne ); + } + else { + problem() << "deleted object without id, not logging" << endl; + } + } + + if ( rs ) + rs->goingToDelete( rloc.obj() /*cc->c->current()*/ ); + + theDataFileMgr.deleteRecord(ns, rloc.rec(), rloc); + nDeleted++; + if ( justOne ) { + break; + } + cc->c()->checkLocation(); + + if( !god ) + getDur().commitIfNeeded(); + + if( debug && god && nDeleted == 100 ) + log() << "warning high number of deletes with god=true which could use significant memory" << endl; + } + while ( cc->ok() ); + + if ( cc.get() && ClientCursor::find( id , false ) == 0 ) { + // TODO: remove this and the id declaration above if this doesn't trigger + // if it does, then i'm very confused (ERH 06/2011) + error() << "this should be impossible" << endl; + printStackTrace(); + cc.release(); + } + + return nDeleted; + } + +} diff --git a/db/ops/delete.h b/db/ops/delete.h new file mode 100644 index 0000000..a74b7a6 --- /dev/null +++ b/db/ops/delete.h @@ -0,0 +1,33 @@ +// 
delete.h + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#pragma once + +#include "../../pch.h" +#include "../jsobj.h" +#include "../clientcursor.h" + +namespace mongo { + + class RemoveSaver; + + // If justOne is true, deletedId is set to the id of the deleted object. + long long deleteObjects(const char *ns, BSONObj pattern, bool justOne, bool logop = false, bool god=false, RemoveSaver * rs=0); + + +} diff --git a/db/ops/query.cpp b/db/ops/query.cpp new file mode 100644 index 0000000..cf4dc98 --- /dev/null +++ b/db/ops/query.cpp @@ -0,0 +1,1020 @@ +// query.cpp + +/** + * Copyright (C) 2008 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "pch.h" +#include "query.h" +#include "../pdfile.h" +#include "../jsobjmanipulator.h" +#include "../../bson/util/builder.h" +#include <time.h> +#include "../introspect.h" +#include "../btree.h" +#include "../../util/lruishmap.h" +#include "../json.h" +#include "../repl.h" +#include "../replutil.h" +#include "../scanandorder.h" +#include "../security.h" +#include "../curop-inl.h" +#include "../commands.h" +#include "../queryoptimizer.h" +#include "../lasterror.h" +#include "../../s/d_logic.h" +#include "../repl_block.h" +#include "../../server.h" + +namespace mongo { + + /* We cut off further objects once we cross this threshold; thus, you might get + a little bit more than this, it is a threshold rather than a limit. + */ + const int MaxBytesToReturnToClientAtOnce = 4 * 1024 * 1024; + + //ns->query->DiskLoc +// LRUishMap<BSONObj,DiskLoc,5> lrutest(123); + + extern bool useCursors; + extern bool useHints; + + bool runCommands(const char *ns, BSONObj& jsobj, CurOp& curop, BufBuilder &b, BSONObjBuilder& anObjBuilder, bool fromRepl, int queryOptions) { + try { + return _runCommands(ns, jsobj, b, anObjBuilder, fromRepl, queryOptions); + } + catch ( AssertionException& e ) { + e.getInfo().append( anObjBuilder , "assertion" , "assertionCode" ); + curop.debug().exceptionInfo = e.getInfo(); + } + anObjBuilder.append("errmsg", "db assertion failure"); + anObjBuilder.append("ok", 0.0); + BSONObj x = anObjBuilder.done(); + b.appendBuf((void*) x.objdata(), x.objsize()); + return true; + } + + + BSONObj id_obj = fromjson("{\"_id\":1}"); + BSONObj empty_obj = fromjson("{}"); + + + //int dump = 0; + + /* empty result for error conditions */ + QueryResult* emptyMoreResult(long long cursorid) { + BufBuilder b(32768); + b.skip(sizeof(QueryResult)); + QueryResult *qr = (QueryResult *) b.buf(); + qr->cursorId = 0; // 0 indicates no more data to retrieve. 
+ qr->startingFrom = 0; + qr->len = b.len(); + qr->setOperation(opReply); + qr->initializeResultFlags(); + qr->nReturned = 0; + b.decouple(); + return qr; + } + + QueryResult* processGetMore(const char *ns, int ntoreturn, long long cursorid , CurOp& curop, int pass, bool& exhaust ) { + exhaust = false; + ClientCursor::Pointer p(cursorid); + ClientCursor *cc = p.c(); + + int bufSize = 512 + sizeof( QueryResult ) + MaxBytesToReturnToClientAtOnce; + + BufBuilder b( bufSize ); + b.skip(sizeof(QueryResult)); + int resultFlags = ResultFlag_AwaitCapable; + int start = 0; + int n = 0; + + if ( unlikely(!cc) ) { + log() << "getMore: cursorid not found " << ns << " " << cursorid << endl; + cursorid = 0; + resultFlags = ResultFlag_CursorNotFound; + } + else { + // check for spoofing of the ns such that it does not match the one originally there for the cursor + uassert(14833, "auth error", str::equals(ns, cc->ns().c_str())); + + if ( pass == 0 ) + cc->updateSlaveLocation( curop ); + + int queryOptions = cc->queryOptions(); + + curop.debug().query = cc->query(); + + start = cc->pos(); + Cursor *c = cc->c(); + c->checkLocation(); + DiskLoc last; + + scoped_ptr<Projection::KeyOnly> keyFieldsOnly; + if ( cc->modifiedKeys() == false && cc->isMultiKey() == false && cc->fields ) + keyFieldsOnly.reset( cc->fields->checkKey( cc->indexKeyPattern() ) ); + + // This manager may be stale, but it's the state of chunking when the cursor was created. + ShardChunkManagerPtr manager = cc->getChunkManager(); + + while ( 1 ) { + if ( !c->ok() ) { + if ( c->tailable() ) { + /* when a tailable cursor hits "EOF", ok() goes false, and current() is null. however + advance() can still be retries as a reactivation attempt. when there is new data, it will + return true. that's what we are doing here. 
+ */ + if ( c->advance() ) + continue; + + if( n == 0 && (queryOptions & QueryOption_AwaitData) && pass < 1000 ) { + return 0; + } + + break; + } + p.release(); + bool ok = ClientCursor::erase(cursorid); + assert(ok); + cursorid = 0; + cc = 0; + break; + } + + // in some cases (clone collection) there won't be a matcher + if ( c->matcher() && !c->matcher()->matchesCurrent( c ) ) { + } + else if ( manager && ! manager->belongsToMe( cc ) ){ + LOG(2) << "cursor skipping document in un-owned chunk: " << c->current() << endl; + } + else { + if( c->getsetdup(c->currLoc()) ) { + //out() << " but it's a dup \n"; + } + else { + last = c->currLoc(); + n++; + + if ( keyFieldsOnly ) { + fillQueryResultFromObj(b, 0, keyFieldsOnly->hydrate( c->currKey() ) ); + } + else { + BSONObj js = c->current(); + // show disk loc should be part of the main query, not in an $or clause, so this should be ok + fillQueryResultFromObj(b, cc->fields.get(), js, ( cc->pq.get() && cc->pq->showDiskLoc() ? &last : 0)); + } + + if ( ( ntoreturn && n >= ntoreturn ) || b.len() > MaxBytesToReturnToClientAtOnce ) { + c->advance(); + cc->incPos( n ); + break; + } + } + } + c->advance(); + + if ( ! 
cc->yieldSometimes( ClientCursor::MaybeCovered ) ) { + ClientCursor::erase(cursorid); + cursorid = 0; + cc = 0; + p.deleted(); + break; + } + } + + if ( cc ) { + cc->updateLocation(); + cc->mayUpgradeStorage(); + cc->storeOpForSlave( last ); + exhaust = cc->queryOptions() & QueryOption_Exhaust; + } + } + + QueryResult *qr = (QueryResult *) b.buf(); + qr->len = b.len(); + qr->setOperation(opReply); + qr->_resultFlags() = resultFlags; + qr->cursorId = cursorid; + qr->startingFrom = start; + qr->nReturned = n; + b.decouple(); + + return qr; + } + + class CountOp : public QueryOp { + public: + CountOp( const string& ns , const BSONObj &spec ) : + _ns(ns), _capped(false), _count(), _myCount(), + _skip( spec["skip"].numberLong() ), + _limit( spec["limit"].numberLong() ), + _nscanned(), + _bc() { + } + + virtual void _init() { + _c = qp().newCursor(); + _capped = _c->capped(); + if ( qp().exactKeyMatch() && ! matcher( _c )->needRecord() ) { + _query = qp().simplifiedQuery( qp().indexKey() ); + _bc = dynamic_cast< BtreeCursor* >( _c.get() ); + _bc->forgetEndKey(); + } + } + + virtual long long nscanned() { + return _c.get() ? _c->nscanned() : _nscanned; + } + + virtual bool prepareToYield() { + if ( _c && !_cc ) { + _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , _c , _ns.c_str() ) ); + } + if ( _cc ) { + return _cc->prepareToYield( _yieldData ); + } + // no active cursor - ok to yield + return true; + } + + virtual void recoverFromYield() { + if ( _cc && !ClientCursor::recoverFromYield( _yieldData ) ) { + _c.reset(); + _cc.reset(); + + if ( _capped ) { + msgassertedNoTrace( 13337, str::stream() << "capped cursor overrun during count: " << _ns ); + } + else if ( qp().mustAssertOnYieldFailure() ) { + msgassertedNoTrace( 15891, str::stream() << "CountOp::recoverFromYield() failed to recover: " << _ns ); + } + else { + // we don't fail query since we're fine with returning partial data if collection dropped + } + } + } + + virtual void next() { + if ( ! 
_c || !_c->ok() ) { + setComplete(); + return; + } + + _nscanned = _c->nscanned(); + if ( _bc ) { + if ( _firstMatch.isEmpty() ) { + _firstMatch = _bc->currKey().getOwned(); + // if not match + if ( _query.woCompare( _firstMatch, BSONObj(), false ) ) { + setComplete(); + return; + } + _gotOne(); + } + else { + if ( ! _firstMatch.equal( _bc->currKey() ) ) { + setComplete(); + return; + } + _gotOne(); + } + } + else { + if ( !matcher( _c )->matchesCurrent( _c.get() ) ) { + } + else if( !_c->getsetdup(_c->currLoc()) ) { + _gotOne(); + } + } + _c->advance(); + } + virtual QueryOp *_createChild() const { + CountOp *ret = new CountOp( _ns , BSONObj() ); + ret->_count = _count; + ret->_skip = _skip; + ret->_limit = _limit; + return ret; + } + long long count() const { return _count; } + virtual bool mayRecordPlan() const { + return ( _myCount > _limit / 2 ) || ( complete() && !stopRequested() ); + } + private: + + void _gotOne() { + if ( _skip ) { + _skip--; + return; + } + + if ( _limit > 0 && _count >= _limit ) { + setStop(); + return; + } + + _count++; + _myCount++; + } + + string _ns; + bool _capped; + + long long _count; + long long _myCount; + long long _skip; + long long _limit; + long long _nscanned; + shared_ptr<Cursor> _c; + BSONObj _query; + BtreeCursor * _bc; + BSONObj _firstMatch; + + ClientCursor::CleanupPointer _cc; + ClientCursor::YieldData _yieldData; + }; + + /* { count: "collectionname"[, query: <query>] } + returns -1 on ns does not exist error. 
+ */ + long long runCount( const char *ns, const BSONObj &cmd, string &err ) { + Client::Context cx(ns); + NamespaceDetails *d = nsdetails( ns ); + if ( !d ) { + err = "ns missing"; + return -1; + } + BSONObj query = cmd.getObjectField("query"); + + // count of all objects + if ( query.isEmpty() ) { + return applySkipLimit( d->stats.nrecords , cmd ); + } + MultiPlanScanner mps( ns, query, BSONObj(), 0, true, BSONObj(), BSONObj(), false, true ); + CountOp original( ns , cmd ); + shared_ptr< CountOp > res = mps.runOp( original ); + if ( !res->complete() ) { + log() << "Count with ns: " << ns << " and query: " << query + << " failed with exception: " << res->exception() + << endl; + return 0; + } + return res->count(); + } + + class ExplainBuilder { + // Note: by default we filter out allPlans and oldPlan in the shell's + // explain() function. If you add any recursive structures, make sure to + // edit the JS to make sure everything gets filtered. + public: + ExplainBuilder() : _i() {} + void ensureStartScan() { + if ( !_a.get() ) { + _a.reset( new BSONArrayBuilder() ); + } + } + void noteCursor( Cursor *c ) { + BSONObjBuilder b( _a->subobjStart() ); + b << "cursor" << c->toString() << "indexBounds" << c->prettyIndexBounds(); + b.done(); + } + void noteScan( Cursor *c, long long nscanned, long long nscannedObjects, int n, bool scanAndOrder, + int millis, bool hint, int nYields , int nChunkSkips , bool indexOnly ) { + if ( _i == 1 ) { + _c.reset( new BSONArrayBuilder() ); + *_c << _b->obj(); + } + if ( _i == 0 ) { + _b.reset( new BSONObjBuilder() ); + } + else { + _b.reset( new BSONObjBuilder( _c->subobjStart() ) ); + } + *_b << "cursor" << c->toString(); + _b->appendNumber( "nscanned", nscanned ); + _b->appendNumber( "nscannedObjects", nscannedObjects ); + *_b << "n" << n; + + if ( scanAndOrder ) + *_b << "scanAndOrder" << true; + + *_b << "millis" << millis; + + *_b << "nYields" << nYields; + *_b << "nChunkSkips" << nChunkSkips; + *_b << "isMultiKey" << 
c->isMultiKey(); + *_b << "indexOnly" << indexOnly; + + *_b << "indexBounds" << c->prettyIndexBounds(); + + c->explainDetails( *_b ); + + if ( !hint ) { + *_b << "allPlans" << _a->arr(); + } + if ( _i != 0 ) { + _b->done(); + } + _a.reset( 0 ); + ++_i; + } + BSONObj finishWithSuffix( long long nscanned, long long nscannedObjects, int n, int millis, const BSONObj &suffix ) { + if ( _i > 1 ) { + BSONObjBuilder b; + b << "clauses" << _c->arr(); + b.appendNumber( "nscanned", nscanned ); + b.appendNumber( "nscannedObjects", nscannedObjects ); + b << "n" << n; + b << "millis" << millis; + b.appendElements( suffix ); + return b.obj(); + } + else { + _b->appendElements( suffix ); + return _b->obj(); + } + } + private: + auto_ptr< BSONArrayBuilder > _a; + auto_ptr< BSONObjBuilder > _b; + auto_ptr< BSONArrayBuilder > _c; + int _i; + }; + + // Implements database 'query' requests using the query optimizer's QueryOp interface + class UserQueryOp : public QueryOp { + public: + + UserQueryOp( const ParsedQuery& pq, Message &response, ExplainBuilder &eb, CurOp &curop ) : + _buf( 32768 ) , // TODO be smarter here + _pq( pq ) , + _ntoskip( pq.getSkip() ) , + _nscanned(0), _oldNscanned(0), _nscannedObjects(0), _oldNscannedObjects(0), + _n(0), + _oldN(0), + _nYields(), + _nChunkSkips(), + _chunkManager( shardingState.needShardChunkManager(pq.ns()) ? + shardingState.getShardChunkManager(pq.ns()) : ShardChunkManagerPtr() ), + _inMemSort(false), + _capped(false), + _saveClientCursor(false), + _wouldSaveClientCursor(false), + _oplogReplay( pq.hasOption( QueryOption_OplogReplay) ), + _response( response ), + _eb( eb ), + _curop( curop ) + {} + + virtual void _init() { + // only need to put the QueryResult fields there if we're building the first buffer in the message. 
+ if ( _response.empty() ) { + _buf.skip( sizeof( QueryResult ) ); + } + + if ( _oplogReplay ) { + _findingStartCursor.reset( new FindingStartCursor( qp() ) ); + _capped = true; + } + else { + _c = qp().newCursor( DiskLoc() , _pq.getNumToReturn() + _pq.getSkip() ); + _capped = _c->capped(); + + // setup check for if we can only use index to extract + if ( _c->modifiedKeys() == false && _c->isMultiKey() == false && _pq.getFields() ) { + _keyFieldsOnly.reset( _pq.getFields()->checkKey( _c->indexKeyPattern() ) ); + } + } + + if ( qp().scanAndOrderRequired() ) { + _inMemSort = true; + _so.reset( new ScanAndOrder( _pq.getSkip() , _pq.getNumToReturn() , _pq.getOrder(), qp().multikeyFrs() ) ); + } + + if ( _pq.isExplain() ) { + _eb.noteCursor( _c.get() ); + } + + } + + virtual bool prepareToYield() { + if ( _findingStartCursor.get() ) { + return _findingStartCursor->prepareToYield(); + } + else { + if ( _c && !_cc ) { + _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , _c , _pq.ns() ) ); + } + if ( _cc ) { + return _cc->prepareToYield( _yieldData ); + } + } + // no active cursor - ok to yield + return true; + } + + virtual void recoverFromYield() { + _nYields++; + + if ( _findingStartCursor.get() ) { + _findingStartCursor->recoverFromYield(); + } + else if ( _cc && !ClientCursor::recoverFromYield( _yieldData ) ) { + _c.reset(); + _cc.reset(); + _so.reset(); + + if ( _capped ) { + msgassertedNoTrace( 13338, str::stream() << "capped cursor overrun during query: " << _pq.ns() ); + } + else if ( qp().mustAssertOnYieldFailure() ) { + msgassertedNoTrace( 15890, str::stream() << "UserQueryOp::recoverFromYield() failed to recover: " << _pq.ns() ); + } + else { + // we don't fail query since we're fine with returning partial data if collection dropped + + // todo: this is wrong. 
the cursor could be gone if closeAllDatabases command just ran + } + + } + } + + virtual long long nscanned() { + if ( _findingStartCursor.get() ) { + return 0; // should only be one query plan, so value doesn't really matter. + } + return _c.get() ? _c->nscanned() : _nscanned; + } + + virtual void next() { + if ( _findingStartCursor.get() ) { + if ( !_findingStartCursor->done() ) { + _findingStartCursor->next(); + } + if ( _findingStartCursor->done() ) { + _c = _findingStartCursor->cursor(); + _findingStartCursor.reset( 0 ); + } + _capped = true; + return; + } + + if ( !_c || !_c->ok() ) { + finish( false ); + return; + } + + bool mayCreateCursor1 = _pq.wantMore() && ! _inMemSort && _pq.getNumToReturn() != 1 && useCursors; + + if( 0 ) { + cout << "SCANNING this: " << this << " key: " << _c->currKey() << " obj: " << _c->current() << endl; + } + + if ( _pq.getMaxScan() && _nscanned >= _pq.getMaxScan() ) { + finish( true ); //? + return; + } + + _nscanned = _c->nscanned(); + if ( !matcher( _c )->matchesCurrent(_c.get() , &_details ) ) { + // not a match, continue onward + if ( _details._loadedObject ) + _nscannedObjects++; + } + else { + _nscannedObjects++; + DiskLoc cl = _c->currLoc(); + if ( _chunkManager && ! _chunkManager->belongsToMe( cl.obj() ) ) { // TODO: should make this covered at some point + _nChunkSkips++; + // log() << "TEMP skipping un-owned chunk: " << _c->current() << endl; + } + else if( _c->getsetdup(cl) ) { + // dup + } + else { + // got a match. + + if ( _inMemSort ) { + // note: no cursors for non-indexed, ordered results. results must be fairly small. + _so->add( _pq.returnKey() ? _c->currKey() : _c->current(), _pq.showDiskLoc() ? &cl : 0 ); + } + else if ( _ntoskip > 0 ) { + _ntoskip--; + } + else { + if ( _pq.isExplain() ) { + _n++; + if ( n() >= _pq.getNumToReturn() && !_pq.wantMore() ) { + // .limit() was used, show just that much. + finish( true ); //? 
+ return; + } + } + else { + + if ( _pq.returnKey() ) { + BSONObjBuilder bb( _buf ); + bb.appendKeys( _c->indexKeyPattern() , _c->currKey() ); + bb.done(); + } + else if ( _keyFieldsOnly ) { + fillQueryResultFromObj( _buf , 0 , _keyFieldsOnly->hydrate( _c->currKey() ) ); + } + else { + BSONObj js = _c->current(); + assert( js.isValid() ); + + if ( _oplogReplay ) { + BSONElement e = js["ts"]; + if ( e.type() == Date || e.type() == Timestamp ) + _slaveReadTill = e._opTime(); + } + + fillQueryResultFromObj( _buf , _pq.getFields() , js , (_pq.showDiskLoc() ? &cl : 0)); + } + _n++; + if ( ! _c->supportGetMore() ) { + if ( _pq.enough( n() ) || _buf.len() >= MaxBytesToReturnToClientAtOnce ) { + finish( true ); + return; + } + } + else if ( _pq.enoughForFirstBatch( n() , _buf.len() ) ) { + /* if only 1 requested, no cursor saved for efficiency...we assume it is findOne() */ + if ( mayCreateCursor1 ) { + _wouldSaveClientCursor = true; + if ( _c->advance() ) { + // more...so save a cursor + _saveClientCursor = true; + } + } + finish( true ); + return; + } + } + } + } + } + _c->advance(); + } + + // this plan won, so set data for response broadly + void finish( bool stop ) { + massert( 13638, "client cursor dropped during explain query yield", !_pq.isExplain() || _c.get() ); + + if ( _pq.isExplain() ) { + _n = _inMemSort ? _so->size() : _n; + } + else if ( _inMemSort ) { + if( _so.get() ) + _so->fill( _buf, _pq.getFields() , _n ); + } + + if ( _c.get() ) { + _nscanned = _c->nscanned(); + + if ( _pq.hasOption( QueryOption_CursorTailable ) && _pq.getNumToReturn() != 1 ) + _c->setTailable(); + + // If the tailing request succeeded. 
+ if ( _c->tailable() ) + _saveClientCursor = true; + } + + if ( _pq.isExplain() ) { + _eb.noteScan( _c.get(), _nscanned, _nscannedObjects, _n, scanAndOrderRequired(), + _curop.elapsedMillis(), useHints && !_pq.getHint().eoo(), _nYields , + _nChunkSkips, _keyFieldsOnly.get() > 0 ); + } + else { + if ( _buf.len() ) { + _response.appendData( _buf.buf(), _buf.len() ); + _buf.decouple(); + } + } + + if ( stop ) { + setStop(); + } + else { + setComplete(); + } + + } + + void finishExplain( const BSONObj &suffix ) { + BSONObj obj = _eb.finishWithSuffix( totalNscanned(), nscannedObjects(), n(), _curop.elapsedMillis(), suffix); + fillQueryResultFromObj(_buf, 0, obj); + _n = 1; + _oldN = 0; + _response.appendData( _buf.buf(), _buf.len() ); + _buf.decouple(); + } + + virtual bool mayRecordPlan() const { + return ( _pq.getNumToReturn() != 1 ) && ( ( _n > _pq.getNumToReturn() / 2 ) || ( complete() && !stopRequested() ) ); + } + + virtual QueryOp *_createChild() const { + if ( _pq.isExplain() ) { + _eb.ensureStartScan(); + } + UserQueryOp *ret = new UserQueryOp( _pq, _response, _eb, _curop ); + ret->_oldN = n(); + ret->_oldNscanned = totalNscanned(); + ret->_oldNscannedObjects = nscannedObjects(); + ret->_ntoskip = _ntoskip; + return ret; + } + + bool scanAndOrderRequired() const { return _inMemSort; } + shared_ptr<Cursor> cursor() { return _c; } + int n() const { return _oldN + _n; } + long long totalNscanned() const { return _nscanned + _oldNscanned; } + long long nscannedObjects() const { return _nscannedObjects + _oldNscannedObjects; } + bool saveClientCursor() const { return _saveClientCursor; } + bool wouldSaveClientCursor() const { return _wouldSaveClientCursor; } + + void finishForOplogReplay( ClientCursor * cc ) { + if ( _oplogReplay && ! 
_slaveReadTill.isNull() ) + cc->slaveReadTill( _slaveReadTill ); + + } + + ShardChunkManagerPtr getChunkManager(){ return _chunkManager; } + + private: + BufBuilder _buf; + const ParsedQuery& _pq; + scoped_ptr<Projection::KeyOnly> _keyFieldsOnly; + + long long _ntoskip; + long long _nscanned; + long long _oldNscanned; + long long _nscannedObjects; + long long _oldNscannedObjects; + int _n; // found so far + int _oldN; + + int _nYields; + int _nChunkSkips; + + MatchDetails _details; + + ShardChunkManagerPtr _chunkManager; + + bool _inMemSort; + auto_ptr< ScanAndOrder > _so; + + shared_ptr<Cursor> _c; + ClientCursor::CleanupPointer _cc; + ClientCursor::YieldData _yieldData; + + bool _capped; + bool _saveClientCursor; + bool _wouldSaveClientCursor; + bool _oplogReplay; + auto_ptr< FindingStartCursor > _findingStartCursor; + + Message &_response; + ExplainBuilder &_eb; + CurOp &_curop; + OpTime _slaveReadTill; + }; + + /* run a query -- includes checking for and running a Command \ + @return points to ns if exhaust mode. 
       0=normal mode
     */
    /* Top-level entry point for an OP_QUERY message.
       Dispatches commands (ns ends in ".$cmd"), the fast _id-lookup path, and the
       general query-optimizer path.  Returns the namespace string when the client
       requested exhaust mode (caller keeps streaming), otherwise 0/NULL. */
    const char *runQuery(Message& m, QueryMessage& q, CurOp& curop, Message &result) {
        shared_ptr<ParsedQuery> pq_shared( new ParsedQuery(q) );
        ParsedQuery& pq( *pq_shared );
        int ntoskip = q.ntoskip;
        BSONObj jsobj = q.query;
        int queryOptions = q.queryOptions;
        const char *ns = q.ns;

        if( logLevel >= 2 )
            log() << "runQuery called " << ns << " " << jsobj << endl;

        curop.debug().ns = ns;
        curop.debug().ntoreturn = pq.getNumToReturn();
        curop.setQuery(jsobj);

        // Command path: a findOne() against a ".$cmd" namespace is a command.
        if ( pq.couldBeCommand() ) {
            BufBuilder bb;
            bb.skip(sizeof(QueryResult));   // reserve space for the reply header
            BSONObjBuilder cmdResBuf;
            if ( runCommands(ns, jsobj, curop, bb, cmdResBuf, false, queryOptions) ) {
                curop.debug().iscommand = true;
                curop.debug().query = jsobj;
                curop.markCommand();

                auto_ptr< QueryResult > qr;
                qr.reset( (QueryResult *) bb.buf() );
                bb.decouple();              // qr now owns the buffer
                qr->setResultFlagsToOk();
                qr->len = bb.len();
                curop.debug().responseLength = bb.len();
                qr->setOperation(opReply);
                qr->cursorId = 0;
                qr->startingFrom = 0;
                qr->nReturned = 1;
                result.setData( qr.release(), true );
            }
            else {
                uasserted(13530, "bad or malformed command request?");
            }
            return 0;
        }

        /* --- regular query --- */

        int n = 0;
        BSONElement hint = useHints ? pq.getHint() : BSONElement();
        bool explain = pq.isExplain();
        bool snapshot = pq.isSnapshot();
        BSONObj order = pq.getOrder();
        BSONObj query = pq.getFilter();

        /* The ElemIter will not be happy if this isn't really an object. So throw exception
           here when that is true.
           (Which may indicate bad data from client.)
        */
        if ( query.objsize() == 0 ) {
            out() << "Bad query object?\n  jsobj:";
            out() << jsobj.toString() << "\n  query:";
            out() << query.toString() << endl;
            uassert( 10110 , "bad query object", false);
        }

        /* --- read lock --- */

        mongolock lk(false);

        Client::Context ctx( ns , dbpath , &lk );

        replVerifyReadsOk(pq);

        // Tailable cursors only make sense on capped collections in natural order.
        if ( pq.hasOption( QueryOption_CursorTailable ) ) {
            NamespaceDetails *d = nsdetails( ns );
            uassert( 13051, "tailable cursor requested on non capped collection", d && d->capped );
            const BSONObj nat1 = BSON( "$natural" << 1 );
            if ( order.isEmpty() ) {
                order = nat1;
            }
            else {
                uassert( 13052, "only {$natural:1} order allowed for tailable cursor", order == nat1 );
            }
        }

        BSONObj snapshotHint; // put here to keep the data in scope
        if( snapshot ) {
            NamespaceDetails *d = nsdetails(ns);
            if ( d ) {
                int i = d->findIdIndex();
                if( i < 0 ) {
                    if ( strstr( ns , ".system." ) == 0 )
                        log() << "warning: no _id index on $snapshot query, ns:" << ns << endl;
                }
                else {
                    /* [dm] the name of an _id index tends to vary, so we build the hint the hard way here.
                       probably need a better way to specify "use the _id index" as a hint.  if someone is
                       in the query optimizer please fix this then!
                    */
                    BSONObjBuilder b;
                    b.append("$hint", d->idx(i).indexName());
                    snapshotHint = b.obj();
                    hint = snapshotHint.firstElement();
                }
            }
        }

        // "idhack" fast path: plain {_id: <simple value>} query bypasses the
        // query optimizer entirely and does a direct index lookup.
        if ( !(explain || pq.showDiskLoc()) && isSimpleIdQuery( query ) && !pq.hasOption( QueryOption_CursorTailable ) ) {

            bool nsFound = false;
            bool indexFound = false;

            BSONObj resObject;
            Client& c = cc();
            bool found = Helpers::findById( c, ns , query , resObject , &nsFound , &indexFound );
            // only take this path if the ns is missing or the _id index exists;
            // otherwise fall through to the regular plan below
            if ( nsFound == false || indexFound == true ) {
                BufBuilder bb(sizeof(QueryResult)+resObject.objsize()+32);
                bb.skip(sizeof(QueryResult));

                curop.debug().idhack = true;
                if ( found ) {
                    n = 1;
                    fillQueryResultFromObj( bb , pq.getFields() , resObject );
                }
                auto_ptr< QueryResult > qr;
                qr.reset( (QueryResult *) bb.buf() );
                bb.decouple();
                qr->setResultFlagsToOk();
                qr->len = bb.len();

                curop.debug().responseLength = bb.len();
                qr->setOperation(opReply);
                qr->cursorId = 0;
                qr->startingFrom = 0;
                qr->nReturned = n;
                result.setData( qr.release(), true );
                return NULL;
            }
        }

        // regular, not QO bypass query

        BSONObj oldPlan;
        if ( explain && ! pq.hasIndexSpecifier() ) {
            MultiPlanScanner mps( ns, query, order );
            if ( mps.usingPrerecordedPlan() )
                oldPlan = mps.oldExplain();
        }
        auto_ptr< MultiPlanScanner > mps( new MultiPlanScanner( ns, query, order, &hint, !explain, pq.getMin(), pq.getMax(), false, true ) );
        BSONObj explainSuffix;
        if ( explain ) {
            BSONObjBuilder bb;
            if ( !oldPlan.isEmpty() )
                bb.append( "oldPlan", oldPlan.firstElement().embeddedObject().firstElement().embeddedObject() );
            explainSuffix = bb.obj();
        }
        ExplainBuilder eb;
        UserQueryOp original( pq, result, eb, curop );
        shared_ptr< UserQueryOp > o = mps->runOp( original );
        UserQueryOp &dqo = *o;
        if ( ! dqo.complete() )
            throw MsgAssertionException( dqo.exception() );
        if ( explain ) {
            dqo.finishExplain( explainSuffix );
        }
        n = dqo.n();
        long long nscanned = dqo.totalNscanned();
        curop.debug().scanAndOrder = dqo.scanAndOrderRequired();

        shared_ptr<Cursor> cursor = dqo.cursor();
        if( logLevel >= 5 )
            log() << "   used cursor: " << cursor.get() << endl;
        long long cursorid = 0;
        const char * exhaust = 0;
        // Keep a server-side ClientCursor if there may be more results to return
        // (either this plan has more, or further $or clauses remain to run).
        if ( dqo.saveClientCursor() || ( dqo.wouldSaveClientCursor() && mps->mayRunMore() ) ) {
            ClientCursor *cc;
            bool moreClauses = mps->mayRunMore();
            if ( moreClauses ) {
                // this MultiCursor will use a dumb NoOp to advance(), so no need to specify mayYield
                shared_ptr< Cursor > multi( new MultiCursor( mps, cursor, dqo.matcher( cursor ), dqo ) );
                cc = new ClientCursor(queryOptions, multi, ns, jsobj.getOwned());
            }
            else {
                if( ! cursor->matcher() ) cursor->setMatcher( dqo.matcher( cursor ) );
                cc = new ClientCursor( queryOptions, cursor, ns, jsobj.getOwned() );
            }

            cc->setChunkManager( dqo.getChunkManager() );

            cursorid = cc->cursorid();
            DEV tlog(2) << "query has more, cursorid: " << cursorid << endl;
            cc->setPos( n );
            cc->pq = pq_shared;
            cc->fields = pq.getFieldPtr();
            cc->originalMessage = m;
            cc->updateLocation();
            if ( !cc->ok() && cc->c()->tailable() )
                DEV tlog() << "query has no more but tailable, cursorid: " << cursorid << endl;
            if( queryOptions & QueryOption_Exhaust ) {
                // non-zero return value tells the caller to keep streaming batches
                exhaust = ns;
                curop.debug().exhaust = true;
            }
            dqo.finishForOplogReplay(cc);
        }

        QueryResult *qr = (QueryResult *) result.header();
        qr->cursorId = cursorid;
        qr->setResultFlagsToOk();
        // qr->len is updated automatically by appendData()
        curop.debug().responseLength = qr->len;
        qr->setOperation(opReply);
        qr->startingFrom = 0;
        qr->nReturned = n;

        // Only pay for the nscanned/ntoskip bookkeeping when profiling or slow.
        int duration = curop.elapsedMillis();
        bool dbprofile = curop.shouldDBProfile( duration );
        if ( dbprofile || duration >= cmdLine.slowMS ) {
            curop.debug().nscanned = (int) nscanned;
            curop.debug().ntoskip = ntoskip;
        }
        curop.debug().nreturned = n;
        return exhaust;
    }

} // namespace mongo
diff --git a/db/ops/query.h b/db/ops/query.h
new file mode 100644
index 0000000..ada2e90
--- /dev/null
+++ b/db/ops/query.h
@@ -0,0 +1,250 @@
// query.h

/**
* Copyright (C) 2008 10gen Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License, version 3,
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#pragma once

#include "../../pch.h"
#include "../../util/net/message.h"
#include "../dbmessage.h"
#include "../jsobj.h"
#include "../diskloc.h"
#include "../projection.h"

// struct QueryOptions, QueryResult, QueryResultFlags in:
#include "../../client/dbclient.h"

namespace mongo {

    extern const int MaxBytesToReturnToClientAtOnce;

    QueryResult* processGetMore(const char *ns, int ntoreturn, long long cursorid , CurOp& op, int pass, bool& exhaust);

    long long runCount(const char *ns, const BSONObj& cmd, string& err);

    const char * runQuery(Message& m, QueryMessage& q, CurOp& curop, Message &result);

    /* This is for languages whose "objects" are not well ordered (JSON is well ordered).
       [ { a : ... } , { b : ... } ] -> { a : ..., b : ...
       }
    */
    // Converts the array form of an order spec ([{a:1},{b:1}] arrives as an
    // object with keys "0","1",...) into a single flat order object.
    inline BSONObj transformOrderFromArrayFormat(BSONObj order) {
        /* note: this is slow, but that is ok as order will have very few pieces */
        BSONObjBuilder b;
        char p[2] = "0";

        while ( 1 ) {
            BSONObj j = order.getObjectField(p);
            if ( j.isEmpty() )
                break;
            BSONElement e = j.firstElement();
            uassert( 10102 , "bad order array", !e.eoo());
            uassert( 10103 , "bad order array [2]", e.isNumber());
            b.append(e);
            (*p)++;   // "0" -> "1" -> ... ; capped at "9" below
            uassert( 10104 , "too many ordering elements", *p <= '9');
        }

        return b.obj();
    }

    /**
     * this represents a total user query
     * includes fields from the query message, both possible query levels
     * parses everything up front
     */
    class ParsedQuery : boost::noncopyable {
    public:
        ParsedQuery( QueryMessage& qm )
            : _ns( qm.ns ) , _ntoskip( qm.ntoskip ) , _ntoreturn( qm.ntoreturn ) , _options( qm.queryOptions ) {
            init( qm.query );
            initFields( qm.fields );
        }
        ParsedQuery( const char* ns , int ntoskip , int ntoreturn , int queryoptions , const BSONObj& query , const BSONObj& fields )
            : _ns( ns ) , _ntoskip( ntoskip ) , _ntoreturn( ntoreturn ) , _options( queryoptions ) {
            init( query );
            initFields( fields );
        }

        const char * ns() const { return _ns; }
        bool isLocalDB() const { return strncmp(_ns, "local.", 6) == 0; }

        const BSONObj& getFilter() const { return _filter; }
        Projection* getFields() const { return _fields.get(); }
        shared_ptr<Projection> getFieldPtr() const { return _fields; }

        int getSkip() const { return _ntoskip; }
        int getNumToReturn() const { return _ntoreturn; }
        bool wantMore() const { return _wantMore; }
        int getOptions() const { return _options; }
        bool hasOption( int x ) const { return x & _options; }

        bool isExplain() const { return _explain; }
        bool isSnapshot() const { return _snapshot; }
        bool returnKey() const { return _returnKey; }
        bool showDiskLoc() const { return _showDiskLoc; }

        const BSONObj& getMin() const { return _min; }
        const BSONObj& getMax() const { return _max; }
        const BSONObj& getOrder() const { return _order; }
        const BSONElement& getHint() const { return _hint; }
        int getMaxScan() const { return _maxScan; }

        bool couldBeCommand() const {
            /* we assume you are using findOne() for running a cmd... */
            return _ntoreturn == 1 && strstr( _ns , ".$cmd" );
        }

        bool hasIndexSpecifier() const {
            return ! _hint.eoo() || ! _min.isEmpty() || ! _max.isEmpty();
        }

        /* if ntoreturn is zero, we return up to 101 objects.  on the subsequent getmore, there
           is only a size limit.  The idea is that on a find() where one doesn't use much results,
           we don't return much, but once getmore kicks in, we start pushing significant quantities.

           The n limit (vs. size) is important when someone fetches only one small field from big
           objects, which causes massive scanning server-side.
        */
        bool enoughForFirstBatch( int n , int len ) const {
            if ( _ntoreturn == 0 )
                return ( len > 1024 * 1024 ) || n >= 101;
            return n >= _ntoreturn || len > MaxBytesToReturnToClientAtOnce;
        }

        bool enough( int n ) const {
            if ( _ntoreturn == 0 )
                return false;
            return n >= _ntoreturn;
        }

    private:
        // Parse the query object: handles both the bare-filter form and the
        // wrapped {query:..., $orderby:..., ...} form.
        void init( const BSONObj& q ) {
            _reset();
            uassert( 10105 , "bad skip value in query", _ntoskip >= 0);

            if ( _ntoreturn < 0 ) {
                /* _ntoreturn greater than zero is simply a hint on how many objects to send back per
                   "cursor batch".
                   A negative number indicates a hard limit.
                */
                _wantMore = false;
                _ntoreturn = -_ntoreturn;
            }


            BSONElement e = q["query"];
            if ( !
e.isABSONObj() ) + e = q["$query"]; + + if ( e.isABSONObj() ) { + _filter = e.embeddedObject(); + _initTop( q ); + } + else { + _filter = q; + } + } + + void _reset() { + _wantMore = true; + _explain = false; + _snapshot = false; + _returnKey = false; + _showDiskLoc = false; + _maxScan = 0; + } + + void _initTop( const BSONObj& top ) { + BSONObjIterator i( top ); + while ( i.more() ) { + BSONElement e = i.next(); + const char * name = e.fieldName(); + + if ( strcmp( "$orderby" , name ) == 0 || + strcmp( "orderby" , name ) == 0 ) { + if ( e.type() == Object ) { + _order = e.embeddedObject(); + } + else if ( e.type() == Array ) { + _order = transformOrderFromArrayFormat( _order ); + } + else { + uasserted(13513, "sort must be an object or array"); + } + continue; + } + + if( *name == '$' ) { + name++; + if ( strcmp( "explain" , name ) == 0 ) + _explain = e.trueValue(); + else if ( strcmp( "snapshot" , name ) == 0 ) + _snapshot = e.trueValue(); + else if ( strcmp( "min" , name ) == 0 ) + _min = e.embeddedObject(); + else if ( strcmp( "max" , name ) == 0 ) + _max = e.embeddedObject(); + else if ( strcmp( "hint" , name ) == 0 ) + _hint = e; + else if ( strcmp( "returnKey" , name ) == 0 ) + _returnKey = e.trueValue(); + else if ( strcmp( "maxScan" , name ) == 0 ) + _maxScan = e.numberInt(); + else if ( strcmp( "showDiskLoc" , name ) == 0 ) + _showDiskLoc = e.trueValue(); + else if ( strcmp( "comment" , name ) == 0 ) { + ; // no-op + } + } + } + + if ( _snapshot ) { + uassert( 12001 , "E12001 can't sort with $snapshot", _order.isEmpty() ); + uassert( 12002 , "E12002 can't use hint with $snapshot", _hint.eoo() ); + } + + } + + void initFields( const BSONObj& fields ) { + if ( fields.isEmpty() ) + return; + _fields.reset( new Projection() ); + _fields->init( fields ); + } + + const char * const _ns; + const int _ntoskip; + int _ntoreturn; + BSONObj _filter; + BSONObj _order; + const int _options; + shared_ptr< Projection > _fields; + bool _wantMore; + bool _explain; + bool 
_snapshot;
        bool _returnKey;
        bool _showDiskLoc;
        BSONObj _min;
        BSONObj _max;
        BSONElement _hint;
        int _maxScan;
    };


} // namespace mongo

diff --git a/db/ops/update.cpp b/db/ops/update.cpp
new file mode 100644
index 0000000..fd9798a
--- /dev/null
+++ b/db/ops/update.cpp
@@ -0,0 +1,1369 @@
// update.cpp

/**
 *    Copyright (C) 2008 10gen Inc.
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU Affero General Public License, version 3,
 *    as published by the Free Software Foundation.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU Affero General Public License for more details.
 *
 *    You should have received a copy of the GNU Affero General Public License
 *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "pch.h"
#include "query.h"
#include "../pdfile.h"
#include "../jsobjmanipulator.h"
#include "../queryoptimizer.h"
#include "../repl.h"
#include "../btree.h"
#include "../../util/stringutils.h"
#include "update.h"

//#define DEBUGUPDATE(x) cout << x << endl;
#define DEBUGUPDATE(x)

namespace mongo {

    // Printable names indexed by Mod::Op. "$rename" appears twice on purpose:
    // both RENAME_FROM and RENAME_TO render as "$rename".
    const char* Mod::modNames[] = { "$inc", "$set", "$push", "$pushAll", "$pull", "$pullAll" , "$pop", "$unset" ,
                                    "$bitand" , "$bitor" , "$bit" , "$addToSet", "$rename", "$rename"
                                  };
    unsigned Mod::modNamesNum = sizeof(Mod::modNames)/sizeof(char*);

    // True if 'toMatch' (one element of the array being $pull-ed) matches this
    // mod's argument 'elt' — either by direct comparison or, when elt is an
    // object, by running the precompiled matcher.
    bool Mod::_pullElementMatch( BSONElement& toMatch ) const {

        if ( elt.type() != Object ) {
            // if elt isn't an object, then comparison will work
            return toMatch.woCompare( elt , false ) == 0;
        }

        if ( matcherOnPrimitive )
            return matcher->matches( toMatch.wrap( "" ) );

        if ( toMatch.type() != Object ) {
            // looking for an object, so this can't match
            return false;
        }

        // now we have an object on both sides
        return matcher->matches( toMatch.embeddedObject() );
    }

    // Append the result of $inc-ing 'in' by this mod's 'elt' to 'bb',
    // recording the widened numeric type (int -> long on overflow, or
    // double/long if either operand already is one) in 'ms'.
    template< class Builder >
    void Mod::appendIncremented( Builder& bb , const BSONElement& in, ModState& ms ) const {
        BSONType a = in.type();
        BSONType b = elt.type();

        if ( a == NumberDouble || b == NumberDouble ) {
            ms.incType = NumberDouble;
            ms.incdouble = elt.numberDouble() + in.numberDouble();
        }
        else if ( a == NumberLong || b == NumberLong ) {
            ms.incType = NumberLong;
            ms.inclong = elt.numberLong() + in.numberLong();
        }
        else {
            int x = elt.numberInt() + in.numberInt();
            if ( x < 0 && elt.numberInt() > 0 && in.numberInt() > 0 ) {
                // overflow
                ms.incType = NumberLong;
                ms.inclong = elt.numberLong() + in.numberLong();
            }
            else {
                ms.incType = NumberInt;
                ms.incint = elt.numberInt() + in.numberInt();
            }
        }

        ms.appendIncValue( bb , false );
    }

    // $unset inside an object: simply omit the field.
    template< class Builder >
    void appendUnset( Builder &b ) {
    }

    // $unset inside an array: positions must be preserved, so write a null.
    template<>
    void appendUnset( BSONArrayBuilder &b ) {
        b.appendNull();
    }

    // Apply this mod to existing element 'in', writing the resulting field
    // into builder 'b'.  'ms' carries per-application state (push sizes,
    // inc results) used later for the oplog entry.
    template< class Builder >
    void Mod::apply( Builder& b , BSONElement in , ModState& ms ) const {
        if ( ms.dontApply ) {
            return;
        }

        switch ( op ) {

        case INC: {
            appendIncremented( b , in , ms );
            break;
        }

        case SET: {
            _checkForAppending( elt );
            b.appendAs( elt , shortFieldName );
            break;
        }

        case UNSET: {
            appendUnset( b );
            break;
        }

        case PUSH: {
            uassert( 10131 ,  "$push can only be applied to an array" , in.type() == Array );
            BSONObjBuilder bb( b.subarrayStart( shortFieldName ) );
            BSONObjIterator i( in.embeddedObject() );
            int n=0;
            // copy existing elements, then append the pushed value at the end
            while ( i.more() ) {
                bb.append( i.next() );
                n++;
            }

            ms.pushStartSize = n;

            bb.appendAs( elt ,  bb.numStr( n ) );
            bb.done();
            break;
        }

        case ADDTOSET: {
            uassert( 12592 ,  "$addToSet can only be applied to an array" , in.type() == Array );
            BSONObjBuilder bb( b.subarrayStart( shortFieldName ) );

            BSONObjIterator i( in.embeddedObject() );
            int n=0;

            if ( isEach() ) {

                // $addToSet with $each: append only those candidates not
                // already present (toadd is pruned as duplicates are seen).
                BSONElementSet toadd;
                parseEach( toadd );

                while ( i.more() ) {
                    BSONElement cur = i.next();
                    bb.append( cur );
                    n++;
                    toadd.erase( cur );
                }

                {
                    BSONObjIterator i( getEach() );
                    while ( i.more() ) {
                        BSONElement e = i.next();
                        if ( toadd.count(e) ) {
                            bb.appendAs( e , BSONObjBuilder::numStr( n++ ) );
                            toadd.erase( e );
                        }
                    }
                }

            }
            else {

                bool found = false;

                while ( i.more() ) {
                    BSONElement cur = i.next();
                    bb.append( cur );
                    n++;
                    if ( elt.woCompare( cur , false ) == 0 )
                        found = true;
                }

                if ( ! found )
                    bb.appendAs( elt ,  bb.numStr( n ) );

            }

            bb.done();
            break;
        }



        case PUSH_ALL: {
            uassert( 10132 ,  "$pushAll can only be applied to an array" , in.type() == Array );
            // NOTE(review): this only checks elt.type() != EOO, not == Array as the
            // message claims; a non-array argument may slip through — confirm intent.
            uassert( 10133 ,  "$pushAll has to be passed an array" , elt.type() );

            BSONObjBuilder bb( b.subarrayStart( shortFieldName ) );

            BSONObjIterator i( in.embeddedObject() );
            int n=0;
            while ( i.more() ) {
                bb.append( i.next() );
                n++;
            }

            ms.pushStartSize = n;

            i = BSONObjIterator( elt.embeddedObject() );
            while ( i.more() ) {
                bb.appendAs( i.next() , bb.numStr( n++ ) );
            }

            bb.done();
            break;
        }

        case PULL:
        case PULL_ALL: {
            uassert( 10134 ,  "$pull/$pullAll can only be applied to an array" , in.type() == Array );
            BSONObjBuilder bb( b.subarrayStart( shortFieldName ) );

            int n = 0;

            // keep only the elements that do NOT match the pull criteria
            BSONObjIterator i( in.embeddedObject() );
            while ( i.more() ) {
                BSONElement e = i.next();
                bool allowed = true;

                if ( op == PULL ) {
                    allowed = ! _pullElementMatch( e );
                }
                else {
                    BSONObjIterator j( elt.embeddedObject() );
                    while( j.more() ) {
                        BSONElement arrJ = j.next();
                        if ( e.woCompare( arrJ, false ) == 0 ) {
                            allowed = false;
                            break;
                        }
                    }
                }

                if ( allowed )
                    bb.appendAs( e , bb.numStr( n++ ) );
            }

            bb.done();
            break;
        }

        case POP: {
            uassert( 10135 ,  "$pop can only be applied to an array" , in.type() == Array );
            BSONObjBuilder bb( b.subarrayStart( shortFieldName ) );

            int n = 0;

            BSONObjIterator i( in.embeddedObject() );
            if ( elt.isNumber() && elt.number() < 0 ) {
                // pop from front
                if ( i.more() ) {
                    i.next();
                    n++;
                }

                while( i.more() ) {
                    bb.appendAs( i.next() , bb.numStr( n - 1 ) );
                    n++;
                }
            }
            else {
                // pop from back
                while( i.more() ) {
                    n++;
                    BSONElement arrI = i.next();
                    if ( i.more() ) {
                        bb.append( arrI );
                    }
                }
            }

            ms.pushStartSize = n;
            assert( ms.pushStartSize == in.embeddedObject().nFields() );
            bb.done();
            break;
        }

        case BIT: {
            uassert( 10136 ,  "$bit needs an array" , elt.type() == Object );
            uassert( 10137 ,  "$bit can only be applied to numbers" , in.isNumber() );
            uassert( 10138 ,  "$bit cannot update a value of type double" , in.type() != NumberDouble );

            int x = in.numberInt();
            long long y = in.numberLong();

            // apply each and/or sub-operation to the int or long accumulator,
            // whichever matches the stored value's type
            BSONObjIterator it( elt.embeddedObject() );
            while ( it.more() ) {
                BSONElement e = it.next();
                uassert( 10139 ,  "$bit field must be number" , e.isNumber() );
                if ( str::equals(e.fieldName(), "and") ) {
                    switch( in.type() ) {
                    case NumberInt: x = x&e.numberInt(); break;
                    case NumberLong: y = y&e.numberLong(); break;
                    default: assert( 0 );
                    }
                }
                else if ( str::equals(e.fieldName(), "or") ) {
                    switch( in.type() ) {
                    case NumberInt: x = x|e.numberInt(); break;
                    case NumberLong: y = y|e.numberLong(); break;
                    default: assert( 0 );
                    }
                }
                else {
                    uasserted(9016, str::stream() << "unknown $bit operation: " << e.fieldName());
                }
            }

            switch( in.type() ) {
            case NumberInt: b.append( shortFieldName , x ); break;
            case NumberLong: b.append( shortFieldName , y ); break;
            default: assert( 0 );
            }

            break;
        }

        case RENAME_FROM: {
            // source side of $rename: the field is simply dropped here;
            // RENAME_TO writes the value at its new location.
            break;
        }

        case RENAME_TO: {
            ms.handleRename( b, shortFieldName );
            break;
        }

        default:
            stringstream ss;
            ss << "Mod::apply can't handle type: " << op;
            throw UserException( 9017, ss.str() );
        }
    }

    // -1 inside a non-object (non-object could be array)
    // 0 missing
    // 1 found
    int validRenamePath( BSONObj obj, const char *path ) {
        // walk each dotted component; fail with -1 if a prefix resolves to a
        // non-object, 0 if any component is absent
        while( const char *p = strchr( path, '.' ) ) {
            string left( path, p - path );
            BSONElement e = obj.getField( left );
            if ( e.eoo() ) {
                return 0;
            }
            if ( e.type() != Object ) {
                return -1;
            }
            obj = e.embeddedObject();
            path = p + 1;
        }
        return !obj.getField( path ).eoo();
    }

    // Evaluate every mod against 'obj' and build a ModSetState describing how
    // to apply them — including whether the whole update can be done in place.
    auto_ptr<ModSetState> ModSet::prepare(const BSONObj &obj) const {
        DEBUGUPDATE( "\t start prepare" );
        auto_ptr<ModSetState> mss( new ModSetState( obj ) );


        // Perform this check first, so that we don't leave a partially modified object on uassert.
        // One pass over all mods: record each mod's current target element and
        // decide per-mod whether in-place application remains possible.
        for ( ModHolder::const_iterator i = _mods.begin(); i != _mods.end(); ++i ) {
            DEBUGUPDATE( "\t\t prepare : " << i->first );
            ModState& ms = mss->_mods[i->first];

            const Mod& m = i->second;
            BSONElement e = obj.getFieldDotted(m.fieldName);

            ms.m = &m;
            ms.old = e;

            if ( m.op == Mod::RENAME_FROM ) {
                int source = validRenamePath( obj, m.fieldName );
                uassert( 13489, "$rename source field invalid", source != -1 );
                if ( source != 1 ) {
                    // source missing: the rename pair becomes a no-op
                    ms.dontApply = true;
                }
                continue;
            }

            if ( m.op == Mod::RENAME_TO ) {
                int source = validRenamePath( obj, m.renameFrom() );
                if ( source == 1 ) {
                    int target = validRenamePath( obj, m.fieldName );
                    uassert( 13490, "$rename target field invalid", target != -1 );
                    ms.newVal = obj.getFieldDotted( m.renameFrom() );
                    mss->amIInPlacePossible( false );
                }
                else {
                    ms.dontApply = true;
                }
                continue;
            }

            if ( e.eoo() ) {
                // field absent: only $unset is a no-op that can stay in place
                mss->amIInPlacePossible( m.op == Mod::UNSET );
                continue;
            }

            switch( m.op ) {
            case Mod::INC:
                uassert( 10140 ,  "Cannot apply $inc modifier to non-number", e.isNumber() || e.eoo() );
                if ( mss->amIInPlacePossible( e.isNumber() ) ) {
                    // check more typing info here
                    if ( m.elt.type() != e.type() ) {
                        // if i'm incrementing with a double, then the storage has to be a double
                        mss->amIInPlacePossible( m.elt.type() != NumberDouble );
                    }

                    // check for overflow
                    if ( e.type() == NumberInt && e.numberLong() + m.elt.numberLong() > numeric_limits<int>::max() ) {
                        mss->amIInPlacePossible( false );
                    }
                }
                break;

            case Mod::SET:
                // in place only if the replacement has identical type and size
                mss->amIInPlacePossible( m.elt.type() == e.type() &&
                                         m.elt.valuesize() == e.valuesize() );
                break;

            case Mod::PUSH:
            case Mod::PUSH_ALL:
                uassert( 10141 ,  "Cannot apply $push/$pushAll modifier to non-array", e.type() == Array || e.eoo() );
                mss->amIInPlacePossible( false );
                break;

            case Mod::PULL:
            case Mod::PULL_ALL: {
                uassert( 10142 ,  "Cannot apply $pull/$pullAll modifier to non-array", e.type() == Array || e.eoo() );
                // in place only if nothing would actually be pulled
                BSONObjIterator i( e.embeddedObject() );
                while( mss->_inPlacePossible && i.more() ) {
                    BSONElement arrI = i.next();
                    if ( m.op == Mod::PULL ) {
                        mss->amIInPlacePossible( ! m._pullElementMatch( arrI ) );
                    }
                    else if ( m.op == Mod::PULL_ALL ) {
                        BSONObjIterator j( m.elt.embeddedObject() );
                        while( mss->_inPlacePossible && j.moreWithEOO() ) {
                            BSONElement arrJ = j.next();
                            if ( arrJ.eoo() )
                                break;
                            mss->amIInPlacePossible( arrI.woCompare( arrJ, false ) );
                        }
                    }
                }
                break;
            }

            case Mod::POP: {
                uassert( 10143 ,  "Cannot apply $pop modifier to non-array", e.type() == Array || e.eoo() );
                mss->amIInPlacePossible( e.embeddedObject().isEmpty() );
                break;
            }

            case Mod::ADDTOSET: {
                uassert( 12591 ,  "Cannot apply $addToSet modifier to non-array", e.type() == Array || e.eoo() );

                // in place only if every candidate element is already present
                BSONObjIterator i( e.embeddedObject() );
                if ( m.isEach() ) {
                    BSONElementSet toadd;
                    m.parseEach( toadd );
                    while( i.more() ) {
                        BSONElement arrI = i.next();
                        toadd.erase( arrI );
                    }
                    mss->amIInPlacePossible( toadd.size() == 0 );
                }
                else {
                    bool found = false;
                    while( i.more() ) {
                        BSONElement arrI = i.next();
                        if ( arrI.woCompare( m.elt , false ) == 0 ) {
                            found = true;
                            break;
                        }
                    }
                    mss->amIInPlacePossible( found );
                }
                break;
            }

            default:
                // mods we don't know about shouldn't be done in place
                mss->amIInPlacePossible( false );
            }
        }

        DEBUGUPDATE( "\t mss\n" << mss->toString() << "\t--" );

        return mss;
    }

    // Emit this mod's effect in oplog form ($set/$unset with resolved values)
    // into builder 'b'.
    void ModState::appendForOpLog( BSONObjBuilder& b ) const {
        if ( dontApply ) {
            return;
        }

        if ( incType ) {
            // $inc is logged as a $set of the computed value for idempotency
            DEBUGUPDATE( "\t\t\t\t\t appendForOpLog inc fieldname: " << m->fieldName << " short:" << m->shortFieldName );
            BSONObjBuilder bb( b.subobjStart( "$set" ) );
            appendIncValue( bb , true );
            bb.done();
            return;
        }

        if ( m->op == Mod::RENAME_FROM ) {
            DEBUGUPDATE( "\t\t\t\t\t appendForOpLog RENAME_FROM fieldName:" << m->fieldName );
            BSONObjBuilder bb( b.subobjStart( "$unset" ) );
            bb.append(
                m->fieldName, 1 );
            bb.done();
            return;
        }

        if ( m->op == Mod::RENAME_TO ) {
            DEBUGUPDATE( "\t\t\t\t\t appendForOpLog RENAME_TO fieldName:" << m->fieldName );
            BSONObjBuilder bb( b.subobjStart( "$set" ) );
            bb.appendAs( newVal, m->fieldName );
            // NOTE(review): unlike the RENAME_FROM branch above, no bb.done()
            // before returning — presumably the builder's destructor closes the
            // subobject; confirm that is intended.
            return;
        }

        const char * name = fixedOpName ? fixedOpName : Mod::modNames[op()];

        DEBUGUPDATE( "\t\t\t\t\t appendForOpLog name:" << name << " fixed: " << fixed << " fn: " << m->fieldName );

        BSONObjBuilder bb( b.subobjStart( name ) );
        if ( fixed ) {
            bb.appendAs( *fixed , m->fieldName );
        }
        else {
            bb.appendAs( m->elt , m->fieldName );
        }
        bb.done();
    }

    // Debug representation of any op-name/value overrides recorded for oplog use.
    string ModState::toString() const {
        stringstream ss;
        if ( fixedOpName )
            ss << " fixedOpName: " << fixedOpName;
        if ( fixed )
            ss << " fixed: " << fixed;
        return ss.str();
    }

    // Write the renamed value under its new (short) name and re-point newVal at
    // a privately owned copy so it stays valid after the source object goes away.
    template< class Builder >
    void ModState::handleRename( Builder &newObjBuilder, const char *shortFieldName ) {
        newObjBuilder.appendAs( newVal , shortFieldName );
        BSONObjBuilder b;
        b.appendAs( newVal, shortFieldName );
        assert( _objData.isEmpty() );
        _objData = b.obj();
        newVal = _objData.firstElement();
    }

    // Mutate the existing object buffer directly (only legal when prepare()
    // determined every mod can be applied in place).
    void ModSetState::applyModsInPlace( bool isOnDisk ) {
        // TODO i think this assert means that we can get rid of the isOnDisk param
        // and just use isOwned as the determination
        DEV assert( isOnDisk == ! _obj.isOwned() );

        for ( ModStateHolder::iterator i = _mods.begin(); i != _mods.end(); ++i ) {
            ModState& m = i->second;

            if ( m.dontApply ) {
                continue;
            }

            switch ( m.m->op ) {
            case Mod::UNSET:
            case Mod::ADDTOSET:
            case Mod::RENAME_FROM:
            case Mod::RENAME_TO:
                // this should have been handled by prepare
                break;
            case Mod::PULL:
            case Mod::PULL_ALL:
                // this should have been handled by prepare
                break;
            case Mod::POP:
                assert( m.old.eoo() || ( m.old.isABSONObj() && m.old.Obj().isEmpty() ) );
                break;
            // [dm] the BSONElementManipulator statements below are for replication (correct?)
            case Mod::INC:
                if ( isOnDisk )
                    m.m->IncrementMe( m.old );
                else
                    m.m->incrementMe( m.old );
                m.fixedOpName = "$set";
                m.fixed = &(m.old);
                break;
            case Mod::SET:
                if ( isOnDisk )
                    BSONElementManipulator( m.old ).ReplaceTypeAndValue( m.m->elt );
                else
                    BSONElementManipulator( m.old ).replaceTypeAndValue( m.m->elt );
                break;
            default:
                uassert( 13478 ,  "can't apply mod in place - shouldn't have gotten here" , 0 );
            }
        }
    }

    // Flatten 'top' into dotted-path -> leaf-element entries in 'fields'
    // (recursing through embedded objects; empty objects map to themselves).
    void ModSet::extractFields( map< string, BSONElement > &fields, const BSONElement &top, const string &base ) {
        if ( top.type() != Object ) {
            fields[ base + top.fieldName() ] = top;
            return;
        }
        BSONObjIterator i( top.embeddedObject() );
        bool empty = true;
        while( i.moreWithEOO() ) {
            BSONElement e = i.next();
            if ( e.eoo() )
                break;
            extractFields( fields, e, base + top.fieldName() + "." );
            empty = false;
        }
        if ( empty )
            fields[ base + top.fieldName() ] = top;
    }

    // Emit a mod whose target field does not exist in the original object,
    // creating intermediate subobjects as needed (one level per recursion).
    template< class Builder >
    void ModSetState::_appendNewFromMods( const string& root , ModState& m , Builder& b , set<string>& onedownseen ) {
        const char * temp = m.fieldName();
        temp += root.size();
        const char * dot = strchr( temp , '.'
 );
        if ( dot ) {
            // mod lives deeper: open (at most once per name) the next-level
            // subobject and recurse with the extended root prefix
            string nr( m.fieldName() , 0 , 1 + ( dot - m.fieldName() ) );
            string nf( temp , 0 , dot - temp );
            if ( onedownseen.count( nf ) )
                return;
            onedownseen.insert( nf );
            BSONObjBuilder bb ( b.subobjStart( nf ) );
            createNewFromMods( nr , bb , BSONObj() ); // don't infer an array from name
            bb.done();
        }
        else {
            appendNewFromMod( m , b );
        }

    }

    // Merge the original object 'obj' (restricted to prefix 'root') with all
    // mods under that prefix, writing the merged result into builder 'b'.
    // Walks the sorted elements and the sorted mods in lockstep.
    template< class Builder >
    void ModSetState::createNewFromMods( const string& root , Builder& b , const BSONObj &obj ) {
        DEBUGUPDATE( "\t\t createNewFromMods root: " << root );
        BSONObjIteratorSorted es( obj );
        BSONElement e = es.next();

        // [m, mend) = the range of mods whose field names start with 'root'
        // (0xff sorts after any legal field-name character)
        ModStateHolder::iterator m = _mods.lower_bound( root );
        StringBuilder buf(root.size() + 2 );
        buf << root << (char)255;
        ModStateHolder::iterator mend = _mods.lower_bound( buf.str() );

        set<string> onedownseen;

        while ( e.type() && m != mend ) {
            string field = root + e.fieldName();
            FieldCompareResult cmp = compareDottedFieldNames( m->second.m->fieldName , field );

            DEBUGUPDATE( "\t\t\t field:" << field << "\t mod:" << m->second.m->fieldName << "\t cmp:" << cmp << "\t short: " << e.fieldName() );

            switch ( cmp ) {

            case LEFT_SUBFIELD: { // Mod is embedded under this element
                uassert( 10145 ,  str::stream() << "LEFT_SUBFIELD only supports Object: " << field << " not: " << e.type() , e.type() == Object || e.type() == Array );
                if ( onedownseen.count( e.fieldName() ) == 0 ) {
                    onedownseen.insert( e.fieldName() );
                    if ( e.type() == Object ) {
                        BSONObjBuilder bb( b.subobjStart( e.fieldName() ) );
                        stringstream nr; nr << root << e.fieldName() << ".";
                        createNewFromMods( nr.str() , bb , e.embeddedObject() );
                        bb.done();
                    }
                    else {
                        BSONArrayBuilder ba( b.subarrayStart( e.fieldName() ) );
                        stringstream nr; nr << root << e.fieldName() << ".";
                        createNewFromMods( nr.str() , ba , e.embeddedObject() );
                        ba.done();
                    }
                    // inc both as we handled both
                    e = es.next();
                    m++;
                }
                else {
                    // this is a very weird case
                    // have seen it in production, but can't reproduce
                    // this assert prevents an inf. loop
                    // but likely isn't the correct solution
                    assert(0);
                }
                continue;
            }
            case LEFT_BEFORE: // Mod on a field that doesn't exist
                DEBUGUPDATE( "\t\t\t\t creating new field for: " << m->second.m->fieldName );
                _appendNewFromMods( root , m->second , b , onedownseen );
                m++;
                continue;
            case SAME:
                DEBUGUPDATE( "\t\t\t\t applying mod on: " << m->second.m->fieldName );
                m->second.apply( b , e );
                e = es.next();
                m++;
                continue;
            case RIGHT_BEFORE: // field that doesn't have a MOD
                DEBUGUPDATE( "\t\t\t\t just copying" );
                b.append( e ); // if array, ignore field name
                e = es.next();
                continue;
            case RIGHT_SUBFIELD:
                massert( 10399 ,  "ModSet::createNewFromMods - RIGHT_SUBFIELD should be impossible" , 0 );
                break;
            default:
                massert( 10400 ,  "unhandled case" , 0 );
            }
        }

        // finished looping the mods, just adding the rest of the elements
        while ( e.type() ) {
            DEBUGUPDATE( "\t\t\t copying: " << e.fieldName() );
            b.append( e ); // if array, ignore field name
            e = es.next();
        }

        // do mods that don't have fields already
        for ( ; m != mend; m++ ) {
            DEBUGUPDATE( "\t\t\t\t appending from mod at end: " << m->second.m->fieldName );
            _appendNewFromMods( root , m->second , b , onedownseen );
        }
    }

    // Build and cache the fully modified object (used when in-place application
    // was ruled out by prepare()).
    BSONObj ModSetState::createNewFromMods() {
        BSONObjBuilder b( (int)(_obj.objsize() * 1.1) );   // ~10% headroom for growth
        createNewFromMods( "" , b , _obj );
        return _newFromMods = b.obj();
    }

    string ModSetState::toString() const {
        stringstream ss;
        for ( ModStateHolder::const_iterator i=_mods.begin(); i!=_mods.end(); ++i ) {
            ss << "\t\t" << i->first << "\t" << i->second.toString() << "\n";
        }
        return ss.str();
    }

    // Orders mod field names numerically where components are numbers
    // (so "a.2" < "a.10"), used as the ModStateHolder key comparator.
    bool ModSetState::FieldCmp::operator()( const string &l, const string &r ) const {
        return lexNumCmp( l.c_str(), r.c_str() ) < 0;
    }

    // For upsert: synthesize the initial object from the (equality parts of
    // the) query, then apply this mod set to it.
    BSONObj ModSet::createNewFromQuery( const BSONObj& query ) {
        BSONObj newObj;

        {
            BSONObjBuilder bb;
            EmbeddedBuilder eb( &bb );
            BSONObjIteratorSorted i( query );
            while ( i.more() ) {
                BSONElement e = i.next();
                if ( e.fieldName()[0] == '$' ) // for $atomic and anything else we add
                    continue;

                if ( e.type() == Object && e.embeddedObject().firstElementFieldName()[0] == '$' ) {
                    // this means this is a $gt type filter, so don't make part of the new object
                    continue;
                }

                eb.appendAs( e , e.fieldName() );
            }
            eb.done();
            newObj = bb.obj();
        }

        auto_ptr<ModSetState> mss = prepare( newObj );

        if ( mss->canApplyInPlace() )
            mss->applyModsInPlace( false );
        else
            newObj = mss->createNewFromMods();

        return newObj;
    }

    /* get special operations like $inc
       { $inc: { a:1, b:1 } }
       { $set: { a:77 } }
       { $push: { a:55 } }
       { $pushAll: { a:[77,88] } }
       { $pull: { a:66 } }
       { $pullAll : { a:[99,1010] } }
       NOTE: MODIFIES source from object!
    */
    // Parses an update spec into individual Mod entries, validating field
    // names, detecting conflicts, and recording whether any mod touches an
    // indexed field (idxKeys/backgroundKeys) or a dynamic-array ".$" path.
    ModSet::ModSet(
        const BSONObj &from ,
        const set<string>& idxKeys,
        const set<string> *backgroundKeys)
        : _isIndexed(0) , _hasDynamicArray( false ) {

        BSONObjIterator it(from);

        while ( it.more() ) {
            BSONElement e = it.next();
            const char *fn = e.fieldName();

            uassert( 10147 ,  "Invalid modifier specified: " + string( fn ), e.type() == Object );
            BSONObj j = e.embeddedObject();
            DEBUGUPDATE( "\t" << j );

            BSONObjIterator jt(j);
            Mod::Op op = opFromStr( fn );

            while ( jt.more() ) {
                BSONElement f = jt.next(); // x:44

                const char * fieldName = f.fieldName();

                uassert( 10148 ,  "Mod on _id not allowed", strcmp( fieldName, "_id" ) != 0 );
                uassert( 10149 ,  "Invalid mod field name, may not end in a period", fieldName[ strlen( fieldName ) - 1 ] != '.' );
                uassert( 10150 ,  "Field name duplication not allowed with modifiers", ! haveModForField( fieldName ) );
                uassert( 10151 ,  "have conflicting mods in update" , ! haveConflictingMod( fieldName ) );
                uassert( 10152 ,  "Modifier $inc allowed for numbers only", f.isNumber() || op != Mod::INC );
                uassert( 10153 ,  "Modifier $pushAll/pullAll allowed for arrays only", f.type() == Array || ( op != Mod::PUSH_ALL && op != Mod::PULL_ALL ) );

                if ( op == Mod::RENAME_TO ) {
                    // $rename expands into a RENAME_FROM mod on the source and a
                    // RENAME_TO mod on the target, after extensive validation.
                    uassert( 13494, "$rename target must be a string", f.type() == String );
                    const char *target = f.valuestr();
                    uassert( 13495, "$rename source must differ from target", strcmp( fieldName, target ) != 0 );
                    uassert( 13496, "invalid mod field name, source may not be empty", fieldName[0] );
                    uassert( 13479, "invalid mod field name, target may not be empty", target[0] );
                    uassert( 13480, "invalid mod field name, source may not begin or end in period", fieldName[0] != '.' && fieldName[ strlen( fieldName ) - 1 ] != '.' );
                    uassert( 13481, "invalid mod field name, target may not begin or end in period", target[0] != '.' && target[ strlen( target ) - 1 ] != '.' );
                    uassert( 13482, "$rename affecting _id not allowed", !( fieldName[0] == '_' && fieldName[1] == 'i' && fieldName[2] == 'd' && ( !fieldName[3] || fieldName[3] == '.' ) ) );
                    uassert( 13483, "$rename affecting _id not allowed", !( target[0] == '_' && target[1] == 'i' && target[2] == 'd' && ( !target[3] || target[3] == '.' ) ) );
                    uassert( 13484, "field name duplication not allowed with $rename target", !haveModForField( target ) );
                    uassert( 13485, "conflicting mods not allowed with $rename target", !haveConflictingMod( target ) );
                    uassert( 13486, "$rename target may not be a parent of source", !( strncmp( fieldName, target, strlen( target ) ) == 0 && fieldName[ strlen( target ) ] == '.' ) );
                    uassert( 13487, "$rename source may not be dynamic array", strstr( fieldName , ".$" ) == 0 );
                    uassert( 13488, "$rename target may not be dynamic array", strstr( target , ".$" ) == 0 );

                    Mod from;
                    from.init( Mod::RENAME_FROM, f );
                    from.setFieldName( fieldName );
                    updateIsIndexed( from, idxKeys, backgroundKeys );
                    _mods[ from.fieldName ] = from;

                    Mod to;
                    to.init( Mod::RENAME_TO, f );
                    to.setFieldName( target );
                    updateIsIndexed( to, idxKeys, backgroundKeys );
                    _mods[ to.fieldName ] = to;

                    DEBUGUPDATE( "\t\t " << fieldName << "\t" << from.fieldName << "\t" << to.fieldName );
                    continue;
                }

                _hasDynamicArray = _hasDynamicArray || strstr( fieldName , ".$" ) > 0;

                Mod m;
                m.init( op , f );
                m.setFieldName( f.fieldName() );
                updateIsIndexed( m, idxKeys, backgroundKeys );
                _mods[m.fieldName] = m;

                DEBUGUPDATE( "\t\t " << fieldName << "\t" << m.fieldName << "\t" << _hasDynamicArray );
            }
        }

    }

    // Clone this ModSet, substituting the matched array position
    // 'elemMatchKey' for each ".$" placeholder in mod field names.
    ModSet * ModSet::fixDynamicArray( const char * elemMatchKey ) const {
        ModSet * n = new ModSet();
        n->_isIndexed = _isIndexed;
        n->_hasDynamicArray = _hasDynamicArray;
        for ( ModHolder::const_iterator i=_mods.begin(); i!=_mods.end(); i++ ) {
            string s = i->first;
            size_t idx = s.find( ".$" );
            if ( idx == string::npos ) {
                n->_mods[s] = i->second;
                continue;
            }
            StringBuilder buf(s.size()+strlen(elemMatchKey));
            buf << s.substr(0,idx+1) << elemMatchKey << s.substr(idx+2);
            string fixed = buf.str();
            DEBUGUPDATE( "fixed dynamic: " << s << " -->> " << fixed );
            n->_mods[fixed] = i->second;
            // re-point the Mod's fieldName at the map key so it stays valid
            ModHolder::iterator temp = n->_mods.find( fixed );
            temp->second.setFieldName( temp->first.c_str() );
        }
        return n;
    }

    // Reject replacement-style update documents that contain $-operators.
    void checkNoMods( BSONObj o ) {
        BSONObjIterator i( o );
        while( i.moreWithEOO() ) {
            BSONElement e = i.next();
            if ( e.eoo() )
                break;
            uassert( 10154 ,  "Modifiers and non-modifiers cannot be mixed", e.fieldName()[ 0 ] != '$' );
        }
    }

    class UpdateOp : public MultiCursor::CursorOp {
    public:
UpdateOp( bool hasPositionalField, int orClauseIndex = -1 ) : + _nscanned(), + _hasPositionalField( hasPositionalField ), + _orClauseIndex( orClauseIndex ) {} + virtual void _init() { + _c = qp().newCursor(); + if ( ! _c->ok() ) { + setComplete(); + } + } + virtual bool prepareToYield() { + if ( _orClauseIndex > 0 ) { + return false; + } + if ( ! _cc ) { + _cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , _c , qp().ns() ) ); + } + return _cc->prepareToYield( _yieldData ); + } + virtual void recoverFromYield() { + if ( !ClientCursor::recoverFromYield( _yieldData ) ) { + _c.reset(); + _cc.reset(); + massert( 13339, "cursor dropped during update", false ); + } + } + virtual long long nscanned() { + return _c.get() ? _c->nscanned() : _nscanned; + } + virtual void next() { + if ( ! _c->ok() ) { + setComplete(); + return; + } + _nscanned = _c->nscanned(); + if ( _orClauseIndex > 0 && _nscanned >= 100 ) { + setComplete(); + return; + } + if ( matcher( _c )->matchesCurrent(_c.get(), &_details ) ) { + setComplete(); + return; + } + _c->advance(); + } + + virtual bool mayRecordPlan() const { return false; } + virtual QueryOp *_createChild() const { + return new UpdateOp( _hasPositionalField, _orClauseIndex + 1 ); + } + // already scanned to the first match, so return _c + virtual shared_ptr< Cursor > newCursor() const { return _c; } + virtual bool alwaysUseRecord() const { return _hasPositionalField; } + private: + shared_ptr< Cursor > _c; + long long _nscanned; + bool _hasPositionalField; + MatchDetails _details; + ClientCursor::CleanupPointer _cc; + ClientCursor::YieldData _yieldData; + // Avoid yielding in the MultiPlanScanner when not the first $or clause - just a temporary implementaiton for now. 
SERVER-3555 + int _orClauseIndex; + }; + + static void checkTooLarge(const BSONObj& newObj) { + uassert( 12522 , "$ operator made object too large" , newObj.objsize() <= BSONObjMaxUserSize ); + } + + /* note: this is only (as-is) called for + + - not multi + - not mods is indexed + - not upsert + */ + static UpdateResult _updateById(bool isOperatorUpdate, int idIdxNo, ModSet *mods, int profile, NamespaceDetails *d, + NamespaceDetailsTransient *nsdt, + bool god, const char *ns, + const BSONObj& updateobj, BSONObj patternOrig, bool logop, OpDebug& debug) { + + DiskLoc loc; + { + IndexDetails& i = d->idx(idIdxNo); + BSONObj key = i.getKeyFromQuery( patternOrig ); + loc = i.idxInterface().findSingle(i, i.head, key); + if( loc.isNull() ) { + // no upsert support in _updateById yet, so we are done. + return UpdateResult(0, 0, 0); + } + } + Record *r = loc.rec(); + + if ( ! r->likelyInPhysicalMemory() ) { + { + auto_ptr<RWLockRecursive::Shared> lk( new RWLockRecursive::Shared( MongoFile::mmmutex) ); + dbtempreleasewritelock t; + r->touch(); + lk.reset(0); // we have to release mmmutex before we can re-acquire dbmutex + } + + { + // we need to re-find in case something changed + d = nsdetails( ns ); + if ( ! d ) { + // dropped + return UpdateResult(0, 0, 0); + } + nsdt = &NamespaceDetailsTransient::get_w(ns); + IndexDetails& i = d->idx(idIdxNo); + BSONObj key = i.getKeyFromQuery( patternOrig ); + loc = i.idxInterface().findSingle(i, i.head, key); + if( loc.isNull() ) { + // no upsert support in _updateById yet, so we are done. + return UpdateResult(0, 0, 0); + } + + r = loc.rec(); + } + } + + /* look for $inc etc. note as listed here, all fields to inc must be this type, you can't set some + regular ones at the moment. 
*/ + if ( isOperatorUpdate ) { + const BSONObj& onDisk = loc.obj(); + auto_ptr<ModSetState> mss = mods->prepare( onDisk ); + + if( mss->canApplyInPlace() ) { + mss->applyModsInPlace(true); + DEBUGUPDATE( "\t\t\t updateById doing in place update" ); + } + else { + BSONObj newObj = mss->createNewFromMods(); + checkTooLarge(newObj); + assert(nsdt); + theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , newObj.objdata(), newObj.objsize(), debug); + } + + if ( logop ) { + DEV assert( mods->size() ); + + BSONObj pattern = patternOrig; + if ( mss->haveArrayDepMod() ) { + BSONObjBuilder patternBuilder; + patternBuilder.appendElements( pattern ); + mss->appendSizeSpecForArrayDepMods( patternBuilder ); + pattern = patternBuilder.obj(); + } + + if( mss->needOpLogRewrite() ) { + DEBUGUPDATE( "\t rewrite update: " << mss->getOpLogRewrite() ); + logOp("u", ns, mss->getOpLogRewrite() , &pattern ); + } + else { + logOp("u", ns, updateobj, &pattern ); + } + } + return UpdateResult( 1 , 1 , 1); + } // end $operator update + + // regular update + BSONElementManipulator::lookForTimestamps( updateobj ); + checkNoMods( updateobj ); + assert(nsdt); + theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , updateobj.objdata(), updateobj.objsize(), debug ); + if ( logop ) { + logOp("u", ns, updateobj, &patternOrig ); + } + return UpdateResult( 1 , 0 , 1 ); + } + + UpdateResult _updateObjects(bool god, const char *ns, const BSONObj& updateobj, BSONObj patternOrig, bool upsert, bool multi, bool logop , OpDebug& debug, RemoveSaver* rs ) { + DEBUGUPDATE( "update: " << ns << " update: " << updateobj << " query: " << patternOrig << " upsert: " << upsert << " multi: " << multi ); + Client& client = cc(); + int profile = client.database()->profile; + + debug.updateobj = updateobj; + + // idea with these here it to make them loop invariant for multi updates, and thus be a bit faster for that case + // The pointers may be left invalid on a failed or terminal yield recovery. 
+ NamespaceDetails *d = nsdetails(ns); // can be null if an upsert... + NamespaceDetailsTransient *nsdt = &NamespaceDetailsTransient::get_w(ns); + + auto_ptr<ModSet> mods; + bool isOperatorUpdate = updateobj.firstElementFieldName()[0] == '$'; + int modsIsIndexed = false; // really the # of indexes + if ( isOperatorUpdate ) { + if( d && d->indexBuildInProgress ) { + set<string> bgKeys; + d->inProgIdx().keyPattern().getFieldNames(bgKeys); + mods.reset( new ModSet(updateobj, nsdt->indexKeys(), &bgKeys) ); + } + else { + mods.reset( new ModSet(updateobj, nsdt->indexKeys()) ); + } + modsIsIndexed = mods->isIndexed(); + } + + if( !multi && isSimpleIdQuery(patternOrig) && d && !modsIsIndexed ) { + int idxNo = d->findIdIndex(); + if( idxNo >= 0 ) { + debug.idhack = true; + UpdateResult result = _updateById(isOperatorUpdate, idxNo, mods.get(), profile, d, nsdt, god, ns, updateobj, patternOrig, logop, debug); + if ( result.existing || ! upsert ) { + return result; + } + else if ( upsert && ! isOperatorUpdate && ! logop) { + // this handles repl inserts + checkNoMods( updateobj ); + debug.upsert = true; + BSONObj no = updateobj; + theDataFileMgr.insertWithObjMod(ns, no, god); + return UpdateResult( 0 , 0 , 1 , no ); + } + } + } + + int numModded = 0; + long long nscanned = 0; + shared_ptr< MultiCursor::CursorOp > opPtr( new UpdateOp( mods.get() && mods->hasDynamicArray() ) ); + shared_ptr< MultiCursor > c( new MultiCursor( ns, patternOrig, BSONObj(), opPtr, true ) ); + + d = nsdetails(ns); + nsdt = &NamespaceDetailsTransient::get_w(ns); + + if( c->ok() ) { + set<DiskLoc> seenObjects; + MatchDetails details; + auto_ptr<ClientCursor> cc; + do { + nscanned++; + + bool atomic = c->matcher()->docMatcher().atomic(); + + if ( !atomic ) { + // ***************** + if ( cc.get() == 0 ) { + shared_ptr< Cursor > cPtr = c; + cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , cPtr , ns ) ); + } + + bool didYield; + if ( ! 
cc->yieldSometimes( ClientCursor::WillNeed, &didYield ) ) { + cc.release(); + break; + } + if ( !c->ok() ) { + break; + } + + if ( didYield ) { + d = nsdetails(ns); + nsdt = &NamespaceDetailsTransient::get_w(ns); + } + // ***************** + } + + // May have already matched in UpdateOp, but do again to get details set correctly + if ( ! c->matcher()->matchesCurrent( c.get(), &details ) ) { + c->advance(); + + if ( nscanned % 256 == 0 && ! atomic ) { + if ( cc.get() == 0 ) { + shared_ptr< Cursor > cPtr = c; + cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , cPtr , ns ) ); + } + if ( ! cc->yield() ) { + cc.release(); + // TODO should we assert or something? + break; + } + if ( !c->ok() ) { + break; + } + d = nsdetails(ns); + nsdt = &NamespaceDetailsTransient::get_w(ns); + } + continue; + } + + Record *r = c->_current(); + DiskLoc loc = c->currLoc(); + + // TODO Maybe this is unnecessary since we have seenObjects + if ( c->getsetdup( loc ) ) { + c->advance(); + continue; + } + + BSONObj js(r); + + BSONObj pattern = patternOrig; + + if ( logop ) { + BSONObjBuilder idPattern; + BSONElement id; + // NOTE: If the matching object lacks an id, we'll log + // with the original pattern. This isn't replay-safe. + // It might make sense to suppress the log instead + // if there's no id. + if ( js.getObjectID( id ) ) { + idPattern.append( id ); + pattern = idPattern.obj(); + } + else { + uassert( 10157 , "multi-update requires all modified objects to have an _id" , ! multi ); + } + } + + if ( profile && !multi ) + debug.nscanned = (int) nscanned; + + /* look for $inc etc. note as listed here, all fields to inc must be this type, you can't set some + regular ones at the moment. 
*/ + if ( isOperatorUpdate ) { + + if ( multi ) { + c->advance(); // go to next record in case this one moves + if ( seenObjects.count( loc ) ) + continue; + } + + const BSONObj& onDisk = loc.obj(); + + ModSet * useMods = mods.get(); + bool forceRewrite = false; + + auto_ptr<ModSet> mymodset; + if ( details._elemMatchKey && mods->hasDynamicArray() ) { + useMods = mods->fixDynamicArray( details._elemMatchKey ); + mymodset.reset( useMods ); + forceRewrite = true; + } + + auto_ptr<ModSetState> mss = useMods->prepare( onDisk ); + + bool indexHack = multi && ( modsIsIndexed || ! mss->canApplyInPlace() ); + + if ( indexHack ) { + if ( cc.get() ) + cc->updateLocation(); + else + c->noteLocation(); + } + + if ( modsIsIndexed <= 0 && mss->canApplyInPlace() ) { + mss->applyModsInPlace( true );// const_cast<BSONObj&>(onDisk) ); + + DEBUGUPDATE( "\t\t\t doing in place update" ); + if ( profile && !multi ) + debug.fastmod = true; + + if ( modsIsIndexed ) { + seenObjects.insert( loc ); + } + } + else { + if ( rs ) + rs->goingToDelete( onDisk ); + + BSONObj newObj = mss->createNewFromMods(); + checkTooLarge(newObj); + DiskLoc newLoc = theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , newObj.objdata(), newObj.objsize(), debug); + if ( newLoc != loc || modsIsIndexed ) { + // object moved, need to make sure we don' get again + seenObjects.insert( newLoc ); + } + + } + + if ( logop ) { + DEV assert( mods->size() ); + + if ( mss->haveArrayDepMod() ) { + BSONObjBuilder patternBuilder; + patternBuilder.appendElements( pattern ); + mss->appendSizeSpecForArrayDepMods( patternBuilder ); + pattern = patternBuilder.obj(); + } + + if ( forceRewrite || mss->needOpLogRewrite() ) { + DEBUGUPDATE( "\t rewrite update: " << mss->getOpLogRewrite() ); + logOp("u", ns, mss->getOpLogRewrite() , &pattern ); + } + else { + logOp("u", ns, updateobj, &pattern ); + } + } + numModded++; + if ( ! 
multi ) + return UpdateResult( 1 , 1 , numModded ); + if ( indexHack ) + c->checkLocation(); + + if ( nscanned % 64 == 0 && ! atomic ) { + if ( cc.get() == 0 ) { + shared_ptr< Cursor > cPtr = c; + cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , cPtr , ns ) ); + } + if ( ! cc->yield() ) { + cc.release(); + break; + } + if ( !c->ok() ) { + break; + } + d = nsdetails(ns); + nsdt = &NamespaceDetailsTransient::get_w(ns); + } + + getDur().commitIfNeeded(); + + continue; + } + + uassert( 10158 , "multi update only works with $ operators" , ! multi ); + + BSONElementManipulator::lookForTimestamps( updateobj ); + checkNoMods( updateobj ); + theDataFileMgr.updateRecord(ns, d, nsdt, r, loc , updateobj.objdata(), updateobj.objsize(), debug, god); + if ( logop ) { + DEV wassert( !god ); // god doesn't get logged, this would be bad. + logOp("u", ns, updateobj, &pattern ); + } + return UpdateResult( 1 , 0 , 1 ); + } while ( c->ok() ); + } // endif + + if ( numModded ) + return UpdateResult( 1 , 1 , numModded ); + + if ( profile ) + debug.nscanned = (int) nscanned; + + if ( upsert ) { + if ( updateobj.firstElementFieldName()[0] == '$' ) { + /* upsert of an $inc. build a default */ + BSONObj newObj = mods->createNewFromQuery( patternOrig ); + checkNoMods( newObj ); + debug.fastmodinsert = true; + theDataFileMgr.insertWithObjMod(ns, newObj, god); + if ( logop ) + logOp( "i", ns, newObj ); + + return UpdateResult( 0 , 1 , 1 , newObj ); + } + uassert( 10159 , "multi update only works with $ operators" , ! 
multi ); + checkNoMods( updateobj ); + debug.upsert = true; + BSONObj no = updateobj; + theDataFileMgr.insertWithObjMod(ns, no, god); + if ( logop ) + logOp( "i", ns, no ); + return UpdateResult( 0 , 0 , 1 , no ); + } + return UpdateResult( 0 , 0 , 0 ); + } + + UpdateResult updateObjects(const char *ns, const BSONObj& updateobj, BSONObj patternOrig, bool upsert, bool multi, bool logop , OpDebug& debug ) { + uassert( 10155 , "cannot update reserved $ collection", strchr(ns, '$') == 0 ); + if ( strstr(ns, ".system.") ) { + /* dm: it's very important that system.indexes is never updated as IndexDetails has pointers into it */ + uassert( 10156 , str::stream() << "cannot update system collection: " << ns << " q: " << patternOrig << " u: " << updateobj , legalClientSystemNS( ns , true ) ); + } + return _updateObjects(false, ns, updateobj, patternOrig, upsert, multi, logop, debug); + } + +} diff --git a/db/ops/update.h b/db/ops/update.h new file mode 100644 index 0000000..de5805a --- /dev/null +++ b/db/ops/update.h @@ -0,0 +1,681 @@ +// update.h + +/** + * Copyright (C) 2008 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "../../pch.h" +#include "../jsobj.h" +#include "../../util/embedded_builder.h" +#include "../matcher.h" + +namespace mongo { + + // ---------- public ------------- + + struct UpdateResult { + bool existing; // if existing objects were modified + bool mod; // was this a $ mod + long long num; // how many objects touched + OID upserted; // if something was upserted, the new _id of the object + + UpdateResult( bool e, bool m, unsigned long long n , const BSONObj& upsertedObject = BSONObj() ) + : existing(e) , mod(m), num(n) { + upserted.clear(); + + BSONElement id = upsertedObject["_id"]; + if ( ! e && n == 1 && id.type() == jstOID ) { + upserted = id.OID(); + } + } + + }; + + + class RemoveSaver; + + /* returns true if an existing object was updated, false if no existing object was found. + multi - update multiple objects - mostly useful with things like $set + god - allow access to system namespaces + */ + UpdateResult updateObjects(const char *ns, const BSONObj& updateobj, BSONObj pattern, bool upsert, bool multi , bool logop , OpDebug& debug ); + UpdateResult _updateObjects(bool god, const char *ns, const BSONObj& updateobj, BSONObj pattern, + bool upsert, bool multi , bool logop , OpDebug& debug , RemoveSaver * rs = 0 ); + + + + // ---------- private ------------- + + class ModState; + class ModSetState; + + /* Used for modifiers such as $inc, $set, $push, ... 
+ * stores the info about a single operation + * once created should never be modified + */ + struct Mod { + // See opFromStr below + // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 + enum Op { INC, SET, PUSH, PUSH_ALL, PULL, PULL_ALL , POP, UNSET, BITAND, BITOR , BIT , ADDTOSET, RENAME_FROM, RENAME_TO } op; + + static const char* modNames[]; + static unsigned modNamesNum; + + const char *fieldName; + const char *shortFieldName; + + BSONElement elt; // x:5 note: this is the actual element from the updateobj + boost::shared_ptr<Matcher> matcher; + bool matcherOnPrimitive; + + void init( Op o , BSONElement& e ) { + op = o; + elt = e; + if ( op == PULL && e.type() == Object ) { + BSONObj t = e.embeddedObject(); + if ( t.firstElement().getGtLtOp() == 0 ) { + matcher.reset( new Matcher( t ) ); + matcherOnPrimitive = false; + } + else { + matcher.reset( new Matcher( BSON( "" << t ) ) ); + matcherOnPrimitive = true; + } + } + } + + void setFieldName( const char * s ) { + fieldName = s; + shortFieldName = strrchr( fieldName , '.' 
); + if ( shortFieldName ) + shortFieldName++; + else + shortFieldName = fieldName; + } + + /** + * @param in incrememnts the actual value inside in + */ + void incrementMe( BSONElement& in ) const { + BSONElementManipulator manip( in ); + switch ( in.type() ) { + case NumberDouble: + manip.setNumber( elt.numberDouble() + in.numberDouble() ); + break; + case NumberLong: + manip.setLong( elt.numberLong() + in.numberLong() ); + break; + case NumberInt: + manip.setInt( elt.numberInt() + in.numberInt() ); + break; + default: + assert(0); + } + } + void IncrementMe( BSONElement& in ) const { + BSONElementManipulator manip( in ); + switch ( in.type() ) { + case NumberDouble: + manip.SetNumber( elt.numberDouble() + in.numberDouble() ); + break; + case NumberLong: + manip.SetLong( elt.numberLong() + in.numberLong() ); + break; + case NumberInt: + manip.SetInt( elt.numberInt() + in.numberInt() ); + break; + default: + assert(0); + } + } + + template< class Builder > + void appendIncremented( Builder& bb , const BSONElement& in, ModState& ms ) const; + + bool operator<( const Mod &other ) const { + return strcmp( fieldName, other.fieldName ) < 0; + } + + bool arrayDep() const { + switch (op) { + case PUSH: + case PUSH_ALL: + case POP: + return true; + default: + return false; + } + } + + static bool isIndexed( const string& fullName , const set<string>& idxKeys ) { + const char * fieldName = fullName.c_str(); + // check if there is an index key that is a parent of mod + for( const char *dot = strchr( fieldName, '.' ); dot; dot = strchr( dot + 1, '.' ) ) + if ( idxKeys.count( string( fieldName, dot - fieldName ) ) ) + return true; + + // check if there is an index key equal to mod + if ( idxKeys.count(fullName) ) + return true; + // check if there is an index key that is a child of mod + set< string >::const_iterator j = idxKeys.upper_bound( fullName ); + if ( j != idxKeys.end() && j->find( fullName ) == 0 && (*j)[fullName.size()] == '.' 
) + return true; + + return false; + } + + bool isIndexed( const set<string>& idxKeys ) const { + string fullName = fieldName; + + if ( isIndexed( fullName , idxKeys ) ) + return true; + + if ( strstr( fieldName , "." ) ) { + // check for a.0.1 + StringBuilder buf( fullName.size() + 1 ); + for ( size_t i=0; i<fullName.size(); i++ ) { + char c = fullName[i]; + + if ( c == '$' && + i > 0 && fullName[i-1] == '.' && + i+1<fullName.size() && + fullName[i+1] == '.' ) { + i++; + continue; + } + + buf << c; + + if ( c != '.' ) + continue; + + if ( ! isdigit( fullName[i+1] ) ) + continue; + + bool possible = true; + size_t j=i+2; + for ( ; j<fullName.size(); j++ ) { + char d = fullName[j]; + if ( d == '.' ) + break; + if ( isdigit( d ) ) + continue; + possible = false; + break; + } + + if ( possible ) + i = j; + } + string x = buf.str(); + if ( isIndexed( x , idxKeys ) ) + return true; + } + + return false; + } + + template< class Builder > + void apply( Builder& b , BSONElement in , ModState& ms ) const; + + /** + * @return true iff toMatch should be removed from the array + */ + bool _pullElementMatch( BSONElement& toMatch ) const; + + void _checkForAppending( const BSONElement& e ) const { + if ( e.type() == Object ) { + // this is a tiny bit slow, but rare and important + // only when setting something TO an object, not setting something in an object + // and it checks for { $set : { x : { 'a.b' : 1 } } } + // which is feel has been common + uassert( 12527 , "not okForStorage" , e.embeddedObject().okForStorage() ); + } + } + + bool isEach() const { + if ( elt.type() != Object ) + return false; + BSONElement e = elt.embeddedObject().firstElement(); + if ( e.type() != Array ) + return false; + return strcmp( e.fieldName() , "$each" ) == 0; + } + + BSONObj getEach() const { + return elt.embeddedObjectUserCheck().firstElement().embeddedObjectUserCheck(); + } + + void parseEach( BSONElementSet& s ) const { + BSONObjIterator i(getEach()); + while ( i.more() ) { + s.insert( 
i.next() ); + } + } + + const char *renameFrom() const { + massert( 13492, "mod must be RENAME_TO type", op == Mod::RENAME_TO ); + return elt.fieldName(); + } + }; + + /** + * stores a set of Mods + * once created, should never be changed + */ + class ModSet : boost::noncopyable { + typedef map<string,Mod> ModHolder; + ModHolder _mods; + int _isIndexed; + bool _hasDynamicArray; + + static void extractFields( map< string, BSONElement > &fields, const BSONElement &top, const string &base ); + + FieldCompareResult compare( const ModHolder::iterator &m, map< string, BSONElement >::iterator &p, const map< string, BSONElement >::iterator &pEnd ) const { + bool mDone = ( m == _mods.end() ); + bool pDone = ( p == pEnd ); + assert( ! mDone ); + assert( ! pDone ); + if ( mDone && pDone ) + return SAME; + // If one iterator is done we want to read from the other one, so say the other one is lower. + if ( mDone ) + return RIGHT_BEFORE; + if ( pDone ) + return LEFT_BEFORE; + + return compareDottedFieldNames( m->first, p->first.c_str() ); + } + + bool mayAddEmbedded( map< string, BSONElement > &existing, string right ) { + for( string left = EmbeddedBuilder::splitDot( right ); + left.length() > 0 && left[ left.length() - 1 ] != '.'; + left += "." 
+ EmbeddedBuilder::splitDot( right ) ) { + if ( existing.count( left ) > 0 && existing[ left ].type() != Object ) + return false; + if ( haveModForField( left.c_str() ) ) + return false; + } + return true; + } + static Mod::Op opFromStr( const char *fn ) { + assert( fn[0] == '$' ); + switch( fn[1] ) { + case 'i': { + if ( fn[2] == 'n' && fn[3] == 'c' && fn[4] == 0 ) + return Mod::INC; + break; + } + case 's': { + if ( fn[2] == 'e' && fn[3] == 't' && fn[4] == 0 ) + return Mod::SET; + break; + } + case 'p': { + if ( fn[2] == 'u' ) { + if ( fn[3] == 's' && fn[4] == 'h' ) { + if ( fn[5] == 0 ) + return Mod::PUSH; + if ( fn[5] == 'A' && fn[6] == 'l' && fn[7] == 'l' && fn[8] == 0 ) + return Mod::PUSH_ALL; + } + else if ( fn[3] == 'l' && fn[4] == 'l' ) { + if ( fn[5] == 0 ) + return Mod::PULL; + if ( fn[5] == 'A' && fn[6] == 'l' && fn[7] == 'l' && fn[8] == 0 ) + return Mod::PULL_ALL; + } + } + else if ( fn[2] == 'o' && fn[3] == 'p' && fn[4] == 0 ) + return Mod::POP; + break; + } + case 'u': { + if ( fn[2] == 'n' && fn[3] == 's' && fn[4] == 'e' && fn[5] == 't' && fn[6] == 0 ) + return Mod::UNSET; + break; + } + case 'b': { + if ( fn[2] == 'i' && fn[3] == 't' ) { + if ( fn[4] == 0 ) + return Mod::BIT; + if ( fn[4] == 'a' && fn[5] == 'n' && fn[6] == 'd' && fn[7] == 0 ) + return Mod::BITAND; + if ( fn[4] == 'o' && fn[5] == 'r' && fn[6] == 0 ) + return Mod::BITOR; + } + break; + } + case 'a': { + if ( fn[2] == 'd' && fn[3] == 'd' ) { + // add + if ( fn[4] == 'T' && fn[5] == 'o' && fn[6] == 'S' && fn[7] == 'e' && fn[8] == 't' && fn[9] == 0 ) + return Mod::ADDTOSET; + + } + break; + } + case 'r': { + if ( fn[2] == 'e' && fn[3] == 'n' && fn[4] == 'a' && fn[5] == 'm' && fn[6] =='e' ) { + return Mod::RENAME_TO; // with this return code we handle both RENAME_TO and RENAME_FROM + } + break; + } + default: break; + } + uassert( 10161 , "Invalid modifier specified " + string( fn ), false ); + return Mod::INC; + } + + ModSet() {} + + void updateIsIndexed( const Mod &m, const set<string> 
&idxKeys, const set<string> *backgroundKeys ) { + if ( m.isIndexed( idxKeys ) || + (backgroundKeys && m.isIndexed(*backgroundKeys)) ) { + _isIndexed++; + } + } + + public: + + ModSet( const BSONObj &from , + const set<string>& idxKeys = set<string>(), + const set<string>* backgroundKeys = 0 + ); + + // TODO: this is inefficient - should probably just handle when iterating + ModSet * fixDynamicArray( const char * elemMatchKey ) const; + + bool hasDynamicArray() const { return _hasDynamicArray; } + + /** + * creates a ModSetState suitable for operation on obj + * doesn't change or modify this ModSet or any underying Mod + */ + auto_ptr<ModSetState> prepare( const BSONObj& obj ) const; + + /** + * given a query pattern, builds an object suitable for an upsert + * will take the query spec and combine all $ operators + */ + BSONObj createNewFromQuery( const BSONObj& query ); + + /** + * + */ + int isIndexed() const { + return _isIndexed; + } + + unsigned size() const { return _mods.size(); } + + bool haveModForField( const char *fieldName ) const { + return _mods.find( fieldName ) != _mods.end(); + } + + bool haveConflictingMod( const string& fieldName ) { + size_t idx = fieldName.find( '.' 
); + if ( idx == string::npos ) + idx = fieldName.size(); + + ModHolder::const_iterator start = _mods.lower_bound(fieldName.substr(0,idx)); + for ( ; start != _mods.end(); start++ ) { + FieldCompareResult r = compareDottedFieldNames( fieldName , start->first ); + switch ( r ) { + case LEFT_SUBFIELD: return true; + case LEFT_BEFORE: return false; + case SAME: return true; + case RIGHT_BEFORE: return false; + case RIGHT_SUBFIELD: return true; + } + } + return false; + + + } + + }; + + /** + * stores any information about a single Mod operating on a single Object + */ + class ModState { + public: + const Mod * m; + BSONElement old; + BSONElement newVal; + BSONObj _objData; + + const char * fixedOpName; + BSONElement * fixed; + int pushStartSize; + + BSONType incType; + int incint; + double incdouble; + long long inclong; + + bool dontApply; + + ModState() { + fixedOpName = 0; + fixed = 0; + pushStartSize = -1; + incType = EOO; + dontApply = false; + } + + Mod::Op op() const { + return m->op; + } + + const char * fieldName() const { + return m->fieldName; + } + + bool needOpLogRewrite() const { + if ( dontApply ) + return false; + + if ( fixed || fixedOpName || incType ) + return true; + + switch( op() ) { + case Mod::RENAME_FROM: + case Mod::RENAME_TO: + return true; + case Mod::BIT: + case Mod::BITAND: + case Mod::BITOR: + // TODO: should we convert this to $set? + return false; + default: + return false; + } + } + + void appendForOpLog( BSONObjBuilder& b ) const; + + template< class Builder > + void apply( Builder& b , BSONElement in ) { + m->apply( b , in , *this ); + } + + template< class Builder > + void appendIncValue( Builder& b , bool useFullName ) const { + const char * n = useFullName ? 
m->fieldName : m->shortFieldName; + + switch ( incType ) { + case NumberDouble: + b.append( n , incdouble ); break; + case NumberLong: + b.append( n , inclong ); break; + case NumberInt: + b.append( n , incint ); break; + default: + assert(0); + } + } + + string toString() const; + + template< class Builder > + void handleRename( Builder &newObjBuilder, const char *shortFieldName ); + }; + + /** + * this is used to hold state, meta data while applying a ModSet to a BSONObj + * the goal is to make ModSet const so its re-usable + */ + class ModSetState : boost::noncopyable { + struct FieldCmp { + bool operator()( const string &l, const string &r ) const; + }; + typedef map<string,ModState,FieldCmp> ModStateHolder; + const BSONObj& _obj; + ModStateHolder _mods; + bool _inPlacePossible; + BSONObj _newFromMods; // keep this data alive, as oplog generation may depend on it + + ModSetState( const BSONObj& obj ) + : _obj( obj ) , _inPlacePossible(true) { + } + + /** + * @return if in place is still possible + */ + bool amIInPlacePossible( bool inPlacePossible ) { + if ( ! 
inPlacePossible ) + _inPlacePossible = false; + return _inPlacePossible; + } + + template< class Builder > + void createNewFromMods( const string& root , Builder& b , const BSONObj &obj ); + + template< class Builder > + void _appendNewFromMods( const string& root , ModState& m , Builder& b , set<string>& onedownseen ); + + template< class Builder > + void appendNewFromMod( ModState& ms , Builder& b ) { + if ( ms.dontApply ) { + return; + } + + //const Mod& m = *(ms.m); // HACK + Mod& m = *((Mod*)(ms.m)); // HACK + + switch ( m.op ) { + + case Mod::PUSH: + case Mod::ADDTOSET: { + if ( m.isEach() ) { + b.appendArray( m.shortFieldName , m.getEach() ); + } + else { + BSONObjBuilder arr( b.subarrayStart( m.shortFieldName ) ); + arr.appendAs( m.elt, "0" ); + arr.done(); + } + break; + } + + case Mod::PUSH_ALL: { + b.appendAs( m.elt, m.shortFieldName ); + break; + } + + case Mod::UNSET: + case Mod::PULL: + case Mod::PULL_ALL: + // no-op b/c unset/pull of nothing does nothing + break; + + case Mod::INC: + ms.fixedOpName = "$set"; + case Mod::SET: { + m._checkForAppending( m.elt ); + b.appendAs( m.elt, m.shortFieldName ); + break; + } + // shouldn't see RENAME_FROM here + case Mod::RENAME_TO: + ms.handleRename( b, m.shortFieldName ); + break; + default: + stringstream ss; + ss << "unknown mod in appendNewFromMod: " << m.op; + throw UserException( 9015, ss.str() ); + } + + } + + public: + + bool canApplyInPlace() const { + return _inPlacePossible; + } + + /** + * modified underlying _obj + * @param isOnDisk - true means this is an on disk object, and this update needs to be made durable + */ + void applyModsInPlace( bool isOnDisk ); + + BSONObj createNewFromMods(); + + // re-writing for oplog + + bool needOpLogRewrite() const { + for ( ModStateHolder::const_iterator i = _mods.begin(); i != _mods.end(); i++ ) + if ( i->second.needOpLogRewrite() ) + return true; + return false; + } + + BSONObj getOpLogRewrite() const { + BSONObjBuilder b; + for ( 
ModStateHolder::const_iterator i = _mods.begin(); i != _mods.end(); i++ ) + i->second.appendForOpLog( b ); + return b.obj(); + } + + bool haveArrayDepMod() const { + for ( ModStateHolder::const_iterator i = _mods.begin(); i != _mods.end(); i++ ) + if ( i->second.m->arrayDep() ) + return true; + return false; + } + + void appendSizeSpecForArrayDepMods( BSONObjBuilder &b ) const { + for ( ModStateHolder::const_iterator i = _mods.begin(); i != _mods.end(); i++ ) { + const ModState& m = i->second; + if ( m.m->arrayDep() ) { + if ( m.pushStartSize == -1 ) + b.appendNull( m.fieldName() ); + else + b << m.fieldName() << BSON( "$size" << m.pushStartSize ); + } + } + } + + string toString() const; + + friend class ModSet; + }; + +} + |