Diffstat (limited to 'db/pdfile.cpp')
-rw-r--r-- | db/pdfile.cpp | 514
1 file changed, 343 insertions(+), 171 deletions(-)
diff --git a/db/pdfile.cpp b/db/pdfile.cpp
index 18df5f1..1c4608c 100644
--- a/db/pdfile.cpp
+++ b/db/pdfile.cpp
@@ -30,6 +30,7 @@ _ disallow system* manipulations from the database.
 #include "../util/mmap.h"
 #include "../util/hashtab.h"
 #include "../util/file_allocator.h"
+#include "../util/processinfo.h"
 #include "btree.h"
 #include <algorithm>
 #include <list>
@@ -40,10 +41,63 @@ _ disallow system* manipulations from the database.
 #include "queryutil.h"
 #include "extsort.h"
 #include "curop.h"
+#include "background.h"

 namespace mongo {

+    map<string, unsigned> BackgroundOperation::dbsInProg;
+    set<string> BackgroundOperation::nsInProg;
+
+    bool BackgroundOperation::inProgForDb(const char *db) {
+        assertInWriteLock();
+        return dbsInProg[db] != 0;
+    }
+
+    bool BackgroundOperation::inProgForNs(const char *ns) {
+        assertInWriteLock();
+        return nsInProg.count(ns) != 0;
+    }
+
+    void BackgroundOperation::assertNoBgOpInProgForDb(const char *db) {
+        uassert(12586, "cannot perform operation: a background operation is currently running for this database",
+                !inProgForDb(db));
+    }
+
+    void BackgroundOperation::assertNoBgOpInProgForNs(const char *ns) {
+        uassert(12587, "cannot perform operation: a background operation is currently running for this collection",
+                !inProgForNs(ns));
+    }
+
+    BackgroundOperation::BackgroundOperation(const char *ns) : _ns(ns) {
+        assertInWriteLock();
+        dbsInProg[_ns.db]++;
+        assert( nsInProg.count(_ns.ns()) == 0 );
+        nsInProg.insert(_ns.ns());
+    }
+
+    BackgroundOperation::~BackgroundOperation() {
+        assertInWriteLock();
+        dbsInProg[_ns.db]--;
+        nsInProg.erase(_ns.ns());
+    }
+
+    void BackgroundOperation::dump(stringstream& ss) {
+        if( nsInProg.size() ) {
+            ss << "\n<b>Background Jobs in Progress</b>\n";
+            for( set<string>::iterator i = nsInProg.begin(); i != nsInProg.end(); i++ )
+                ss << "  " << *i << '\n';
+        }
+        for( map<string,unsigned>::iterator i = dbsInProg.begin(); i != dbsInProg.end(); i++ ) {
+            if( i->second )
+                ss << "database " << i->first << ": " << i->second << '\n';
+        }
+    }
+
+    /* ----------------------------------------- */
+
+    string dbpath = "/data/db/";
+    bool directoryperdb = false;
+    string repairpath;

     DataFileMgr theDataFileMgr;
     DatabaseHolder dbHolder;
@@ -53,7 +107,8 @@ namespace mongo {
     extern int otherTraceLevel;
     void addNewNamespaceToCatalog(const char *ns, const BSONObj *options = 0);
     void ensureIdIndexForNewNs(const char *ns) {
-        if ( !strstr( ns, ".system." ) && !strstr( ns, ".$freelist" ) ) {
+        if ( ( strstr( ns, ".system." ) == 0 || legalClientSystemNS( ns , false ) ) &&
+             strstr( ns, ".$freelist" ) == 0 ){
             log( 1 ) << "adding _id index for new collection" << endl;
             ensureHaveIdIndex( ns );
         }
@@ -63,10 +118,13 @@ namespace mongo {
         stringstream ss;
         Client * c = currentClient.get();
         if ( c ){
-            Database *database = c->database();
-            if ( database ) {
-                ss << database->name << ' ';
-                ss << cc().ns() << ' ';
+            Client::Context * cx = c->getContext();
+            if ( cx ){
+                Database *database = cx->db();
+                if ( database ) {
+                    ss << database->name << ' ';
+                    ss << cx->ns() << ' ';
+                }
             }
         }
         return ss.str();
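The BackgroundOperation bookkeeping added above is a scope-based guard: constructing one registers the namespace (and bumps a per-database counter) under the write lock, and the destructor unregisters it, which is what lets drop and repair refuse to run while a background job is active. A minimal standalone sketch of the same pattern (illustrative names, not the actual header):

    #include <cassert>
    #include <map>
    #include <set>
    #include <string>

    // Sketch only: mirrors the register-in-ctor / unregister-in-dtor idea above.
    class ScopedBgOp {
        static std::map<std::string, unsigned> dbsInProg; // db name -> op count
        static std::set<std::string> nsInProg;            // full namespaces
        std::string _db, _ns;
    public:
        static bool inProgForDb(const std::string& db) {
            std::map<std::string, unsigned>::iterator i = dbsInProg.find(db);
            return i != dbsInProg.end() && i->second != 0;
        }
        ScopedBgOp(const std::string& db, const std::string& ns) : _db(db), _ns(ns) {
            assert(nsInProg.count(_ns) == 0); // at most one bg op per collection
            dbsInProg[_db]++;
            nsInProg.insert(_ns);
        }
        ~ScopedBgOp() {
            dbsInProg[_db]--;
            nsInProg.erase(_ns);
        }
    };
    std::map<std::string, unsigned> ScopedBgOp::dbsInProg;
    std::set<std::string> ScopedBgOp::nsInProg;

The real class additionally asserts that the write lock is held on every access, since the static containers are not otherwise synchronized.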
@@ -105,7 +163,7 @@ namespace mongo {
         addNewNamespaceToCatalog(ns, j.isEmpty() ? 0 : &j);

         long long size = initialExtentSize(128);
-        BSONElement e = j.findElement("size");
+        BSONElement e = j.getField("size");
         if ( e.isNumber() ) {
             size = (long long) e.number();
             size += 256;
@@ -116,10 +174,10 @@
         bool newCapped = false;
         int mx = 0;
-        e = j.findElement("capped");
+        e = j.getField("capped");
         if ( e.type() == Bool && e.boolean() ) {
             newCapped = true;
-            e = j.findElement("max");
+            e = j.getField("max");
             if ( e.isNumber() ) {
                 mx = (int) e.number();
             }
@@ -127,7 +185,7 @@
         // $nExtents just for debug/testing.  We create '$nExtents' extents,
         // each of size 'size'.
-        e = j.findElement( "$nExtents" );
+        e = j.getField( "$nExtents" );
         int nExtents = int( e.number() );
         Database *database = cc().database();
         if ( nExtents > 0 ) {
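The three hunks above switch the collection-creation path from findElement to the equivalent getField while parsing the creation options object. For orientation, the options object j examined here would look roughly like this (illustrative values, built with the tree's BSON builder macro):

    // Hypothetical options for a capped collection: ~100KB of preallocated
    // space, at most 5000 documents. "$nExtents" is a debug/testing-only
    // option, as the comment in the hunk notes.
    BSONObj j = BSON( "capped" << true << "size" << 100000 << "max" << 5000 );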
@@ -487,13 +545,11 @@
     /*---------------------------------------------------------------------*/

     auto_ptr<Cursor> DataFileMgr::findAll(const char *ns, const DiskLoc &startLoc) {
-        DiskLoc loc;
-        bool found = nsindex(ns)->find(ns, loc);
-        if ( !found ) {
-            // out() << "info: findAll() namespace does not exist: " << ns << endl;
+        NamespaceDetails * d = nsdetails( ns );
+        if ( ! d )
             return auto_ptr<Cursor>(new BasicCursor(DiskLoc()));
-        }
+        DiskLoc loc = d->firstExtent;
         Extent *e = getExtent(loc);

         DEBUGGING {
@@ -512,40 +568,42 @@
             }
             out() << endl;

-            nsdetails(ns)->dumpDeleted(&extents);
+            d->dumpDeleted(&extents);
         }

-        if ( !nsdetails( ns )->capped ) {
-            if ( !startLoc.isNull() )
-                return auto_ptr<Cursor>(new BasicCursor( startLoc ));
-            while ( e->firstRecord.isNull() && !e->xnext.isNull() ) {
-                /* todo: if extent is empty, free it for reuse elsewhere.
-                   that is a bit complicated have to clean up the freelists.
-                */
-                RARELY out() << "info DFM::findAll(): extent " << loc.toString() << " was empty, skipping ahead " << ns << endl;
-                // find a nonempty extent
-                // it might be nice to free the whole extent here! but have to clean up free recs then.
-                e = e->getNextExtent();
-            }
-            return auto_ptr<Cursor>(new BasicCursor( e->firstRecord ));
-        } else {
-            return auto_ptr< Cursor >( new ForwardCappedCursor( nsdetails( ns ), startLoc ) );
+        if ( d->capped )
+            return auto_ptr< Cursor >( new ForwardCappedCursor( d , startLoc ) );
+
+        if ( !startLoc.isNull() )
+            return auto_ptr<Cursor>(new BasicCursor( startLoc ));
+
+        while ( e->firstRecord.isNull() && !e->xnext.isNull() ) {
+            /* todo: if extent is empty, free it for reuse elsewhere.
+               that is a bit complicated have to clean up the freelists.
+            */
+            RARELY out() << "info DFM::findAll(): extent " << loc.toString() << " was empty, skipping ahead " << ns << endl;
+            // find a nonempty extent
+            // it might be nice to free the whole extent here! but have to clean up free recs then.
+            e = e->getNextExtent();
         }
+        return auto_ptr<Cursor>(new BasicCursor( e->firstRecord ));
     }

     /* get a table scan cursor, but can be forward or reverse direction.
        order.$natural - if set, > 0 means forward (asc), < 0 backward (desc).
     */
     auto_ptr<Cursor> findTableScan(const char *ns, const BSONObj& order, const DiskLoc &startLoc) {
-        BSONElement el = order.findElement("$natural"); // e.g., { $natural : -1 }
+        BSONElement el = order.getField("$natural"); // e.g., { $natural : -1 }

         if ( el.number() >= 0 )
             return DataFileMgr::findAll(ns, startLoc);
-
+
         // "reverse natural order"
         NamespaceDetails *d = nsdetails(ns);
+
+        if ( !d )
+            return auto_ptr<Cursor>(new BasicCursor(DiskLoc()));
+
         if ( !d->capped ) {
             if ( !startLoc.isNull() )
                 return auto_ptr<Cursor>(new ReverseCursor( startLoc ));
@@ -583,6 +641,8 @@
         NamespaceDetails* d = nsdetails(nsToDrop.c_str());
         uassert( 10086 , (string)"ns not found: " + nsToDrop , d );

+        BackgroundOperation::assertNoBgOpInProgForNs(nsToDrop.c_str());
+
         NamespaceString s(nsToDrop);
         assert( s.db == cc().database()->name );
         if( s.isSystem() ) {
@@ -634,29 +694,33 @@
         log(1) << "dropCollection: " << name << endl;

         NamespaceDetails *d = nsdetails(name.c_str());
         assert( d );
+
+        BackgroundOperation::assertNoBgOpInProgForNs(name.c_str());
+
         if ( d->nIndexes != 0 ) {
             try {
-                assert( deleteIndexes(d, name.c_str(), "*", errmsg, result, true) );
+                assert( dropIndexes(d, name.c_str(), "*", errmsg, result, true) );
             }
             catch( DBException& ) {
-                uasserted(12503,"drop: deleteIndexes for collection failed - consider trying repair");
+                uasserted(12503,"drop: dropIndexes for collection failed - consider trying repair");
             }
             assert( d->nIndexes == 0 );
         }
-        log(1) << "\t deleteIndexes done" << endl;
+        log(1) << "\t dropIndexes done" << endl;
         result.append("ns", name.c_str());
         ClientCursor::invalidate(name.c_str());
+        Top::global.collectionDropped( name );
         dropNS(name);
     }

     int nUnindexes = 0;

-    void _unindexRecord(IndexDetails& id, BSONObj& obj, const DiskLoc& dl, bool logMissing = true) {
+    /* unindex all keys in index for this record. */
+    static void _unindexRecord(IndexDetails& id, BSONObj& obj, const DiskLoc& dl, bool logMissing = true) {
         BSONObjSetDefaultOrder keys;
         id.getKeysFromObject(obj, keys);
         for ( BSONObjSetDefaultOrder::iterator i=keys.begin(); i != keys.end(); i++ ) {
             BSONObj j = *i;
-            // out() << "UNINDEX: j:" << j.toString() << " head:" << id.head.toString() << dl.toString() << endl;
             if ( otherTraceLevel >= 5 ) {
                 out() << "_unindexRecord() " << obj.toString();
                 out() << "\n  unindex:" << j.toString() << endl;
@@ -666,9 +730,9 @@
             try {
                 ok = id.head.btree()->unindex(id.head, id, j, dl);
             }
-            catch (AssertionException&) {
+            catch (AssertionException& e) {
                 problem() << "Assertion failure: _unindex failed " << id.indexNamespace() << endl;
-                out() << "Assertion failure: _unindex failed" << '\n';
+                out() << "Assertion failure: _unindex failed: " << e.what() << '\n';
                 out() << "  obj:" << obj.toString() << '\n';
                 out() << "  key:" << j.toString() << '\n';
                 out() << "  dl:" << dl.toString() << endl;
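findTableScan above maps a $natural ordering onto a cursor choice: a non-negative value falls through to DataFileMgr::findAll (forward scan), while a negative value selects the "reverse natural order" branch, with capped collections getting their capped-cursor variants. A hypothetical reverse scan using the signature shown above (collection name illustrative):

    // { $natural : -1 } selects the reverse-cursor path added above.
    BSONObj order = BSON( "$natural" << -1 );
    auto_ptr<Cursor> c = findTableScan( "test.foo", order, DiskLoc() );
    while ( c->ok() ) {
        BSONObj o = c->current();
        // ... use o ...
        c->advance();
    }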
@@ -682,12 +746,14 @@
     }

     /* unindex all keys in all indexes for this record.
     */
-    void unindexRecord(NamespaceDetails *d, Record *todelete, const DiskLoc& dl, bool noWarn = false) {
-        if ( d->nIndexes == 0 ) return;
+    static void unindexRecord(NamespaceDetails *d, Record *todelete, const DiskLoc& dl, bool noWarn = false) {
         BSONObj obj(todelete);
-        NamespaceDetails::IndexIterator i = d->ii();
-        while( i.more() ) {
-            _unindexRecord(i.next(), obj, dl, !noWarn);
+        int n = d->nIndexes;
+        for ( int i = 0; i < n; i++ )
+            _unindexRecord(d->idx(i), obj, dl, !noWarn);
+        if( d->backgroundIndexBuildInProgress ) {
+            // always pass nowarn here, as this one may be missing for valid reasons as we are concurrently building it
+            _unindexRecord(d->idx(n), obj, dl, false);
         }
     }

@@ -763,19 +829,20 @@
     /** Note: if the object shrinks a lot, we don't free up space, we leave extra at end of the record.
     */
-    const DiskLoc DataFileMgr::update(const char *ns,
-                                      Record *toupdate, const DiskLoc& dl,
-                                      const char *_buf, int _len, OpDebug& debug)
+    const DiskLoc DataFileMgr::updateRecord(
+        const char *ns,
+        NamespaceDetails *d,
+        NamespaceDetailsTransient *nsdt,
+        Record *toupdate, const DiskLoc& dl,
+        const char *_buf, int _len, OpDebug& debug)
     {
         StringBuilder& ss = debug.str;
         dassert( toupdate == dl.rec() );

-        NamespaceDetails *d = nsdetails(ns);
-
         BSONObj objOld(toupdate);
         BSONObj objNew(_buf);
-        assert( objNew.objsize() == _len );
-        assert( objNew.objdata() == _buf );
+        DEV assert( objNew.objsize() == _len );
+        DEV assert( objNew.objdata() == _buf );

         if( !objNew.hasElement("_id") && objOld.hasElement("_id") ) {
             /* add back the old _id value if the update removes it.  Note this implementation is slow
@@ -795,7 +862,7 @@
         */
         vector<IndexChanges> changes;
         getIndexChanges(changes, *d, objNew, objOld);
-        dupCheck(changes, *d);
+        dupCheck(changes, *d, dl);

         if ( toupdate->netLength() < objNew.objsize() ) {
             // doesn't fit.  reallocate -----------------------------------------------------
@@ -807,13 +874,14 @@
             return insert(ns, objNew.objdata(), objNew.objsize(), false);
         }

-        NamespaceDetailsTransient::get_w( ns ).notifyOfWriteOp();
+        nsdt->notifyOfWriteOp();
         d->paddingFits();

         /* have any index keys changed? */
         {
             unsigned keyUpdates = 0;
-            for ( int x = 0; x < d->nIndexes; x++ ) {
+            int z = d->nIndexesBeingBuilt();
+            for ( int x = 0; x < z; x++ ) {
                 IndexDetails& idx = d->idx(x);
                 for ( unsigned i = 0; i < changes[x].removed.size(); i++ ) {
                     try {
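The loop bound change from d->nIndexes to d->nIndexesBeingBuilt() is what makes writers cooperate with a background build: while one is in progress, the index under construction sits one slot past nIndexes in the index array (prep() later in this diff decrements nIndexes; done() restores it), and updates must still maintain its keys. The accessor is presumably along these lines (a sketch, not the verbatim header):

    // Sketch: writers must touch nIndexes + 1 indexes while a background
    // build is in progress, because the new index lives at slot nIndexes.
    int NamespaceDetails::nIndexesBeingBuilt() const {
        return backgroundIndexBuildInProgress ? nIndexes + 1 : nIndexes;
    }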
@@ -859,10 +927,8 @@
         }
         return sz;
     }

-    int deb=0;
-
-    /* add keys to indexes for a new record */
-    inline void _indexRecord(NamespaceDetails *d, int idxNo, BSONObj& obj, DiskLoc newRecordLoc, bool dupsAllowed) {
+    /* add keys to index idxNo for a new record */
+    static inline void _indexRecord(NamespaceDetails *d, int idxNo, BSONObj& obj, DiskLoc recordLoc, bool dupsAllowed) {
         IndexDetails& idx = d->idx(idxNo);
         BSONObjSetDefaultOrder keys;
         idx.getKeysFromObject(obj, keys);
@@ -872,12 +938,16 @@
             if( ++n == 2 ) {
                 d->setIndexIsMultikey(idxNo);
             }
-            assert( !newRecordLoc.isNull() );
+            assert( !recordLoc.isNull() );
             try {
-                idx.head.btree()->bt_insert(idx.head, newRecordLoc,
+                idx.head.btree()->bt_insert(idx.head, recordLoc,
                                             *i, order, dupsAllowed, idx);
             }
-            catch (AssertionException& ) {
+            catch (AssertionException& e) {
+                if( e.code == 10287 && idxNo == d->nIndexes ) {
+                    DEV log() << "info: caught key already in index on bg indexing (ok)" << endl;
+                    continue;
+                }
                 if( !dupsAllowed ) {
                     // dup key exception, presumably.
                     throw;
@@ -913,10 +983,10 @@
     }

     // throws DBException
-    /* _ TODO dropDups
-     */
     unsigned long long fastBuildIndex(const char *ns, NamespaceDetails *d, IndexDetails& idx, int idxNo) {
-        // testSorting();
+        assert( d->backgroundIndexBuildInProgress == 0 );
+        CurOp * op = cc().curop();
+
         Timer t;

         log() << "Buildindex " << ns << " idxNo:" << idxNo << ' ' << idx.info.obj().toString() << endl;
@@ -926,13 +996,16 @@
         BSONObj order = idx.keyPattern();

         idx.head.Null();
+
+        if ( logLevel > 1 ) printMemInfo( "before index start" );

         /* get and sort all the keys ----- */
         unsigned long long n = 0;
         auto_ptr<Cursor> c = theDataFileMgr.findAll(ns);
         BSONObjExternalSorter sorter(order);
+        sorter.hintNumObjects( d->nrecords );
         unsigned long long nkeys = 0;
-        ProgressMeter pm( d->nrecords , 10 );
+        ProgressMeter & pm = op->setMessage( "index: (1/3) external sort" , d->nrecords , 10 );
         while ( c->ok() ) {
             BSONObj o = c->current();
             DiskLoc loc = c->currLoc();
@@ -947,12 +1020,20 @@
                 sorter.add(*i, loc);
                 nkeys++;
             }
-
+
             c->advance();
             n++;
             pm.hit();
+            if ( logLevel > 1 && n % 10000 == 0 ){
+                printMemInfo( "\t iterating objects" );
+            }
+
         };
+        pm.finished();
+
+        if ( logLevel > 1 ) printMemInfo( "before final sort" );
         sorter.sort();
+        if ( logLevel > 1 ) printMemInfo( "after final sort" );

         log(t.seconds() > 5 ? 0 : 1) << "\t external sort used : " << sorter.numFiles() << " files " << " in " << t.seconds() << " secs" << endl;
@@ -963,21 +1044,23 @@
             BtreeBuilder btBuilder(dupsAllowed, idx);
             BSONObj keyLast;
             auto_ptr<BSONObjExternalSorter::Iterator> i = sorter.iterator();
-            ProgressMeter pm2( nkeys , 10 );
+            pm = op->setMessage( "index: (2/3) btree bottom up" , nkeys , 10 );
             while( i->more() ) {
                 RARELY killCurrentOp.checkForInterrupt();
                 BSONObjExternalSorter::Data d = i->next();
-                //cout<<"TEMP SORTER next " << d.first.toString() << endl;
                 try {
                     btBuilder.addKey(d.first, d.second);
                 }
-                catch( AssertionException& ) {
+                catch( AssertionException& e ) {
                     if ( dupsAllowed ){
                         // unknown exception??
                         throw;
                     }
+
+                    if( e.interrupted() )
+                        throw;
+
                     if ( ! dropDups )
                         throw;
@@ -987,8 +1070,11 @@
                     dupsToDrop.push_back(d.second);
                     uassert( 10092 , "too may dups on index build with dropDups=true", dupsToDrop.size() < 1000000 );
                 }
-                pm2.hit();
+                pm.hit();
             }
+            pm.finished();
+            op->setMessage( "index: (3/3) btree-middle" );
+            log(t.seconds() > 10 ? 0 : 1 ) << "\t done building bottom layer, going to commit" << endl;
             btBuilder.commit();
             wassert( btBuilder.getn() == nkeys || dropDups );
         }
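fastBuildIndex above is the foreground build, in three phases as its progress messages say: (1/3) stream every key of every record into BSONObjExternalSorter, (2/3) feed the sorted stream to BtreeBuilder, which can fill leaves left to right instead of doing random-order inserts, and (3/3) commit, which wires up the interior levels over the finished leaves. A standalone sketch of the idea, with std::stable_sort standing in for the external sorter and a sorted array standing in for the finished btree:

    #include <algorithm>
    #include <cstddef>
    #include <string>
    #include <utility>
    #include <vector>

    // Sketch only: models the three phases of fastBuildIndex above.
    typedef std::pair<std::string, std::size_t> KeyLoc; // (index key, record location)

    std::vector<KeyLoc> buildIndexBottomUp(const std::vector<KeyLoc>& extracted) {
        // (1/3) "external sort" -- in reality BSONObjExternalSorter spills
        // sorted runs to disk and merges them; in-memory sort stands in here.
        std::vector<KeyLoc> keys(extracted);
        std::stable_sort(keys.begin(), keys.end());
        // (2/3) bottom-up load -- a real BtreeBuilder appends each key to the
        // rightmost leaf, starting a new leaf when one fills; a sorted vector
        // is the degenerate one-level version of that.
        // (3/3) commit -- parent levels are then built over the finished
        // leaves; nothing to do for the flat stand-in.
        return keys;
    }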
@@ -1001,32 +1087,61 @@
         return n;
     }

-    static class BackgroundIndexBuildJobs {
+    class BackgroundIndexBuildJob : public BackgroundOperation {

         unsigned long long addExistingToIndex(const char *ns, NamespaceDetails *d, IndexDetails& idx, int idxNo) {
             bool dupsAllowed = !idx.unique();
             bool dropDups = idx.dropDups();

+            ProgressMeter& progress = cc().curop()->setMessage( "bg index build" , d->nrecords );
+
             unsigned long long n = 0;
-            auto_ptr<Cursor> c = theDataFileMgr.findAll(ns);
-            while ( c->ok() ) {
-                BSONObj js = c->current();
+            auto_ptr<ClientCursor> cc;
+            {
+                auto_ptr<Cursor> c = theDataFileMgr.findAll(ns);
+                cc.reset( new ClientCursor(c, ns, false) );
+            }
+            CursorId id = cc->cursorid;
+
+            while ( cc->c->ok() ) {
+                BSONObj js = cc->c->current();
                 try {
-                    _indexRecord(d, idxNo, js, c->currLoc(),dupsAllowed);
-                    c->advance();
+                    _indexRecord(d, idxNo, js, cc->c->currLoc(), dupsAllowed);
+                    cc->c->advance();
                 } catch( AssertionException& e ) {
+                    if( e.interrupted() )
+                        throw;
+
                     if ( dropDups ) {
-                        DiskLoc toDelete = c->currLoc();
-                        c->advance();
+                        DiskLoc toDelete = cc->c->currLoc();
+                        bool ok = cc->c->advance();
+                        cc->updateLocation();
                         theDataFileMgr.deleteRecord( ns, toDelete.rec(), toDelete, false, true );
+                        if( ClientCursor::find(id, false) == 0 ) {
+                            cc.release();
+                            if( !ok ) {
+                                /* we were already at the end. normal. */
+                            }
+                            else {
+                                uasserted(12585, "cursor gone during bg index; dropDups");
+                            }
+                            break;
+                        }
                     } else {
-                        _log() << endl;
-                        log(2) << "addExistingToIndex exception " << e.what() << endl;
+                        log() << "background addExistingToIndex exception " << e.what() << endl;
                         throw;
                     }
                 }
                 n++;
-            };
+                progress.hit();
+
+                if ( n % 128 == 0 && !cc->yield() ) {
+                    cc.release();
+                    uasserted(12584, "cursor gone during bg index");
+                    break;
+                }
+            }
+            progress.done();
             return n;
         }

@@ -1034,72 +1149,76 @@
            that way on a crash/restart, we don't think we are still building one. */
         set<NamespaceDetails*> bgJobsInProgress;

-        void prep(NamespaceDetails *d) {
+        void prep(const char *ns, NamespaceDetails *d) {
             assertInWriteLock();
-            assert( bgJobsInProgress.count(d) == 0 );
             bgJobsInProgress.insert(d);
             d->backgroundIndexBuildInProgress = 1;
+            d->nIndexes--;
         }
-
-    public:
-        /* Note you cannot even do a foreground index build if a background is in progress,
-           as bg build assumes it is the last index in the array!
-        */
-        void checkInProg(NamespaceDetails *d) {
+        void done(const char *ns, NamespaceDetails *d) {
+            d->nIndexes++;
+            d->backgroundIndexBuildInProgress = 0;
+            NamespaceDetailsTransient::get_w(ns).addedIndex(); // clear query optimizer cache
             assertInWriteLock();
-            uassert(12580, "already building an index for this namespace in background", bgJobsInProgress.count(d) == 0);
         }
-/* todo: clean bg flag on loading of NamespaceDetails */

+    public:
+        BackgroundIndexBuildJob(const char *ns) : BackgroundOperation(ns) { }
+
         unsigned long long go(string ns, NamespaceDetails *d, IndexDetails& idx, int idxNo) {
-            unsigned long long n;
-            prep(d);
+            unsigned long long n = 0;
+
+            prep(ns.c_str(), d);
+            assert( idxNo == d->nIndexes );
             try {
                 idx.head = BtreeBucket::addBucket(idx);
                 n = addExistingToIndex(ns.c_str(), d, idx, idxNo);
             }
             catch(...) {
-                assertInWriteLock();
-                bgJobsInProgress.erase(d);
-                d->backgroundIndexBuildInProgress = 0;
+                if( cc().database() && nsdetails(ns.c_str()) == d ) {
+                    assert( idxNo == d->nIndexes );
+                    done(ns.c_str(), d);
+                }
+                else {
+                    log() << "ERROR: db gone during bg index?" << endl;
+                }
                 throw;
             }
+            assert( idxNo == d->nIndexes );
+            done(ns.c_str(), d);
             return n;
         }
-    } backgroundIndex;
+    };

     // throws DBException
-    static void buildAnIndex(string ns, NamespaceDetails *d, IndexDetails& idx, int idxNo) {
-        log() << "building new index on " << idx.keyPattern() << " for " << ns << "..." << endl;
+    static void buildAnIndex(string ns, NamespaceDetails *d, IndexDetails& idx, int idxNo, bool background) {
+        log() << "building new index on " << idx.keyPattern() << " for " << ns << endl;
         Timer t;
         unsigned long long n;

-        BSONObj info = idx.info.obj();
-        bool background = info["background"].trueValue();
-        if( background ) {
-            log() << "WARNING: background index build not yet implemented" << endl;
+        if( background ) {
+            log(2) << "buildAnIndex: background=true\n";
         }
+        assert( !BackgroundOperation::inProgForNs(ns.c_str()) ); // should have been checked earlier, better not be...

         if( !background ) {
             n = fastBuildIndex(ns.c_str(), d, idx, idxNo);
             assert( !idx.head.isNull() );
         }
         else {
-            n = backgroundIndex.go(ns, d, idx, idxNo);
+            BackgroundIndexBuildJob j(ns.c_str());
+            n = j.go(ns, d, idx, idxNo);
        }
         log() << "done for " << n << " records " << t.millis() / 1000.0 << "secs" << endl;
     }
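BackgroundIndexBuildJob above is the concurrent path: it derives from BackgroundOperation (so drops and repairs are refused while it runs), inserts keys record by record through a ClientCursor, and every 128 records yields the write lock, re-checking that the cursor survived the yield (codes 12584/12585 fire when a drop invalidated it mid-build). A standalone sketch of the yield-every-N loop (illustrative names; yieldLock() stands in for ClientCursor::yield()):

    #include <stdexcept>

    // Sketch only: the periodic-yield pattern from addExistingToIndex above.
    // Cursor is any type with ok()/current()/currLoc()/advance()/yieldLock();
    // yieldLock() releases and reacquires the lock, returning false if the
    // cursor was invalidated (e.g. the collection was dropped) while yielded.
    template <typename Cursor, typename Fn>
    unsigned long long forEachWithYield(Cursor& c, Fn indexOne) {
        unsigned long long n = 0;
        while (c.ok()) {
            indexOne(c.current(), c.currLoc());
            c.advance();
            n++;
            if (n % 128 == 0 && !c.yieldLock())
                throw std::runtime_error("cursor gone during bg index");
        }
        return n;
    }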
     /* add keys to indexes for a new record */
-    void indexRecord(NamespaceDetails *d, const void *buf, int len, DiskLoc newRecordLoc) {
-        BSONObj obj((const char *)buf);
-
-        /*UNIQUE*/
-        for ( int i = 0; i < d->nIndexes; i++ ) {
+    static void indexRecord(NamespaceDetails *d, BSONObj obj, DiskLoc loc) {
+        int n = d->nIndexesBeingBuilt();
+        for ( int i = 0; i < n; i++ ) {
             try {
                 bool unique = d->idx(i).unique();
-                _indexRecord(d, i, obj, newRecordLoc, /*dupsAllowed*/!unique);
+                _indexRecord(d, i, obj, loc, /*dupsAllowed*/!unique);
             }
             catch( DBException& ) {
                 /* try to roll back previously added index entries
@@ -1108,7 +1227,7 @@
                 */
                 for( int j = 0; j <= i; j++ ) {
                     try {
-                        _unindexRecord(d->idx(j), obj, newRecordLoc, false);
+                        _unindexRecord(d->idx(j), obj, loc, false);
                     }
                     catch(...) {
                         log(3) << "unindex fails on rollback after unique failure\n";
@@ -1119,7 +1238,7 @@
             }
         }
     }

-    extern BSONObj id_obj; // { _id : ObjectId("000000000000000000000000") }
+    extern BSONObj id_obj; // { _id : 1 }

     void ensureHaveIdIndex(const char *ns) {
         NamespaceDetails *d = nsdetails(ns);
@@ -1179,12 +1298,31 @@
     bool prepareToBuildIndex(const BSONObj& io, bool god, string& sourceNS, NamespaceDetails *&sourceCollection);

+    // We are now doing two btree scans for all unique indexes (one here, and one when we've
+    // written the record to the collection.  This could be made more efficient inserting
+    // dummy data here, keeping pointers to the btree nodes holding the dummy data and then
+    // updating the dummy data with the DiskLoc of the real record.
+    void checkNoIndexConflicts( NamespaceDetails *d, const BSONObj &obj ) {
+        for ( int idxNo = 0; idxNo < d->nIndexes; idxNo++ ) {
+            if( d->idx(idxNo).unique() ) {
+                IndexDetails& idx = d->idx(idxNo);
+                BSONObjSetDefaultOrder keys;
+                idx.getKeysFromObject(obj, keys);
+                BSONObj order = idx.keyPattern();
+                for ( BSONObjSetDefaultOrder::iterator i=keys.begin(); i != keys.end(); i++ ) {
+                    uassert( 12582, "duplicate key insert for unique index of capped collection",
+                             idx.head.btree()->findSingle(idx, idx.head, *i ).isNull() );
+                }
+            }
+        }
+    }
+
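checkNoIndexConflicts above exists because an insert into a capped collection may delete the oldest records to make room, so a unique-key violation discovered after allocation cannot simply be rolled back; the candidate document's keys are therefore probed against every unique index before any space is touched, at the cost the comment notes (an extra btree descent per unique key). A standalone sketch of the probe-before-insert idea, with std::set standing in for a btree:

    #include <set>
    #include <stdexcept>
    #include <string>
    #include <vector>

    // Sketch only: probe each unique "index" for every key of the candidate
    // document before touching storage. The real code extracts a separate
    // key set per index from its key pattern.
    void checkNoConflictsSketch(const std::vector<std::set<std::string> >& uniqueIndexes,
                                const std::vector<std::string>& candidateKeys) {
        for (std::size_t i = 0; i < uniqueIndexes.size(); i++)
            for (std::size_t k = 0; k < candidateKeys.size(); k++)
                if (uniqueIndexes[i].count(candidateKeys[k]))
                    throw std::runtime_error(
                        "duplicate key insert for unique index of capped collection");
    }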
     /* note: if god==true, you may pass in obuf of NULL and then populate the returned DiskLoc
              after the call -- that will prevent a double buffer copy in some cases (btree.cpp).
     */
     DiskLoc DataFileMgr::insert(const char *ns, const void *obuf, int len, bool god, const BSONElement &writeId, bool mayAddIndex) {
         bool wouldAddIndex = false;
-        uassert( 10093 , "cannot insert into reserved $ collection", god || strchr(ns, '$') == 0 );
+        massert( 10093 , "cannot insert into reserved $ collection", god || strchr(ns, '$') == 0 );
         uassert( 10094 , "invalid ns", strchr( ns , '.' ) > 0 );
         const char *sys = strstr(ns, "system.");
         if ( sys ) {
@@ -1212,8 +1350,8 @@
             /* todo: shouldn't be in the namespace catalog until after the allocations here work.
                also if this is an addIndex, those checks should happen before this!
             */
-            // This creates first file in the database.
-            cc().database()->newestFile()->createExtent(ns, initialExtentSize(len));
+            // This may create first file in the database.
+            cc().database()->allocExtent(ns, initialExtentSize(len), false);
             d = nsdetails(ns);
             if ( !god )
                 ensureIdIndexForNewNs(ns);
@@ -1225,10 +1363,8 @@
         string tabletoidxns;
         if ( addIndex ) {
             BSONObj io((const char *) obuf);
-            backgroundIndex.checkInProg(d);
-            if( !prepareToBuildIndex(io, god, tabletoidxns, tableToIndex) ) {
+            if( !prepareToBuildIndex(io, god, tabletoidxns, tableToIndex) )
                 return DiskLoc();
-            }
         }

         const BSONElement *newId = &writeId;
@@ -1262,6 +1398,13 @@
             d->paddingFactor = 1.0;
             lenWHdr = len + Record::HeaderSize;
         }
+
+        // If the collection is capped, check if the new object will violate a unique index
+        // constraint before allocating space.
+        if ( d->nIndexes && d->capped && !god ) {
+            checkNoIndexConflicts( d, BSONObj( reinterpret_cast<const char *>( obuf ) ) );
+        }
+
         DiskLoc loc = d->alloc(ns, lenWHdr, extentLoc);
         if ( loc.isNull() ) {
             // out of space
@@ -1321,27 +1464,35 @@
         NamespaceDetailsTransient::get_w( ns ).notifyOfWriteOp();

         if ( tableToIndex ) {
+            BSONObj info = loc.obj();
+            bool background = info["background"].trueValue();
+
             int idxNo = tableToIndex->nIndexes;
-            IndexDetails& idx = tableToIndex->addIndex(tabletoidxns.c_str()); // clear transient info caches so they refresh; increments nIndexes
+            IndexDetails& idx = tableToIndex->addIndex(tabletoidxns.c_str(), !background); // clear transient info caches so they refresh; increments nIndexes
             idx.info = loc;
             try {
-                buildAnIndex(tabletoidxns, tableToIndex, idx, idxNo);
+                buildAnIndex(tabletoidxns, tableToIndex, idx, idxNo, background);
             } catch( DBException& ) {
-                // save our error msg string as an exception on deleteIndexes will overwrite our message
+                // save our error msg string as an exception or dropIndexes will overwrite our message
                 LastError *le = lastError.get();
-                assert( le );
-                string saveerrmsg = le->msg;
-                assert( !saveerrmsg.empty() );
+                int savecode = 0;
+                string saveerrmsg;
+                if ( le ) {
+                    savecode = le->code;
+                    saveerrmsg = le->msg;
+                }

                 // roll back this index
                 string name = idx.indexName();
                 BSONObjBuilder b;
                 string errmsg;
-                bool ok = deleteIndexes(tableToIndex, tabletoidxns.c_str(), name.c_str(), errmsg, b, true);
+                bool ok = dropIndexes(tableToIndex, tabletoidxns.c_str(), name.c_str(), errmsg, b, true);
                 if( !ok ) {
                     log() << "failed to drop index after a unique key error building it: " << errmsg << ' ' << tabletoidxns << ' ' << name << endl;
                 }
-                raiseError(12506,saveerrmsg.c_str());
+
+                assert( le && !saveerrmsg.empty() );
+                raiseError(savecode,saveerrmsg.c_str());
                 throw;
             }
         }
@@ -1349,11 +1500,13 @@
         /* add this record to our indexes */
         if ( d->nIndexes ) {
             try {
-                indexRecord(d, r->data/*buf*/, len, loc);
+                BSONObj obj(r->data);
+                indexRecord(d, obj, loc);
             }
             catch( AssertionException& e ) {
                 // should be a dup key error on _id index
-                if( tableToIndex || d->capped ) {
+                if( tableToIndex || d->capped ) {
+                    massert( 12583, "unexpected index insertion failure on capped collection", !d->capped );
                     string s = e.toString();
                     s += " : on addIndex/capped - collection and its index will not match";
                     uassert_nothrow(s.c_str());
@@ -1406,19 +1559,6 @@
         return r;
     }

-    void DataFileMgr::init(const string& path ) {
-        /* boost::filesystem::path path( dir );
-        path /= "temp.dat";
-        string pathString = path.string();
-        temp.open(pathString.c_str(), 64 * 1024 * 1024);
-        */
-    }
-
-    void pdfileInit() {
-        // namespaceIndex.init(dbpath);
-        theDataFileMgr.init(dbpath);
-    }
-
 } // namespace mongo

 #include "clientcursor.h"
@@ -1427,63 +1567,75 @@
     void dropDatabase(const char *ns) {
         // ns is of the form "<dbname>.$cmd"
-        char cl[256];
-        nsToDatabase(ns, cl);
-        log(1) << "dropDatabase " << cl << endl;
-        assert( cc().database()->name == cl );
+        char db[256];
+        nsToDatabase(ns, db);
+        log(1) << "dropDatabase " << db << endl;
+        assert( cc().database()->name == db );
+
+        BackgroundOperation::assertNoBgOpInProgForDb(db);

-        closeDatabase( cl );
-        _deleteDataFiles(cl);
+        closeDatabase( db );
+        _deleteDataFiles(db);
     }

     typedef boost::filesystem::path Path;

     // back up original database files to 'temp' dir
     void _renameForBackup( const char *database, const Path &reservedPath ) {
+        Path newPath( reservedPath );
+        if ( directoryperdb )
+            newPath /= database;
         class Renamer : public FileOp {
         public:
-            Renamer( const Path &reservedPath ) : reservedPath_( reservedPath ) {}
+            Renamer( const Path &newPath ) : newPath_( newPath ) {}
         private:
-            const boost::filesystem::path &reservedPath_;
+            const boost::filesystem::path &newPath_;
             virtual bool apply( const Path &p ) {
                 if ( !boost::filesystem::exists( p ) )
                     return false;
-                boost::filesystem::rename( p, reservedPath_ / ( p.leaf() + ".bak" ) );
+                boost::filesystem::rename( p, newPath_ / ( p.leaf() + ".bak" ) );
                 return true;
             }
             virtual const char * op() const {
                 return "renaming";
             }
-        } renamer( reservedPath );
+        } renamer( newPath );
         _applyOpToDataFiles( database, renamer, true );
     }

     // move temp files to standard data dir
     void _replaceWithRecovered( const char *database, const char *reservedPathString ) {
-        class : public FileOp {
+        Path newPath( dbpath );
+        if ( directoryperdb )
+            newPath /= database;
+        class Replacer : public FileOp {
+        public:
+            Replacer( const Path &newPath ) : newPath_( newPath ) {}
+        private:
+            const boost::filesystem::path &newPath_;
             virtual bool apply( const Path &p ) {
                 if ( !boost::filesystem::exists( p ) )
                     return false;
-                boost::filesystem::rename( p, boost::filesystem::path(dbpath) / p.leaf() );
+                boost::filesystem::rename( p, newPath_ / p.leaf() );
                 return true;
             }
             virtual const char * op() const {
                 return "renaming";
             }
-        } renamer;
-        _applyOpToDataFiles( database, renamer, true, reservedPathString );
+        } replacer( newPath );
+        _applyOpToDataFiles( database, replacer, true, reservedPathString );
     }

     // generate a directory name for storing temp data files
     Path uniqueReservedPath( const char *prefix ) {
-        Path dbPath = Path( dbpath );
+        Path repairPath = Path( repairpath );
         Path reservedPath;
         int i = 0;
         bool exists = false;
         do {
             stringstream ss;
             ss << prefix << "_repairDatabase_" << i++;
-            reservedPath = dbPath / ss.str();
+            reservedPath = repairPath / ss.str();
             BOOST_CHECK_EXCEPTION( exists = boost::filesystem::exists( reservedPath ) );
         } while ( exists );
         return reservedPath;
@@ -1540,6 +1692,8 @@
         problem() << "repairDatabase " << dbName << endl;
         assert( cc().database()->name == dbName );

+        BackgroundOperation::assertNoBgOpInProgForDb(dbName);
+
         boost::intmax_t totalSize = dbSize( dbName );
         boost::intmax_t freeSize = freeSpace();
         if ( freeSize > -1 && freeSize < totalSize ) {
@@ -1553,14 +1707,19 @@
         Path reservedPath =
             uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ?
-                                "backup" : "tmp" );
+                                "backup" : "$tmp" );
         BOOST_CHECK_EXCEPTION( boost::filesystem::create_directory( reservedPath ) );
         string reservedPathString = reservedPath.native_directory_string();
-        assert( setClient( dbName, reservedPathString.c_str() ) );
-
-        bool res = cloneFrom(localhost.c_str(), errmsg, dbName,
-                             /*logForReplication=*/false, /*slaveok*/false, /*replauth*/false, /*snapshot*/false);
-        closeDatabase( dbName, reservedPathString.c_str() );
+
+        bool res;
+        { // clone to temp location, which effectively does repair
+            Client::Context ctx( dbName, reservedPathString );
+            assert( ctx.justCreated() );
+
+            res = cloneFrom(localhost.c_str(), errmsg, dbName,
+                            /*logForReplication=*/false, /*slaveok*/false, /*replauth*/false, /*snapshot*/false);
+            closeDatabase( dbName, reservedPathString.c_str() );
+        }

         if ( !res ) {
             problem() << "clone failed for " << dbName << " with error: " << errmsg << endl;
@@ -1569,13 +1728,15 @@
             return false;
         }

-        assert( !setClient( dbName ) );
+        Client::Context ctx( dbName );
         closeDatabase( dbName );

-        if ( backupOriginalFiles )
+        if ( backupOriginalFiles ) {
             _renameForBackup( dbName, reservedPath );
-        else
+        } else {
             _deleteDataFiles( dbName );
+            BOOST_CHECK_EXCEPTION( boost::filesystem::create_directory( Path( dbpath ) / dbName ) );
+        }

         _replaceWithRecovered( dbName, reservedPathString.c_str() );

@@ -1591,6 +1752,8 @@
         string c = database;
         c += '.';
         boost::filesystem::path p(path);
+        if ( directoryperdb )
+            p /= database;
         boost::filesystem::path q;
         q = p / (c+"ns");
         bool ok = false;
@@ -1619,8 +1782,8 @@
     NamespaceDetails* nsdetails_notinline(const char *ns) { return nsdetails(ns); }

-    bool DatabaseHolder::closeAll( const string& path , BSONObjBuilder& result ){
-        log(2) << "DatabaseHolder::closeAll path:" << path << endl;
+    bool DatabaseHolder::closeAll( const string& path , BSONObjBuilder& result , bool force ){
+        log() << "DatabaseHolder::closeAll path:" << path << endl;
         dbMutex.assertWriteLocked();

         map<string,Database*>& m = _paths[path];
@@ -1633,14 +1796,23 @@
         BSONObjBuilder bb( result.subarrayStart( "dbs" ) );
         int n = 0;
+        int nNotClosed = 0;
         for( set< string >::iterator i = dbs.begin(); i != dbs.end(); ++i ) {
             string name = *i;
             log(2) << "DatabaseHolder::closeAll path:" << path << " name:" << name << endl;
-            setClient( name.c_str() , path );
-            closeDatabase( name.c_str() , path );
-            bb.append( bb.numStr( n++ ).c_str() , name );
+            Client::Context ctx( name , path );
+            if( !force && BackgroundOperation::inProgForDb(name.c_str()) ) {
+                log() << "WARNING: can't close database " << name << " because a bg job is in progress - try killOp command" << endl;
+                nNotClosed++;
+            }
+            else {
+                closeDatabase( name.c_str() , path );
+                bb.append( bb.numStr( n++ ).c_str() , name );
+            }
         }
         bb.done();
+        if( nNotClosed )
+            result.append("nNotClosed", nNotClosed);

         return true;
     }