Diffstat (limited to 'db/pdfile.cpp')
-rw-r--r--  db/pdfile.cpp | 514
1 file changed, 343 insertions(+), 171 deletions(-)
diff --git a/db/pdfile.cpp b/db/pdfile.cpp
index 18df5f1..1c4608c 100644
--- a/db/pdfile.cpp
+++ b/db/pdfile.cpp
@@ -30,6 +30,7 @@ _ disallow system* manipulations from the database.
#include "../util/mmap.h"
#include "../util/hashtab.h"
#include "../util/file_allocator.h"
+#include "../util/processinfo.h"
#include "btree.h"
#include <algorithm>
#include <list>
@@ -40,10 +41,63 @@ _ disallow system* manipulations from the database.
#include "queryutil.h"
#include "extsort.h"
#include "curop.h"
+#include "background.h"
namespace mongo {
+ map<string, unsigned> BackgroundOperation::dbsInProg;
+ set<string> BackgroundOperation::nsInProg;
+
+ bool BackgroundOperation::inProgForDb(const char *db) {
+ assertInWriteLock();
+ return dbsInProg[db] != 0;
+ }
+
+ bool BackgroundOperation::inProgForNs(const char *ns) {
+ assertInWriteLock();
+ return nsInProg.count(ns) != 0;
+ }
+
+ void BackgroundOperation::assertNoBgOpInProgForDb(const char *db) {
+ uassert(12586, "cannot perform operation: a background operation is currently running for this database",
+ !inProgForDb(db));
+ }
+
+ void BackgroundOperation::assertNoBgOpInProgForNs(const char *ns) {
+ uassert(12587, "cannot perform operation: a background operation is currently running for this collection",
+ !inProgForNs(ns));
+ }
+
+ BackgroundOperation::BackgroundOperation(const char *ns) : _ns(ns) {
+ assertInWriteLock();
+ dbsInProg[_ns.db]++;
+ assert( nsInProg.count(_ns.ns()) == 0 );
+ nsInProg.insert(_ns.ns());
+ }
+
+ BackgroundOperation::~BackgroundOperation() {
+ assertInWriteLock();
+ dbsInProg[_ns.db]--;
+ nsInProg.erase(_ns.ns());
+ }
+
+ void BackgroundOperation::dump(stringstream& ss) {
+ if( nsInProg.size() ) {
+ ss << "\n<b>Background Jobs in Progress</b>\n";
+ for( set<string>::iterator i = nsInProg.begin(); i != nsInProg.end(); i++ )
+ ss << " " << *i << '\n';
+ }
+ for( map<string,unsigned>::iterator i = dbsInProg.begin(); i != dbsInProg.end(); i++ ) {
+ if( i->second )
+ ss << "database " << i->first << ": " << i->second << '\n';
+ }
+ }
+
+ /* ----------------------------------------- */
+
string dbpath = "/data/db/";
+ bool directoryperdb = false;
+ string repairpath;
DataFileMgr theDataFileMgr;
DatabaseHolder dbHolder;
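
The new BackgroundOperation base class is an RAII registry: the constructor (which must run under the write lock) records the namespace and bumps the per-database count, and the destructor undoes both. A minimal sketch of the intended usage, with a hypothetical job type:

    // Sketch only -- MyBgJob is hypothetical; the registry calls are from the patch above.
    class MyBgJob : public BackgroundOperation {
    public:
        MyBgJob(const char *ns) : BackgroundOperation(ns) { } // registers ns and its db
        void run() { /* long-running work, yielding periodically */ }
    }; // destructor deregisters

    void startJob(const char *ns) {
        assertInWriteLock();
        BackgroundOperation::assertNoBgOpInProgForNs(ns); // uasserts 12587 if one is running
        MyBgJob job(ns);  // from here, inProgForNs(ns) is true
        job.run();
    } // job destroyed: dbsInProg count decremented, ns erased from nsInProg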
@@ -53,7 +107,8 @@ namespace mongo {
extern int otherTraceLevel;
void addNewNamespaceToCatalog(const char *ns, const BSONObj *options = 0);
void ensureIdIndexForNewNs(const char *ns) {
- if ( !strstr( ns, ".system." ) && !strstr( ns, ".$freelist" ) ) {
+ if ( ( strstr( ns, ".system." ) == 0 || legalClientSystemNS( ns , false ) ) &&
+ strstr( ns, ".$freelist" ) == 0 ){
log( 1 ) << "adding _id index for new collection" << endl;
ensureHaveIdIndex( ns );
}
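
With the reworked condition, collections whose names pass legalClientSystemNS still get an automatic _id index even though they contain ".system.", while other system namespaces and the $freelist remain excluded. Illustrative calls (which namespaces legalClientSystemNS actually accepts is assumed here):

    ensureIdIndexForNewNs( "test.foo" );          // plain collection: _id index added
    ensureIdIndexForNewNs( "test.system.users" ); // added only if legalClientSystemNS says so
    ensureIdIndexForNewNs( "test.$freelist" );    // never indexed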
@@ -63,10 +118,13 @@ namespace mongo {
stringstream ss;
Client * c = currentClient.get();
if ( c ){
- Database *database = c->database();
- if ( database ) {
- ss << database->name << ' ';
- ss << cc().ns() << ' ';
+ Client::Context * cx = c->getContext();
+ if ( cx ){
+ Database *database = cx->db();
+ if ( database ) {
+ ss << database->name << ' ';
+ ss << cx->ns() << ' ';
+ }
}
}
return ss.str();
@@ -105,7 +163,7 @@ namespace mongo {
addNewNamespaceToCatalog(ns, j.isEmpty() ? 0 : &j);
long long size = initialExtentSize(128);
- BSONElement e = j.findElement("size");
+ BSONElement e = j.getField("size");
if ( e.isNumber() ) {
size = (long long) e.number();
size += 256;
@@ -116,10 +174,10 @@ namespace mongo {
bool newCapped = false;
int mx = 0;
- e = j.findElement("capped");
+ e = j.getField("capped");
if ( e.type() == Bool && e.boolean() ) {
newCapped = true;
- e = j.findElement("max");
+ e = j.getField("max");
if ( e.isNumber() ) {
mx = (int) e.number();
}
@@ -127,7 +185,7 @@ namespace mongo {
// $nExtents just for debug/testing. We create '$nExtents' extents,
// each of size 'size'.
- e = j.findElement( "$nExtents" );
+ e = j.getField( "$nExtents" );
int nExtents = int( e.number() );
Database *database = cc().database();
if ( nExtents > 0 ) {
@@ -487,13 +545,11 @@ namespace mongo {
/*---------------------------------------------------------------------*/
auto_ptr<Cursor> DataFileMgr::findAll(const char *ns, const DiskLoc &startLoc) {
- DiskLoc loc;
- bool found = nsindex(ns)->find(ns, loc);
- if ( !found ) {
- // out() << "info: findAll() namespace does not exist: " << ns << endl;
+ NamespaceDetails * d = nsdetails( ns );
+ if ( ! d )
return auto_ptr<Cursor>(new BasicCursor(DiskLoc()));
- }
+ DiskLoc loc = d->firstExtent;
Extent *e = getExtent(loc);
DEBUGGING {
@@ -512,40 +568,42 @@ namespace mongo {
}
out() << endl;
- nsdetails(ns)->dumpDeleted(&extents);
+ d->dumpDeleted(&extents);
}
- if ( !nsdetails( ns )->capped ) {
- if ( !startLoc.isNull() )
- return auto_ptr<Cursor>(new BasicCursor( startLoc ));
- while ( e->firstRecord.isNull() && !e->xnext.isNull() ) {
- /* todo: if extent is empty, free it for reuse elsewhere.
- that is a bit complicated have to clean up the freelists.
- */
- RARELY out() << "info DFM::findAll(): extent " << loc.toString() << " was empty, skipping ahead " << ns << endl;
- // find a nonempty extent
- // it might be nice to free the whole extent here! but have to clean up free recs then.
- e = e->getNextExtent();
- }
- return auto_ptr<Cursor>(new BasicCursor( e->firstRecord ));
- } else {
- return auto_ptr< Cursor >( new ForwardCappedCursor( nsdetails( ns ), startLoc ) );
+ if ( d->capped )
+ return auto_ptr< Cursor >( new ForwardCappedCursor( d , startLoc ) );
+
+ if ( !startLoc.isNull() )
+ return auto_ptr<Cursor>(new BasicCursor( startLoc ));
+
+ while ( e->firstRecord.isNull() && !e->xnext.isNull() ) {
+ /* todo: if extent is empty, free it for reuse elsewhere.
+ that is a bit complicated; we would have to clean up the freelists.
+ */
+ RARELY out() << "info DFM::findAll(): extent " << loc.toString() << " was empty, skipping ahead " << ns << endl;
+ // find a nonempty extent
+ // it might be nice to free the whole extent here! but we would have to clean up the free records then.
+ e = e->getNextExtent();
}
+ return auto_ptr<Cursor>(new BasicCursor( e->firstRecord ));
}
/* get a table scan cursor, but can be forward or reverse direction.
order.$natural - if set, > 0 means forward (asc), < 0 backward (desc).
*/
auto_ptr<Cursor> findTableScan(const char *ns, const BSONObj& order, const DiskLoc &startLoc) {
- BSONElement el = order.findElement("$natural"); // e.g., { $natural : -1 }
+ BSONElement el = order.getField("$natural"); // e.g., { $natural : -1 }
if ( el.number() >= 0 )
return DataFileMgr::findAll(ns, startLoc);
-
+
// "reverse natural order"
NamespaceDetails *d = nsdetails(ns);
+
if ( !d )
return auto_ptr<Cursor>(new BasicCursor(DiskLoc()));
+
if ( !d->capped ) {
if ( !startLoc.isNull() )
return auto_ptr<Cursor>(new ReverseCursor( startLoc ));
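
findTableScan keys off the $natural element of the order spec: a missing element yields number() == 0, so the forward path via DataFileMgr::findAll also covers plain unordered scans, and only a negative value selects the reverse (or reverse-capped) cursor. A hedged usage sketch:

    // Sketch: forward vs. reverse natural-order table scans.
    auto_ptr<Cursor> fwd = findTableScan( "test.foo", BSONObj(), DiskLoc() );                // no $natural: forward
    auto_ptr<Cursor> rev = findTableScan( "test.foo", BSON( "$natural" << -1 ), DiskLoc() ); // reverse scan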
@@ -583,6 +641,8 @@ namespace mongo {
NamespaceDetails* d = nsdetails(nsToDrop.c_str());
uassert( 10086 , (string)"ns not found: " + nsToDrop , d );
+ BackgroundOperation::assertNoBgOpInProgForNs(nsToDrop.c_str());
+
NamespaceString s(nsToDrop);
assert( s.db == cc().database()->name );
if( s.isSystem() ) {
@@ -634,29 +694,33 @@ namespace mongo {
log(1) << "dropCollection: " << name << endl;
NamespaceDetails *d = nsdetails(name.c_str());
assert( d );
+
+ BackgroundOperation::assertNoBgOpInProgForNs(name.c_str());
+
if ( d->nIndexes != 0 ) {
try {
- assert( deleteIndexes(d, name.c_str(), "*", errmsg, result, true) );
+ assert( dropIndexes(d, name.c_str(), "*", errmsg, result, true) );
}
catch( DBException& ) {
- uasserted(12503,"drop: deleteIndexes for collection failed - consider trying repair");
+ uasserted(12503,"drop: dropIndexes for collection failed - consider trying repair");
}
assert( d->nIndexes == 0 );
}
- log(1) << "\t deleteIndexes done" << endl;
+ log(1) << "\t dropIndexes done" << endl;
result.append("ns", name.c_str());
ClientCursor::invalidate(name.c_str());
+ Top::global.collectionDropped( name );
dropNS(name);
}
int nUnindexes = 0;
- void _unindexRecord(IndexDetails& id, BSONObj& obj, const DiskLoc& dl, bool logMissing = true) {
+ /* unindex all keys in index for this record. */
+ static void _unindexRecord(IndexDetails& id, BSONObj& obj, const DiskLoc& dl, bool logMissing = true) {
BSONObjSetDefaultOrder keys;
id.getKeysFromObject(obj, keys);
for ( BSONObjSetDefaultOrder::iterator i=keys.begin(); i != keys.end(); i++ ) {
BSONObj j = *i;
- // out() << "UNINDEX: j:" << j.toString() << " head:" << id.head.toString() << dl.toString() << endl;
if ( otherTraceLevel >= 5 ) {
out() << "_unindexRecord() " << obj.toString();
out() << "\n unindex:" << j.toString() << endl;
@@ -666,9 +730,9 @@ namespace mongo {
try {
ok = id.head.btree()->unindex(id.head, id, j, dl);
}
- catch (AssertionException&) {
+ catch (AssertionException& e) {
problem() << "Assertion failure: _unindex failed " << id.indexNamespace() << endl;
- out() << "Assertion failure: _unindex failed" << '\n';
+ out() << "Assertion failure: _unindex failed: " << e.what() << '\n';
out() << " obj:" << obj.toString() << '\n';
out() << " key:" << j.toString() << '\n';
out() << " dl:" << dl.toString() << endl;
@@ -682,12 +746,14 @@ namespace mongo {
}
/* unindex all keys in all indexes for this record. */
- void unindexRecord(NamespaceDetails *d, Record *todelete, const DiskLoc& dl, bool noWarn = false) {
- if ( d->nIndexes == 0 ) return;
+ static void unindexRecord(NamespaceDetails *d, Record *todelete, const DiskLoc& dl, bool noWarn = false) {
BSONObj obj(todelete);
- NamespaceDetails::IndexIterator i = d->ii();
- while( i.more() ) {
- _unindexRecord(i.next(), obj, dl, !noWarn);
+ int n = d->nIndexes;
+ for ( int i = 0; i < n; i++ )
+ _unindexRecord(d->idx(i), obj, dl, !noWarn);
+ if( d->backgroundIndexBuildInProgress ) {
+ // always pass noWarn here: keys in this index may be legitimately missing, since we are concurrently building it
+ _unindexRecord(d->idx(n), obj, dl, false);
}
}
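
While a background build is running, prep() (below) has decremented nIndexes, so the index under construction sits in the extra slot d->idx(d->nIndexes). Deletes must still unindex from that partial btree, hence the additional _unindexRecord call, and updates must write through it too; the nIndexesBeingBuilt() helper used by updateRecord below presumably reduces to something like this sketch (not shown in this patch):

    // Sketch of the assumed accessor; the real declaration would live with NamespaceDetails.
    int NamespaceDetails::nIndexesBeingBuilt() const {
        // count the hidden in-progress index at slot nIndexes, if any
        return backgroundIndexBuildInProgress ? nIndexes + 1 : nIndexes;
    }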
@@ -763,19 +829,20 @@ namespace mongo {
/** Note: if the object shrinks a lot, we don't free up space; we leave extra at the end of the record.
*/
- const DiskLoc DataFileMgr::update(const char *ns,
- Record *toupdate, const DiskLoc& dl,
- const char *_buf, int _len, OpDebug& debug)
+ const DiskLoc DataFileMgr::updateRecord(
+ const char *ns,
+ NamespaceDetails *d,
+ NamespaceDetailsTransient *nsdt,
+ Record *toupdate, const DiskLoc& dl,
+ const char *_buf, int _len, OpDebug& debug)
{
StringBuilder& ss = debug.str;
dassert( toupdate == dl.rec() );
- NamespaceDetails *d = nsdetails(ns);
-
BSONObj objOld(toupdate);
BSONObj objNew(_buf);
- assert( objNew.objsize() == _len );
- assert( objNew.objdata() == _buf );
+ DEV assert( objNew.objsize() == _len );
+ DEV assert( objNew.objdata() == _buf );
if( !objNew.hasElement("_id") && objOld.hasElement("_id") ) {
/* add back the old _id value if the update removes it. Note this implementation is slow
@@ -795,7 +862,7 @@ namespace mongo {
*/
vector<IndexChanges> changes;
getIndexChanges(changes, *d, objNew, objOld);
- dupCheck(changes, *d);
+ dupCheck(changes, *d, dl);
if ( toupdate->netLength() < objNew.objsize() ) {
// doesn't fit. reallocate -----------------------------------------------------
@@ -807,13 +874,14 @@ namespace mongo {
return insert(ns, objNew.objdata(), objNew.objsize(), false);
}
- NamespaceDetailsTransient::get_w( ns ).notifyOfWriteOp();
+ nsdt->notifyOfWriteOp();
d->paddingFits();
/* have any index keys changed? */
{
unsigned keyUpdates = 0;
- for ( int x = 0; x < d->nIndexes; x++ ) {
+ int z = d->nIndexesBeingBuilt();
+ for ( int x = 0; x < z; x++ ) {
IndexDetails& idx = d->idx(x);
for ( unsigned i = 0; i < changes[x].removed.size(); i++ ) {
try {
@@ -859,10 +927,8 @@ namespace mongo {
return sz;
}
- int deb=0;
-
- /* add keys to indexes for a new record */
- inline void _indexRecord(NamespaceDetails *d, int idxNo, BSONObj& obj, DiskLoc newRecordLoc, bool dupsAllowed) {
+ /* add keys to index idxNo for a new record */
+ static inline void _indexRecord(NamespaceDetails *d, int idxNo, BSONObj& obj, DiskLoc recordLoc, bool dupsAllowed) {
IndexDetails& idx = d->idx(idxNo);
BSONObjSetDefaultOrder keys;
idx.getKeysFromObject(obj, keys);
@@ -872,12 +938,16 @@ namespace mongo {
if( ++n == 2 ) {
d->setIndexIsMultikey(idxNo);
}
- assert( !newRecordLoc.isNull() );
+ assert( !recordLoc.isNull() );
try {
- idx.head.btree()->bt_insert(idx.head, newRecordLoc,
+ idx.head.btree()->bt_insert(idx.head, recordLoc,
*i, order, dupsAllowed, idx);
}
- catch (AssertionException& ) {
+ catch (AssertionException& e) {
+ if( e.code == 10287 && idxNo == d->nIndexes ) {
+ DEV log() << "info: caught key already in index on bg indexing (ok)" << endl;
+ continue;
+ }
if( !dupsAllowed ) {
// dup key exception, presumably.
throw;
@@ -913,10 +983,10 @@ namespace mongo {
}
// throws DBException
- /* _ TODO dropDups
- */
unsigned long long fastBuildIndex(const char *ns, NamespaceDetails *d, IndexDetails& idx, int idxNo) {
- // testSorting();
+ assert( d->backgroundIndexBuildInProgress == 0 );
+ CurOp * op = cc().curop();
+
Timer t;
log() << "Buildindex " << ns << " idxNo:" << idxNo << ' ' << idx.info.obj().toString() << endl;
@@ -926,13 +996,16 @@ namespace mongo {
BSONObj order = idx.keyPattern();
idx.head.Null();
+
+ if ( logLevel > 1 ) printMemInfo( "before index start" );
/* get and sort all the keys ----- */
unsigned long long n = 0;
auto_ptr<Cursor> c = theDataFileMgr.findAll(ns);
BSONObjExternalSorter sorter(order);
+ sorter.hintNumObjects( d->nrecords );
unsigned long long nkeys = 0;
- ProgressMeter pm( d->nrecords , 10 );
+ ProgressMeter & pm = op->setMessage( "index: (1/3) external sort" , d->nrecords , 10 );
while ( c->ok() ) {
BSONObj o = c->current();
DiskLoc loc = c->currLoc();
@@ -947,12 +1020,20 @@ namespace mongo {
sorter.add(*i, loc);
nkeys++;
}
-
+
c->advance();
n++;
pm.hit();
+ if ( logLevel > 1 && n % 10000 == 0 ){
+ printMemInfo( "\t iterating objects" );
+ }
+
};
+ pm.finished();
+
+ if ( logLevel > 1 ) printMemInfo( "before final sort" );
sorter.sort();
+ if ( logLevel > 1 ) printMemInfo( "after final sort" );
log(t.seconds() > 5 ? 0 : 1) << "\t external sort used : " << sorter.numFiles() << " files " << " in " << t.seconds() << " secs" << endl;
@@ -963,21 +1044,23 @@ namespace mongo {
BtreeBuilder btBuilder(dupsAllowed, idx);
BSONObj keyLast;
auto_ptr<BSONObjExternalSorter::Iterator> i = sorter.iterator();
- ProgressMeter pm2( nkeys , 10 );
+ pm = op->setMessage( "index: (2/3) btree bottom up" , nkeys , 10 );
while( i->more() ) {
RARELY killCurrentOp.checkForInterrupt();
BSONObjExternalSorter::Data d = i->next();
- //cout<<"TEMP SORTER next " << d.first.toString() << endl;
try {
btBuilder.addKey(d.first, d.second);
}
- catch( AssertionException& ) {
+ catch( AssertionException& e ) {
if ( dupsAllowed ){
// unknown exception??
throw;
}
+ if( e.interrupted() )
+ throw;
+
if ( ! dropDups )
throw;
@@ -987,8 +1070,11 @@ namespace mongo {
dupsToDrop.push_back(d.second);
uassert( 10092 , "too many dups on index build with dropDups=true", dupsToDrop.size() < 1000000 );
}
- pm2.hit();
+ pm.hit();
}
+ pm.finished();
+ op->setMessage( "index: (3/3) btree-middle" );
+ log(t.seconds() > 10 ? 0 : 1 ) << "\t done building bottom layer, going to commit" << endl;
btBuilder.commit();
wassert( btBuilder.getn() == nkeys || dropDups );
}
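
fastBuildIndex is a three-phase pipeline, and the CurOp progress messages now name the phases: (1/3) scan the collection and feed every key into an external sorter, (2/3) stream the sorted keys into a BtreeBuilder that fills the leaf level bottom-up, and (3/3) commit, which builds the interior ("middle") levels. Reduced to its skeleton (addKeysFor is a hypothetical stand-in for the key-extraction block above):

    // Phase 1: external sort of all (key, DiskLoc) pairs.
    BSONObjExternalSorter sorter( order );
    for ( auto_ptr<Cursor> c = theDataFileMgr.findAll( ns ); c->ok(); c->advance() )
        addKeysFor( c->current(), c->currLoc(), sorter ); // getKeysFromObject + sorter.add
    sorter.sort();

    // Phase 2: append keys in sorted order, building the btree bottom-up.
    BtreeBuilder btBuilder( dupsAllowed, idx );
    auto_ptr<BSONObjExternalSorter::Iterator> i = sorter.iterator();
    while ( i->more() ) {
        BSONObjExternalSorter::Data d = i->next();
        btBuilder.addKey( d.first, d.second );
    }

    // Phase 3: wire up the interior nodes above the finished leaf level.
    btBuilder.commit();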
@@ -1001,32 +1087,61 @@ namespace mongo {
return n;
}
- static class BackgroundIndexBuildJobs {
+ class BackgroundIndexBuildJob : public BackgroundOperation {
unsigned long long addExistingToIndex(const char *ns, NamespaceDetails *d, IndexDetails& idx, int idxNo) {
bool dupsAllowed = !idx.unique();
bool dropDups = idx.dropDups();
+ ProgressMeter& progress = cc().curop()->setMessage( "bg index build" , d->nrecords );
+
unsigned long long n = 0;
- auto_ptr<Cursor> c = theDataFileMgr.findAll(ns);
- while ( c->ok() ) {
- BSONObj js = c->current();
+ auto_ptr<ClientCursor> cc;
+ {
+ auto_ptr<Cursor> c = theDataFileMgr.findAll(ns);
+ cc.reset( new ClientCursor(c, ns, false) );
+ }
+ CursorId id = cc->cursorid;
+
+ while ( cc->c->ok() ) {
+ BSONObj js = cc->c->current();
try {
- _indexRecord(d, idxNo, js, c->currLoc(),dupsAllowed);
- c->advance();
+ _indexRecord(d, idxNo, js, cc->c->currLoc(), dupsAllowed);
+ cc->c->advance();
} catch( AssertionException& e ) {
+ if( e.interrupted() )
+ throw;
+
if ( dropDups ) {
- DiskLoc toDelete = c->currLoc();
- c->advance();
+ DiskLoc toDelete = cc->c->currLoc();
+ bool ok = cc->c->advance();
+ cc->updateLocation();
theDataFileMgr.deleteRecord( ns, toDelete.rec(), toDelete, false, true );
+ if( ClientCursor::find(id, false) == 0 ) {
+ cc.release();
+ if( !ok ) {
+ /* we were already at the end. normal. */
+ }
+ else {
+ uasserted(12585, "cursor gone during bg index; dropDups");
+ }
+ break;
+ }
} else {
- _log() << endl;
- log(2) << "addExistingToIndex exception " << e.what() << endl;
+ log() << "background addExistingToIndex exception " << e.what() << endl;
throw;
}
}
n++;
- };
+ progress.hit();
+
+ if ( n % 128 == 0 && !cc->yield() ) {
+ cc.release();
+ uasserted(12584, "cursor gone during bg index");
+ break;
+ }
+ }
+ progress.done();
return n;
}
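
The background scan wraps its cursor in a ClientCursor so it can relinquish the write lock every 128 records via yield(); after a yield, and after any dropDups delete, the code must verify the ClientCursor still exists (via yield()'s return value, or by looking it up by id), because concurrent record deletion may have invalidated it. The core of the pattern, reduced (indexOneRecord stands in for the try/catch body above):

    // Reduced sketch of the yield-and-revalidate loop.
    auto_ptr<ClientCursor> cc;
    {
        auto_ptr<Cursor> c = theDataFileMgr.findAll( ns );
        cc.reset( new ClientCursor( c, ns, false ) );
    }
    unsigned long long n = 0;
    while ( cc->c->ok() ) {
        indexOneRecord( cc->c->current(), cc->c->currLoc() ); // hypothetical helper
        cc->c->advance();
        if ( ++n % 128 == 0 && !cc->yield() ) {
            cc.release(); // the ClientCursor object was already deleted
            uasserted( 12584, "cursor gone during bg index" );
        }
    }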
@@ -1034,72 +1149,76 @@ namespace mongo {
that way on a crash/restart, we don't think we are still building one. */
set<NamespaceDetails*> bgJobsInProgress;
- void prep(NamespaceDetails *d) {
+ void prep(const char *ns, NamespaceDetails *d) {
assertInWriteLock();
- assert( bgJobsInProgress.count(d) == 0 );
bgJobsInProgress.insert(d);
d->backgroundIndexBuildInProgress = 1;
+ d->nIndexes--;
}
-
- public:
- /* Note you cannot even do a foreground index build if a background is in progress,
- as bg build assumes it is the last index in the array!
- */
- void checkInProg(NamespaceDetails *d) {
+ void done(const char *ns, NamespaceDetails *d) {
+ d->nIndexes++;
+ d->backgroundIndexBuildInProgress = 0;
+ NamespaceDetailsTransient::get_w(ns).addedIndex(); // clear query optimizer cache
assertInWriteLock();
- uassert(12580, "already building an index for this namespace in background", bgJobsInProgress.count(d) == 0);
}
-/* todo: clean bg flag on loading of NamespaceDetails */
+ public:
+ BackgroundIndexBuildJob(const char *ns) : BackgroundOperation(ns) { }
unsigned long long go(string ns, NamespaceDetails *d, IndexDetails& idx, int idxNo) {
- unsigned long long n;
- prep(d);
+ unsigned long long n = 0;
+
+ prep(ns.c_str(), d);
+ assert( idxNo == d->nIndexes );
try {
idx.head = BtreeBucket::addBucket(idx);
n = addExistingToIndex(ns.c_str(), d, idx, idxNo);
}
catch(...) {
- assertInWriteLock();
- bgJobsInProgress.erase(d);
- d->backgroundIndexBuildInProgress = 0;
+ if( cc().database() && nsdetails(ns.c_str()) == d ) {
+ assert( idxNo == d->nIndexes );
+ done(ns.c_str(), d);
+ }
+ else {
+ log() << "ERROR: db gone during bg index?" << endl;
+ }
throw;
}
+ assert( idxNo == d->nIndexes );
+ done(ns.c_str(), d);
return n;
}
- } backgroundIndex;
+ };
// throws DBException
- static void buildAnIndex(string ns, NamespaceDetails *d, IndexDetails& idx, int idxNo) {
- log() << "building new index on " << idx.keyPattern() << " for " << ns << "..." << endl;
+ static void buildAnIndex(string ns, NamespaceDetails *d, IndexDetails& idx, int idxNo, bool background) {
+ log() << "building new index on " << idx.keyPattern() << " for " << ns << endl;
Timer t;
unsigned long long n;
- BSONObj info = idx.info.obj();
- bool background = info["background"].trueValue();
- if( background ) {
- log() << "WARNING: background index build not yet implemented" << endl;
+ if( background ) {
+ log(2) << "buildAnIndex: background=true\n";
}
+ assert( !BackgroundOperation::inProgForNs(ns.c_str()) ); // should have been checked earlier, better not be...
if( !background ) {
n = fastBuildIndex(ns.c_str(), d, idx, idxNo);
assert( !idx.head.isNull() );
}
else {
- n = backgroundIndex.go(ns, d, idx, idxNo);
+ BackgroundIndexBuildJob j(ns.c_str());
+ n = j.go(ns, d, idx, idxNo);
}
log() << "done for " << n << " records " << t.millis() / 1000.0 << "secs" << endl;
}
/* add keys to indexes for a new record */
- void indexRecord(NamespaceDetails *d, const void *buf, int len, DiskLoc newRecordLoc) {
- BSONObj obj((const char *)buf);
-
- /*UNIQUE*/
- for ( int i = 0; i < d->nIndexes; i++ ) {
+ static void indexRecord(NamespaceDetails *d, BSONObj obj, DiskLoc loc) {
+ int n = d->nIndexesBeingBuilt();
+ for ( int i = 0; i < n; i++ ) {
try {
bool unique = d->idx(i).unique();
- _indexRecord(d, i, obj, newRecordLoc, /*dupsAllowed*/!unique);
+ _indexRecord(d, i, obj, loc, /*dupsAllowed*/!unique);
}
catch( DBException& ) {
/* try to roll back previously added index entries
@@ -1108,7 +1227,7 @@ namespace mongo {
*/
for( int j = 0; j <= i; j++ ) {
try {
- _unindexRecord(d->idx(j), obj, newRecordLoc, false);
+ _unindexRecord(d->idx(j), obj, loc, false);
}
catch(...) {
log(3) << "unindex fails on rollback after unique failure\n";
@@ -1119,7 +1238,7 @@ namespace mongo {
}
}
- extern BSONObj id_obj; // { _id : ObjectId("000000000000000000000000") }
+ extern BSONObj id_obj; // { _id : 1 }
void ensureHaveIdIndex(const char *ns) {
NamespaceDetails *d = nsdetails(ns);
@@ -1179,12 +1298,31 @@ namespace mongo {
bool prepareToBuildIndex(const BSONObj& io, bool god, string& sourceNS, NamespaceDetails *&sourceCollection);
+ // We are now doing two btree scans for all unique indexes (one here, and one when we've
+ // written the record to the collection). This could be made more efficient by inserting
+ // dummy data here, keeping pointers to the btree nodes holding the dummy data, and then
+ // updating the dummy data with the DiskLoc of the real record.
+ void checkNoIndexConflicts( NamespaceDetails *d, const BSONObj &obj ) {
+ for ( int idxNo = 0; idxNo < d->nIndexes; idxNo++ ) {
+ if( d->idx(idxNo).unique() ) {
+ IndexDetails& idx = d->idx(idxNo);
+ BSONObjSetDefaultOrder keys;
+ idx.getKeysFromObject(obj, keys);
+ BSONObj order = idx.keyPattern();
+ for ( BSONObjSetDefaultOrder::iterator i=keys.begin(); i != keys.end(); i++ ) {
+ uassert( 12582, "duplicate key insert for unique index of capped collection",
+ idx.head.btree()->findSingle(idx, idx.head, *i ).isNull() );
+ }
+ }
+ }
+ }
+
/* note: if god==true, you may pass in obuf of NULL and then populate the returned DiskLoc
after the call -- that will prevent a double buffer copy in some cases (btree.cpp).
*/
DiskLoc DataFileMgr::insert(const char *ns, const void *obuf, int len, bool god, const BSONElement &writeId, bool mayAddIndex) {
bool wouldAddIndex = false;
- uassert( 10093 , "cannot insert into reserved $ collection", god || strchr(ns, '$') == 0 );
+ massert( 10093 , "cannot insert into reserved $ collection", god || strchr(ns, '$') == 0 );
uassert( 10094 , "invalid ns", strchr( ns , '.' ) > 0 );
const char *sys = strstr(ns, "system.");
if ( sys ) {
@@ -1212,8 +1350,8 @@ namespace mongo {
/* todo: shouldn't be in the namespace catalog until after the allocations here work.
also if this is an addIndex, those checks should happen before this!
*/
- // This creates first file in the database.
- cc().database()->newestFile()->createExtent(ns, initialExtentSize(len));
+ // This may create the first file in the database.
+ cc().database()->allocExtent(ns, initialExtentSize(len), false);
d = nsdetails(ns);
if ( !god )
ensureIdIndexForNewNs(ns);
@@ -1225,10 +1363,8 @@ namespace mongo {
string tabletoidxns;
if ( addIndex ) {
BSONObj io((const char *) obuf);
- backgroundIndex.checkInProg(d);
- if( !prepareToBuildIndex(io, god, tabletoidxns, tableToIndex) ) {
+ if( !prepareToBuildIndex(io, god, tabletoidxns, tableToIndex) )
return DiskLoc();
- }
}
const BSONElement *newId = &writeId;
@@ -1262,6 +1398,13 @@ namespace mongo {
d->paddingFactor = 1.0;
lenWHdr = len + Record::HeaderSize;
}
+
+ // If the collection is capped, check if the new object will violate a unique index
+ // constraint before allocating space.
+ if ( d->nIndexes && d->capped && !god ) {
+ checkNoIndexConflicts( d, BSONObj( reinterpret_cast<const char *>( obuf ) ) );
+ }
+
DiskLoc loc = d->alloc(ns, lenWHdr, extentLoc);
if ( loc.isNull() ) {
// out of space
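
The motivation for running checkNoIndexConflicts before d->alloc(): allocating in a capped collection can delete old records to make room, and that reclamation cannot be rolled back if the insert later fails on a unique index. A sketch of the observable behavior (trailing insert() parameters assumed to default):

    // Two records sharing a key in a unique index of a capped collection:
    BSONObj a = BSON( "_id" << 1 << "k" << 7 );
    BSONObj b = BSON( "_id" << 2 << "k" << 7 );
    theDataFileMgr.insert( "test.capped", a.objdata(), a.objsize() );
    theDataFileMgr.insert( "test.capped", b.objdata(), b.objsize() ); // now uasserts 12582
                                                                      // before any old records are reclaimed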
@@ -1321,27 +1464,35 @@ namespace mongo {
NamespaceDetailsTransient::get_w( ns ).notifyOfWriteOp();
if ( tableToIndex ) {
+ BSONObj info = loc.obj();
+ bool background = info["background"].trueValue();
+
int idxNo = tableToIndex->nIndexes;
- IndexDetails& idx = tableToIndex->addIndex(tabletoidxns.c_str()); // clear transient info caches so they refresh; increments nIndexes
+ IndexDetails& idx = tableToIndex->addIndex(tabletoidxns.c_str(), !background); // clear transient info caches so they refresh; increments nIndexes
idx.info = loc;
try {
- buildAnIndex(tabletoidxns, tableToIndex, idx, idxNo);
+ buildAnIndex(tabletoidxns, tableToIndex, idx, idxNo, background);
} catch( DBException& ) {
- // save our error msg string as an exception on deleteIndexes will overwrite our message
+ // save our error msg string, as an exception during dropIndexes would overwrite our message
LastError *le = lastError.get();
- assert( le );
- string saveerrmsg = le->msg;
- assert( !saveerrmsg.empty() );
+ int savecode = 0;
+ string saveerrmsg;
+ if ( le ) {
+ savecode = le->code;
+ saveerrmsg = le->msg;
+ }
// roll back this index
string name = idx.indexName();
BSONObjBuilder b;
string errmsg;
- bool ok = deleteIndexes(tableToIndex, tabletoidxns.c_str(), name.c_str(), errmsg, b, true);
+ bool ok = dropIndexes(tableToIndex, tabletoidxns.c_str(), name.c_str(), errmsg, b, true);
if( !ok ) {
log() << "failed to drop index after a unique key error building it: " << errmsg << ' ' << tabletoidxns << ' ' << name << endl;
}
- raiseError(12506,saveerrmsg.c_str());
+
+ assert( le && !saveerrmsg.empty() );
+ raiseError(savecode,saveerrmsg.c_str());
throw;
}
}
@@ -1349,11 +1500,13 @@ namespace mongo {
/* add this record to our indexes */
if ( d->nIndexes ) {
try {
- indexRecord(d, r->data/*buf*/, len, loc);
+ BSONObj obj(r->data);
+ indexRecord(d, obj, loc);
}
catch( AssertionException& e ) {
// should be a dup key error on _id index
- if( tableToIndex || d->capped ) {
+ if( tableToIndex || d->capped ) {
+ massert( 12583, "unexpected index insertion failure on capped collection", !d->capped );
string s = e.toString();
s += " : on addIndex/capped - collection and its index will not match";
uassert_nothrow(s.c_str());
@@ -1406,19 +1559,6 @@ namespace mongo {
return r;
}
- void DataFileMgr::init(const string& path ) {
- /* boost::filesystem::path path( dir );
- path /= "temp.dat";
- string pathString = path.string();
- temp.open(pathString.c_str(), 64 * 1024 * 1024);
- */
- }
-
- void pdfileInit() {
- // namespaceIndex.init(dbpath);
- theDataFileMgr.init(dbpath);
- }
-
} // namespace mongo
#include "clientcursor.h"
@@ -1427,63 +1567,75 @@ namespace mongo {
void dropDatabase(const char *ns) {
// ns is of the form "<dbname>.$cmd"
- char cl[256];
- nsToDatabase(ns, cl);
- log(1) << "dropDatabase " << cl << endl;
- assert( cc().database()->name == cl );
+ char db[256];
+ nsToDatabase(ns, db);
+ log(1) << "dropDatabase " << db << endl;
+ assert( cc().database()->name == db );
+
+ BackgroundOperation::assertNoBgOpInProgForDb(db);
- closeDatabase( cl );
- _deleteDataFiles(cl);
+ closeDatabase( db );
+ _deleteDataFiles(db);
}
typedef boost::filesystem::path Path;
// back up original database files to 'temp' dir
void _renameForBackup( const char *database, const Path &reservedPath ) {
+ Path newPath( reservedPath );
+ if ( directoryperdb )
+ newPath /= database;
class Renamer : public FileOp {
public:
- Renamer( const Path &reservedPath ) : reservedPath_( reservedPath ) {}
+ Renamer( const Path &newPath ) : newPath_( newPath ) {}
private:
- const boost::filesystem::path &reservedPath_;
+ const boost::filesystem::path &newPath_;
virtual bool apply( const Path &p ) {
if ( !boost::filesystem::exists( p ) )
return false;
- boost::filesystem::rename( p, reservedPath_ / ( p.leaf() + ".bak" ) );
+ boost::filesystem::rename( p, newPath_ / ( p.leaf() + ".bak" ) );
return true;
}
virtual const char * op() const {
return "renaming";
}
- } renamer( reservedPath );
+ } renamer( newPath );
_applyOpToDataFiles( database, renamer, true );
}
// move temp files to standard data dir
void _replaceWithRecovered( const char *database, const char *reservedPathString ) {
- class : public FileOp {
+ Path newPath( dbpath );
+ if ( directoryperdb )
+ newPath /= database;
+ class Replacer : public FileOp {
+ public:
+ Replacer( const Path &newPath ) : newPath_( newPath ) {}
+ private:
+ const boost::filesystem::path &newPath_;
virtual bool apply( const Path &p ) {
if ( !boost::filesystem::exists( p ) )
return false;
- boost::filesystem::rename( p, boost::filesystem::path(dbpath) / p.leaf() );
+ boost::filesystem::rename( p, newPath_ / p.leaf() );
return true;
}
virtual const char * op() const {
return "renaming";
}
- } renamer;
- _applyOpToDataFiles( database, renamer, true, reservedPathString );
+ } replacer( newPath );
+ _applyOpToDataFiles( database, replacer, true, reservedPathString );
}
// generate a directory name for storing temp data files
Path uniqueReservedPath( const char *prefix ) {
- Path dbPath = Path( dbpath );
+ Path repairPath = Path( repairpath );
Path reservedPath;
int i = 0;
bool exists = false;
do {
stringstream ss;
ss << prefix << "_repairDatabase_" << i++;
- reservedPath = dbPath / ss.str();
+ reservedPath = repairPath / ss.str();
BOOST_CHECK_EXCEPTION( exists = boost::filesystem::exists( reservedPath ) );
} while ( exists );
return reservedPath;
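
Both rename helpers now need the same base-directory computation: with directoryperdb, each database's files live under <dbpath>/<dbname>/, and repair scratch space comes from the separately configurable repairpath. The shared shape, as a hypothetical helper:

    // Sketch: base directory for a database's files under the new options.
    boost::filesystem::path dataDirFor( const char *database ) {
        boost::filesystem::path p( dbpath );   // e.g. /data/db
        if ( directoryperdb )
            p /= database;                     // e.g. /data/db/test
        return p;
    }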
@@ -1540,6 +1692,8 @@ namespace mongo {
problem() << "repairDatabase " << dbName << endl;
assert( cc().database()->name == dbName );
+ BackgroundOperation::assertNoBgOpInProgForDb(dbName);
+
boost::intmax_t totalSize = dbSize( dbName );
boost::intmax_t freeSize = freeSpace();
if ( freeSize > -1 && freeSize < totalSize ) {
@@ -1553,14 +1707,19 @@ namespace mongo {
Path reservedPath =
uniqueReservedPath( ( preserveClonedFilesOnFailure || backupOriginalFiles ) ?
- "backup" : "tmp" );
+ "backup" : "$tmp" );
BOOST_CHECK_EXCEPTION( boost::filesystem::create_directory( reservedPath ) );
string reservedPathString = reservedPath.native_directory_string();
- assert( setClient( dbName, reservedPathString.c_str() ) );
-
- bool res = cloneFrom(localhost.c_str(), errmsg, dbName,
- /*logForReplication=*/false, /*slaveok*/false, /*replauth*/false, /*snapshot*/false);
- closeDatabase( dbName, reservedPathString.c_str() );
+
+ bool res;
+ { // clone to temp location, which effectively does repair
+ Client::Context ctx( dbName, reservedPathString );
+ assert( ctx.justCreated() );
+
+ res = cloneFrom(localhost.c_str(), errmsg, dbName,
+ /*logForReplication=*/false, /*slaveok*/false, /*replauth*/false, /*snapshot*/false);
+ closeDatabase( dbName, reservedPathString.c_str() );
+ }
if ( !res ) {
problem() << "clone failed for " << dbName << " with error: " << errmsg << endl;
@@ -1569,13 +1728,15 @@ namespace mongo {
return false;
}
- assert( !setClient( dbName ) );
+ Client::Context ctx( dbName );
closeDatabase( dbName );
- if ( backupOriginalFiles )
+ if ( backupOriginalFiles ) {
_renameForBackup( dbName, reservedPath );
- else
+ } else {
_deleteDataFiles( dbName );
+ BOOST_CHECK_EXCEPTION( boost::filesystem::create_directory( Path( dbpath ) / dbName ) );
+ }
_replaceWithRecovered( dbName, reservedPathString.c_str() );
@@ -1591,6 +1752,8 @@ namespace mongo {
string c = database;
c += '.';
boost::filesystem::path p(path);
+ if ( directoryperdb )
+ p /= database;
boost::filesystem::path q;
q = p / (c+"ns");
bool ok = false;
@@ -1619,8 +1782,8 @@ namespace mongo {
NamespaceDetails* nsdetails_notinline(const char *ns) { return nsdetails(ns); }
- bool DatabaseHolder::closeAll( const string& path , BSONObjBuilder& result ){
- log(2) << "DatabaseHolder::closeAll path:" << path << endl;
+ bool DatabaseHolder::closeAll( const string& path , BSONObjBuilder& result , bool force ){
+ log() << "DatabaseHolder::closeAll path:" << path << endl;
dbMutex.assertWriteLocked();
map<string,Database*>& m = _paths[path];
@@ -1633,14 +1796,23 @@ namespace mongo {
BSONObjBuilder bb( result.subarrayStart( "dbs" ) );
int n = 0;
+ int nNotClosed = 0;
for( set< string >::iterator i = dbs.begin(); i != dbs.end(); ++i ) {
string name = *i;
log(2) << "DatabaseHolder::closeAll path:" << path << " name:" << name << endl;
- setClient( name.c_str() , path );
- closeDatabase( name.c_str() , path );
- bb.append( bb.numStr( n++ ).c_str() , name );
+ Client::Context ctx( name , path );
+ if( !force && BackgroundOperation::inProgForDb(name.c_str()) ) {
+ log() << "WARNING: can't close database " << name << " because a bg job is in progress - try killOp command" << endl;
+ nNotClosed++;
+ }
+ else {
+ closeDatabase( name.c_str() , path );
+ bb.append( bb.numStr( n++ ).c_str() , name );
+ }
}
bb.done();
+ if( nNotClosed )
+ result.append("nNotClosed", nNotClosed);
return true;
}
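
Callers that shut databases down now pass force explicitly; with force == false, a database with a background job in progress is skipped and reported rather than closed out from under the job. A usage sketch:

    // Sketch: closing everything under dbpath, tolerating running bg jobs.
    BSONObjBuilder result;
    dbHolder.closeAll( dbpath, result, /*force=*/false );
    BSONObj res = result.obj();
    if ( res["nNotClosed"].trueValue() )
        log() << (int) res["nNotClosed"].number() << " database(s) not closed; use killOp and retry" << endl;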