From 582fc32574a3b158c81e49cb00e6ae59205e66ba Mon Sep 17 00:00:00 2001 From: Antonin Kral Date: Thu, 17 Mar 2011 00:05:43 +0100 Subject: Imported Upstream version 1.8.0 --- db/namespace.cpp | 398 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 236 insertions(+), 162 deletions(-) (limited to 'db/namespace.cpp') diff --git a/db/namespace.cpp b/db/namespace.cpp index 8a1ab6f..fcdaee2 100644 --- a/db/namespace.cpp +++ b/db/namespace.cpp @@ -19,7 +19,7 @@ #include "pch.h" #include "pdfile.h" #include "db.h" -#include "../util/mmap.h" +#include "mongommf.h" #include "../util/hashtab.h" #include "../scripting/engine.h" #include "btree.h" @@ -31,6 +31,8 @@ namespace mongo { + BOOST_STATIC_ASSERT( sizeof(Namespace) == 128 ); + BSONObj idKeyPattern = fromjson("{\"_id\":1}"); /* deleted lists -- linked lists of deleted records -- are placed in 'buckets' of various sizes @@ -45,7 +47,7 @@ namespace mongo { NamespaceDetails::NamespaceDetails( const DiskLoc &loc, bool _capped ) { /* be sure to initialize new fields here -- doesn't default to zeroes the way we use it */ firstExtent = lastExtent = capExtent = loc; - datasize = nrecords = 0; + stats.datasize = stats.nrecords = 0; lastExtentSize = 0; nIndexes = 0; capped = _capped; @@ -58,20 +60,23 @@ namespace mongo { // For capped case, signal that we are doing initial extent allocation. if ( capped ) cappedLastDelRecLastExtent().setInvalid(); - assert( sizeof(dataFileVersion) == 2 ); - dataFileVersion = 0; - indexFileVersion = 0; + assert( sizeof(dataFileVersion) == 2 ); + dataFileVersion = 0; + indexFileVersion = 0; multiKeyIndexBits = 0; reservedA = 0; extraOffset = 0; - backgroundIndexBuildInProgress = 0; + indexBuildInProgress = 0; + reservedB = 0; + capped2.cc2_ptr = 0; + capped2.fileNumber = 0; memset(reserved, 0, sizeof(reserved)); } bool NamespaceIndex::exists() const { return !MMF::exists(path()); } - + boost::filesystem::path NamespaceIndex::path() const { boost::filesystem::path ret( dir_ ); if ( directoryperdb ) @@ -88,23 +93,56 @@ namespace mongo { if ( !boost::filesystem::exists( dir ) ) BOOST_CHECK_EXCEPTION( boost::filesystem::create_directory( dir ) ); } - - int lenForNewNsFiles = 16 * 1024 * 1024; - - void NamespaceDetails::onLoad(const Namespace& k) { - if( k.isExtra() ) { + + unsigned lenForNewNsFiles = 16 * 1024 * 1024; + +#if defined(_DEBUG) + void NamespaceDetails::dump(const Namespace& k) { + if( !cmdLine.dur ) + cout << "ns offsets which follow will not display correctly with --dur disabled" << endl; + + size_t ofs = 1; // 1 is sentinel that the find call below failed + privateViews.find(this, /*out*/ofs); + + cout << "ns" << hex << setw(8) << ofs << ' '; + cout << k.toString() << '\n'; + + if( k.isExtra() ) { + cout << "ns\t extra" << endl; + return; + } + + cout << "ns " << firstExtent.toString() << ' ' << lastExtent.toString() << " nidx:" << nIndexes << '\n'; + cout << "ns " << stats.datasize << ' ' << stats.nrecords << ' ' << nIndexes << '\n'; + cout << "ns " << capped << ' ' << paddingFactor << ' ' << flags << ' ' << dataFileVersion << '\n'; + cout << "ns " << multiKeyIndexBits << ' ' << indexBuildInProgress << '\n'; + cout << "ns " << (int) reserved[0] << ' ' << (int) reserved[59]; + cout << endl; + } +#endif + + void NamespaceDetails::onLoad(const Namespace& k) { + //dump(k); + + if( k.isExtra() ) { /* overflow storage for indexes - so don't treat as a NamespaceDetails object. */ return; } - assertInWriteLock(); - if( backgroundIndexBuildInProgress ) { - log() << "backgroundIndexBuildInProgress was " << backgroundIndexBuildInProgress << " for " << k << ", indicating an abnormal db shutdown" << endl; - backgroundIndexBuildInProgress = 0; + DEV assertInWriteLock(); + + if( indexBuildInProgress || capped2.cc2_ptr ) { + assertInWriteLock(); + if( indexBuildInProgress ) { + log() << "indexBuildInProgress was " << indexBuildInProgress << " for " << k << ", indicating an abnormal db shutdown" << endl; + getDur().writingInt( indexBuildInProgress ) = 0; + } + if( capped2.cc2_ptr ) + *getDur().writing(&capped2.cc2_ptr) = 0; } } - static void namespaceOnLoadCallback(const Namespace& k, NamespaceDetails& v) { + static void namespaceOnLoadCallback(const Namespace& k, NamespaceDetails& v) { v.onLoad(k); } @@ -117,105 +155,113 @@ namespace mongo { we need to be sure to clear any cached info for the database in local.*. */ - /* + /* if ( "local" != database_ ) { DBInfo i(database_.c_str()); i.dbDropped(); } - */ - int len = -1; + */ + + unsigned long long len = 0; boost::filesystem::path nsPath = path(); string pathString = nsPath.string(); - MMF::Pointer p; - if( MMF::exists(nsPath) ) { - p = f.map(pathString.c_str()); - if( !p.isNull() ) { + void *p = 0; + if( MMF::exists(nsPath) ) { + if( f.open(pathString, true) ) { len = f.length(); - if ( len % (1024*1024) != 0 ){ + if ( len % (1024*1024) != 0 ) { log() << "bad .ns file: " << pathString << endl; uassert( 10079 , "bad .ns file length, cannot open database", len % (1024*1024) == 0 ); } + p = f.getView(); } - } - else { - // use lenForNewNsFiles, we are making a new database - massert( 10343 , "bad lenForNewNsFiles", lenForNewNsFiles >= 1024*1024 ); + } + else { + // use lenForNewNsFiles, we are making a new database + massert( 10343, "bad lenForNewNsFiles", lenForNewNsFiles >= 1024*1024 ); maybeMkdir(); - long l = lenForNewNsFiles; - p = f.map(pathString.c_str(), l); - if( !p.isNull() ) { - len = (int) l; + unsigned long long l = lenForNewNsFiles; + if( f.create(pathString, l, true) ) { + getDur().createdFile(pathString, l); // always a new file + len = l; assert( len == lenForNewNsFiles ); + p = f.getView(); } - } + } - if ( p.isNull() ) { - problem() << "couldn't open file " << pathString << " terminating" << endl; + if ( p == 0 ) { + /** TODO: this shouldn't terminate? */ + log() << "error couldn't open file " << pathString << " terminating" << endl; dbexit( EXIT_FS ); } - ht = new HashTable(p, len, "namespace index"); + + assert( len <= 0x7fffffff ); + ht = new HashTable(p, (int) len, "namespace index"); if( checkNsFilesOnLoad ) ht->iterAll(namespaceOnLoadCallback); } - + static void namespaceGetNamespacesCallback( const Namespace& k , NamespaceDetails& v , void * extra ) { list * l = (list*)extra; if ( ! k.hasDollarSign() ) l->push_back( (string)k ); } - void NamespaceIndex::getNamespaces( list& tofill , bool onlyCollections ) const { assert( onlyCollections ); // TODO: need to implement this // need boost::bind or something to make this less ugly - + if ( ht ) ht->iterAll( namespaceGetNamespacesCallback , (void*)&tofill ); } void NamespaceDetails::addDeletedRec(DeletedRecord *d, DiskLoc dloc) { - BOOST_STATIC_ASSERT( sizeof(NamespaceDetails::Extra) <= sizeof(NamespaceDetails) ); + BOOST_STATIC_ASSERT( sizeof(NamespaceDetails::Extra) <= sizeof(NamespaceDetails) ); + { + Record *r = (Record *) getDur().writingPtr(d, sizeof(Record)); + d = &r->asDeleted(); // defensive code: try to make us notice if we reference a deleted record - (unsigned&) (((Record *) d)->data) = 0xeeeeeeee; + (unsigned&) (r->data) = 0xeeeeeeee; } - dassert( dloc.drec() == d ); - DEBUGGING out() << "TEMP: add deleted rec " << dloc.toString() << ' ' << hex << d->extentOfs << endl; + DEBUGGING log() << "TEMP: add deleted rec " << dloc.toString() << ' ' << hex << d->extentOfs << endl; if ( capped ) { if ( !cappedLastDelRecLastExtent().isValid() ) { // Initial extent allocation. Insert at end. d->nextDeleted = DiskLoc(); if ( cappedListOfAllDeletedRecords().isNull() ) - cappedListOfAllDeletedRecords() = dloc; + getDur().writingDiskLoc( cappedListOfAllDeletedRecords() ) = dloc; else { DiskLoc i = cappedListOfAllDeletedRecords(); - for (; !i.drec()->nextDeleted.isNull(); i = i.drec()->nextDeleted ); - i.drec()->nextDeleted = dloc; + for (; !i.drec()->nextDeleted.isNull(); i = i.drec()->nextDeleted ) + ; + i.drec()->nextDeleted.writing() = dloc; } - } else { + } + else { d->nextDeleted = cappedFirstDeletedInCurExtent(); - cappedFirstDeletedInCurExtent() = dloc; + getDur().writingDiskLoc( cappedFirstDeletedInCurExtent() ) = dloc; // always compact() after this so order doesn't matter } - } else { + } + else { int b = bucket(d->lengthWithHeaders); DiskLoc& list = deletedList[b]; DiskLoc oldHead = list; - list = dloc; + getDur().writingDiskLoc(list) = dloc; d->nextDeleted = oldHead; } } - /* - lenToAlloc is WITH header - */ + // lenToAlloc is WITH header DiskLoc NamespaceDetails::alloc(const char *ns, int lenToAlloc, DiskLoc& extentLoc) { lenToAlloc = (lenToAlloc + 3) & 0xfffffffc; DiskLoc loc = _alloc(ns, lenToAlloc); if ( loc.isNull() ) return loc; - DeletedRecord *r = loc.drec(); + const DeletedRecord *r = loc.drec(); + //r = getDur().writing(r); /* note we want to grab from the front so our next pointers on disk tend to go in a forward direction which is important for performance. */ @@ -229,20 +275,21 @@ namespace mongo { if ( capped == 0 ) { if ( left < 24 || left < (lenToAlloc >> 3) ) { // you get the whole thing. - DataFileMgr::grow(loc, regionlen); + //DataFileMgr::grow(loc, regionlen); return loc; } } /* split off some for further use. */ - r->lengthWithHeaders = lenToAlloc; - DataFileMgr::grow(loc, lenToAlloc); + getDur().writingInt(r->lengthWithHeaders) = lenToAlloc; + //DataFileMgr::grow(loc, lenToAlloc); DiskLoc newDelLoc = loc; newDelLoc.inc(lenToAlloc); DeletedRecord *newDel = DataFileMgr::makeDeletedRecord(newDelLoc, left); - newDel->extentOfs = r->extentOfs; - newDel->lengthWithHeaders = left; - newDel->nextDeleted.Null(); + DeletedRecord *newDelW = getDur().writing(newDel); + newDelW->extentOfs = r->extentOfs; + newDelW->lengthWithHeaders = left; + newDelW->nextDeleted.Null(); addDeletedRec(newDel, newDelLoc); @@ -267,7 +314,7 @@ namespace mongo { int a = cur.a(); if ( a < -1 || a >= 100000 ) { problem() << "~~ Assertion - cur out of range in _alloc() " << cur.toString() << - " a:" << a << " b:" << b << " chain:" << chain << '\n'; + " a:" << a << " b:" << b << " chain:" << chain << '\n'; sayDbContext(); if ( cur == *prev ) prev->Null(); @@ -303,7 +350,7 @@ namespace mongo { cur.Null(); } else { - /*this defensive check only made sense for the mmap storage engine: + /*this defensive check only made sense for the mmap storage engine: if ( r->nextDeleted.getOfs() == 0 ) { problem() << "~~ Assertion - bad nextDeleted " << r->nextDeleted.toString() << " b:" << b << " chain:" << chain << ", fixing.\n"; @@ -316,9 +363,9 @@ namespace mongo { /* unlink ourself from the deleted list */ { - DeletedRecord *bmr = bestmatch.drec(); - *bestprev = bmr->nextDeleted; - bmr->nextDeleted.setInvalid(); // defensive. + const DeletedRecord *bmr = bestmatch.drec(); + *getDur().writing(bestprev) = bmr->nextDeleted; + bmr->nextDeleted.writing().setInvalid(); // defensive. assert(bmr->extentOfs < bestmatch.getOfs()); } @@ -371,9 +418,9 @@ namespace mongo { if ( e == capExtent ) out() << " (capExtent)"; out() << '\n'; - out() << " magic: " << hex << e.ext()->magic << dec << " extent->ns: " << e.ext()->nsDiagnostic.buf << '\n'; + out() << " magic: " << hex << e.ext()->magic << dec << " extent->ns: " << e.ext()->nsDiagnostic.toString() << '\n'; out() << " fr: " << e.ext()->firstRecord.toString() << - " lr: " << e.ext()->lastRecord.toString() << " extent->len: " << e.ext()->length << '\n'; + " lr: " << e.ext()->lastRecord.toString() << " extent->len: " << e.ext()->length << '\n'; } assert( len * 5 > lastExtentSize ); // assume it is unusually large record; if not, something is broken } @@ -387,12 +434,27 @@ namespace mongo { return cappedAlloc(ns,len); } + void NamespaceIndex::kill_ns(const char *ns) { + if ( !ht ) + return; + Namespace n(ns); + ht->kill(n); + + for( int i = 0; i<=1; i++ ) { + try { + Namespace extra(n.extraName(i).c_str()); + ht->kill(extra); + } + catch(DBException&) { } + } + } + /* extra space for indexes when more than 10 */ NamespaceDetails::Extra* NamespaceIndex::newExtra(const char *ns, int i, NamespaceDetails *d) { assert( i >= 0 && i <= 1 ); Namespace n(ns); Namespace extra(n.extraName(i).c_str()); // throws userexception if ns name too long - + massert( 10350 , "allocExtra: base ns missing?", d ); massert( 10351 , "allocExtra: extra already exists", ht->get(extra) == 0 ); @@ -409,10 +471,10 @@ namespace mongo { long ofs = e->ofsFrom(this); if( i == 0 ) { assert( extraOffset == 0 ); - extraOffset = ofs; + *getDur().writing(&extraOffset) = ofs; assert( extra() == e ); } - else { + else { Extra *hd = extra(); assert( hd->next(this) == 0 ); hd->setNext(ofs); @@ -422,25 +484,23 @@ namespace mongo { /* you MUST call when adding an index. see pdfile.cpp */ IndexDetails& NamespaceDetails::addIndex(const char *thisns, bool resetTransient) { - assert( nsdetails(thisns) == this ); - IndexDetails *id; try { id = &idx(nIndexes,true); } - catch(DBException&) { + catch(DBException&) { allocExtra(thisns, nIndexes); id = &idx(nIndexes,false); } - nIndexes++; + (*getDur().writing(&nIndexes))++; if ( resetTransient ) NamespaceDetailsTransient::get_w(thisns).addedIndex(); return *id; } // must be called when renaming a NS to fix up extra - void NamespaceDetails::copyingFrom(const char *thisns, NamespaceDetails *src) { + void NamespaceDetails::copyingFrom(const char *thisns, NamespaceDetails *src) { extraOffset = 0; // we are a copy -- the old value is wrong. fixing it up below. Extra *se = src->extra(); int n = NIndexesBase; @@ -454,7 +514,7 @@ namespace mongo { Extra *nxt = allocExtra(thisns, n); e->setNext( nxt->ofsFrom(this) ); e = nxt; - } + } assert( extraOffset ); } } @@ -473,25 +533,39 @@ namespace mongo { }*/ return -1; } - - long long NamespaceDetails::storageSize( int * numExtents ){ + + long long NamespaceDetails::storageSize( int * numExtents , BSONArrayBuilder * extentInfo ) const { Extent * e = firstExtent.ext(); assert( e ); - + long long total = 0; int n = 0; - while ( e ){ + while ( e ) { total += e->length; - e = e->getNextExtent(); n++; + + if ( extentInfo ) { + extentInfo->append( BSON( "len" << e->length << "loc: " << e->myLoc.toBSONObj() ) ); + } + + e = e->getNextExtent(); } - + if ( numExtents ) *numExtents = n; - + return total; } - + + NamespaceDetails *NamespaceDetails::writingWithExtra() { + vector< pair< long long, unsigned > > writeRanges; + writeRanges.push_back( make_pair( 0, sizeof( NamespaceDetails ) ) ); + for( Extra *e = extra(); e; e = e->next( this ) ) { + writeRanges.push_back( make_pair( (char*)e - (char*)this, sizeof( Extra ) ) ); + } + return reinterpret_cast< NamespaceDetails* >( getDur().writingRangesAtOffsets( this, writeRanges ) ); + } + /* ------------------------------------------------------------------------- */ mongo::mutex NamespaceDetailsTransient::_qcMutex("qc"); @@ -505,14 +579,14 @@ namespace mongo { _keysComputed = false; _indexSpecs.clear(); } - -/* NamespaceDetailsTransient& NamespaceDetailsTransient::get(const char *ns) { - shared_ptr< NamespaceDetailsTransient > &t = map_[ ns ]; - if ( t.get() == 0 ) - t.reset( new NamespaceDetailsTransient(ns) ); - return *t; - } -*/ + + /* NamespaceDetailsTransient& NamespaceDetailsTransient::get(const char *ns) { + shared_ptr< NamespaceDetailsTransient > &t = map_[ ns ]; + if ( t.get() == 0 ) + t.reset( new NamespaceDetailsTransient(ns) ); + return *t; + } + */ void NamespaceDetailsTransient::clearForPrefix(const char *prefix) { assertInWriteLock(); vector< string > found; @@ -523,7 +597,7 @@ namespace mongo { _map[ *i ].reset(); } } - + void NamespaceDetailsTransient::computeIndexKeys() { _keysComputed = true; _indexKeys.clear(); @@ -565,92 +639,92 @@ namespace mongo { void renameNamespace( const char *from, const char *to ) { NamespaceIndex *ni = nsindex( from ); - assert( ni ); + assert( ni ); assert( ni->details( from ) ); assert( ! ni->details( to ) ); - - // Our namespace and index details will move to a different - // memory location. The only references to namespace and - // index details across commands are in cursors and nsd - // transient (including query cache) so clear these. - ClientCursor::invalidate( from ); - NamespaceDetailsTransient::clearForPrefix( from ); - - NamespaceDetails *details = ni->details( from ); - ni->add_ns( to, *details ); + + // Our namespace and index details will move to a different + // memory location. The only references to namespace and + // index details across commands are in cursors and nsd + // transient (including query cache) so clear these. + ClientCursor::invalidate( from ); + NamespaceDetailsTransient::clearForPrefix( from ); + + NamespaceDetails *details = ni->details( from ); + ni->add_ns( to, *details ); NamespaceDetails *todetails = ni->details( to ); - try { + try { todetails->copyingFrom(to, details); // fixes extraOffset } - catch( DBException& ) { + catch( DBException& ) { // could end up here if .ns is full - if so try to clean up / roll back a little ni->kill_ns(to); throw; } - ni->kill_ns( from ); - details = todetails; - - BSONObj oldSpec; - char database[MaxDatabaseLen]; - nsToDatabase(from, database); - string s = database; - s += ".system.namespaces"; - assert( Helpers::findOne( s.c_str(), BSON( "name" << from ), oldSpec ) ); - - BSONObjBuilder newSpecB; - BSONObjIterator i( oldSpec.getObjectField( "options" ) ); - while( i.more() ) { - BSONElement e = i.next(); - if ( strcmp( e.fieldName(), "create" ) != 0 ) - newSpecB.append( e ); - else - newSpecB << "create" << to; - } - BSONObj newSpec = newSpecB.done(); - addNewNamespaceToCatalog( to, newSpec.isEmpty() ? 0 : &newSpec ); - - deleteObjects( s.c_str(), BSON( "name" << from ), false, false, true ); - // oldSpec variable no longer valid memory - - BSONObj oldIndexSpec; - s = database; - s += ".system.indexes"; - while( Helpers::findOne( s.c_str(), BSON( "ns" << from ), oldIndexSpec ) ) { - BSONObjBuilder newIndexSpecB; - BSONObjIterator i( oldIndexSpec ); - while( i.more() ) { - BSONElement e = i.next(); - if ( strcmp( e.fieldName(), "ns" ) != 0 ) - newIndexSpecB.append( e ); - else - newIndexSpecB << "ns" << to; - } - BSONObj newIndexSpec = newIndexSpecB.done(); - DiskLoc newIndexSpecLoc = theDataFileMgr.insert( s.c_str(), newIndexSpec.objdata(), newIndexSpec.objsize(), true, BSONElement(), false ); - int indexI = details->findIndexByName( oldIndexSpec.getStringField( "name" ) ); - IndexDetails &indexDetails = details->idx(indexI); - string oldIndexNs = indexDetails.indexNamespace(); - indexDetails.info = newIndexSpecLoc; - string newIndexNs = indexDetails.indexNamespace(); - - BtreeBucket::renameIndexNamespace( oldIndexNs.c_str(), newIndexNs.c_str() ); - deleteObjects( s.c_str(), oldIndexSpec.getOwned(), true, false, true ); - } - } - - bool legalClientSystemNS( const string& ns , bool write ){ + ni->kill_ns( from ); + details = todetails; + + BSONObj oldSpec; + char database[MaxDatabaseNameLen]; + nsToDatabase(from, database); + string s = database; + s += ".system.namespaces"; + assert( Helpers::findOne( s.c_str(), BSON( "name" << from ), oldSpec ) ); + + BSONObjBuilder newSpecB; + BSONObjIterator i( oldSpec.getObjectField( "options" ) ); + while( i.more() ) { + BSONElement e = i.next(); + if ( strcmp( e.fieldName(), "create" ) != 0 ) + newSpecB.append( e ); + else + newSpecB << "create" << to; + } + BSONObj newSpec = newSpecB.done(); + addNewNamespaceToCatalog( to, newSpec.isEmpty() ? 0 : &newSpec ); + + deleteObjects( s.c_str(), BSON( "name" << from ), false, false, true ); + // oldSpec variable no longer valid memory + + BSONObj oldIndexSpec; + s = database; + s += ".system.indexes"; + while( Helpers::findOne( s.c_str(), BSON( "ns" << from ), oldIndexSpec ) ) { + BSONObjBuilder newIndexSpecB; + BSONObjIterator i( oldIndexSpec ); + while( i.more() ) { + BSONElement e = i.next(); + if ( strcmp( e.fieldName(), "ns" ) != 0 ) + newIndexSpecB.append( e ); + else + newIndexSpecB << "ns" << to; + } + BSONObj newIndexSpec = newIndexSpecB.done(); + DiskLoc newIndexSpecLoc = theDataFileMgr.insert( s.c_str(), newIndexSpec.objdata(), newIndexSpec.objsize(), true, BSONElement(), false ); + int indexI = details->findIndexByName( oldIndexSpec.getStringField( "name" ) ); + IndexDetails &indexDetails = details->idx(indexI); + string oldIndexNs = indexDetails.indexNamespace(); + indexDetails.info = newIndexSpecLoc; + string newIndexNs = indexDetails.indexNamespace(); + + BtreeBucket::renameIndexNamespace( oldIndexNs.c_str(), newIndexNs.c_str() ); + deleteObjects( s.c_str(), oldIndexSpec.getOwned(), true, false, true ); + } + } + + bool legalClientSystemNS( const string& ns , bool write ) { if( ns == "local.system.replset" ) return true; if ( ns.find( ".system.users" ) != string::npos ) return true; - if ( ns.find( ".system.js" ) != string::npos ){ + if ( ns.find( ".system.js" ) != string::npos ) { if ( write ) Scope::storedFuncMod(); return true; } - + return false; } - + } // namespace mongo -- cgit v1.2.3