diff options
Diffstat (limited to 'db/cap.cpp')
-rw-r--r-- | db/cap.cpp | 239 |
1 files changed, 149 insertions, 90 deletions
@@ -1,4 +1,5 @@ -// @file cap.cpp capped collection related +// @file cap.cpp capped collection related +// the "old" version (<= v1.6) /** * Copyright (C) 2008 10gen Inc. @@ -49,7 +50,7 @@ namespace mongo { /* combine adjacent deleted records *for the current extent* of the capped collection - + this is O(n^2) but we call it for capped tables where typically n==1 or 2! (or 3...there will be a little unused sliver at the end of the extent.) */ @@ -62,7 +63,8 @@ namespace mongo { DiskLoc i = cappedFirstDeletedInCurExtent(); for (; !i.isNull() && inCapExtent( i ); i = i.drec()->nextDeleted ) drecs.push_back( i ); - cappedFirstDeletedInCurExtent() = i; + + getDur().writingDiskLoc( cappedFirstDeletedInCurExtent() ) = i; // This is the O(n^2) part. drecs.sort(); @@ -80,7 +82,7 @@ namespace mongo { DiskLoc b = *j; while ( a.a() == b.a() && a.getOfs() + a.drec()->lengthWithHeaders == b.getOfs() ) { // a & b are adjacent. merge. - a.drec()->lengthWithHeaders += b.drec()->lengthWithHeaders; + getDur().writingInt( a.drec()->lengthWithHeaders ) += b.drec()->lengthWithHeaders; j++; if ( j == drecs.end() ) { DEBUGGING out() << "temp: compact adddelrec2\n"; @@ -106,8 +108,8 @@ namespace mongo { // migrate old NamespaceDetails format assert( capped ); if ( capExtent.a() == 0 && capExtent.getOfs() == 0 ) { - capFirstNewRecord = DiskLoc(); - capFirstNewRecord.setInvalid(); + //capFirstNewRecord = DiskLoc(); + capFirstNewRecord.writing().setInvalid(); // put all the DeletedRecords in cappedListOfAllDeletedRecords() for ( int i = 1; i < Buckets; ++i ) { DiskLoc first = deletedList[ i ]; @@ -115,14 +117,14 @@ namespace mongo { continue; DiskLoc last = first; for (; !last.drec()->nextDeleted.isNull(); last = last.drec()->nextDeleted ); - last.drec()->nextDeleted = cappedListOfAllDeletedRecords(); - cappedListOfAllDeletedRecords() = first; - deletedList[ i ] = DiskLoc(); + last.drec()->nextDeleted.writing() = cappedListOfAllDeletedRecords(); + cappedListOfAllDeletedRecords().writing() = first; + deletedList[i].writing() = DiskLoc(); } // NOTE cappedLastDelRecLastExtent() set to DiskLoc() in above // Last, in case we're killed before getting here - capExtent = firstExtent; + capExtent.writing() = firstExtent; } } @@ -144,20 +146,20 @@ namespace mongo { // We want cappedLastDelRecLastExtent() to be the last DeletedRecord of the prev cap extent // (or DiskLoc() if new capExtent == firstExtent) if ( capExtent == lastExtent ) - cappedLastDelRecLastExtent() = DiskLoc(); + getDur().writingDiskLoc( cappedLastDelRecLastExtent() ) = DiskLoc(); else { DiskLoc i = cappedFirstDeletedInCurExtent(); for (; !i.isNull() && nextIsInCapExtent( i ); i = i.drec()->nextDeleted ); - cappedLastDelRecLastExtent() = i; + getDur().writingDiskLoc( cappedLastDelRecLastExtent() ) = i; } - capExtent = theCapExtent()->xnext.isNull() ? firstExtent : theCapExtent()->xnext; + getDur().writingDiskLoc( capExtent ) = theCapExtent()->xnext.isNull() ? firstExtent : theCapExtent()->xnext; /* this isn't true if a collection has been renamed...that is ok just used for diagnostics */ //dassert( theCapExtent()->ns == ns ); theCapExtent()->assertOk(); - capFirstNewRecord = DiskLoc(); + getDur().writingDiskLoc( capFirstNewRecord ) = DiskLoc(); } DiskLoc NamespaceDetails::__capAlloc( int len ) { @@ -176,25 +178,25 @@ namespace mongo { /* unlink ourself from the deleted list */ if ( !ret.isNull() ) { if ( prev.isNull() ) - cappedListOfAllDeletedRecords() = ret.drec()->nextDeleted; + cappedListOfAllDeletedRecords().writing() = ret.drec()->nextDeleted; else - prev.drec()->nextDeleted = ret.drec()->nextDeleted; - ret.drec()->nextDeleted.setInvalid(); // defensive. + prev.drec()->nextDeleted.writing() = ret.drec()->nextDeleted; + ret.drec()->nextDeleted.writing().setInvalid(); // defensive. assert( ret.drec()->extentOfs < ret.getOfs() ); } return ret; } - DiskLoc NamespaceDetails::cappedAlloc(const char *ns, int len) { + DiskLoc NamespaceDetails::cappedAlloc(const char *ns, int len) { // signal done allocating new extents. if ( !cappedLastDelRecLastExtent().isValid() ) - cappedLastDelRecLastExtent() = DiskLoc(); - + getDur().writingDiskLoc( cappedLastDelRecLastExtent() ) = DiskLoc(); + assert( len < 400000000 ); int passes = 0; int maxPasses = ( len / 30 ) + 2; // 30 is about the smallest entry that could go in the oplog - if ( maxPasses < 5000 ){ + if ( maxPasses < 5000 ) { // this is for bacwards safety since 5000 was the old value maxPasses = 5000; } @@ -208,7 +210,7 @@ namespace mongo { theCapExtent()->assertOk(); DiskLoc firstEmptyExtent; while ( 1 ) { - if ( nrecords < max ) { + if ( stats.nrecords < max ) { loc = __capAlloc( len ); if ( !loc.isNull() ) break; @@ -217,8 +219,9 @@ namespace mongo { // If on first iteration through extents, don't delete anything. if ( !capFirstNewRecord.isValid() ) { advanceCapExtent( ns ); + if ( capExtent != firstExtent ) - capFirstNewRecord.setInvalid(); + capFirstNewRecord.writing().setInvalid(); // else signal done with first iteration through extents. continue; } @@ -247,14 +250,14 @@ namespace mongo { compact(); if( ++passes > maxPasses ) { log() << "passes ns:" << ns << " len:" << len << " maxPasses: " << maxPasses << '\n'; - log() << "passes max:" << max << " nrecords:" << nrecords << " datasize: " << datasize << endl; + log() << "passes max:" << max << " nrecords:" << stats.nrecords << " datasize: " << stats.datasize << endl; massert( 10345 , "passes >= maxPasses in capped collection alloc", false ); } } // Remember first record allocated on this iteration through capExtent. if ( capFirstNewRecord.isValid() && capFirstNewRecord.isNull() ) - capFirstNewRecord = loc; + getDur().writingDiskLoc(capFirstNewRecord) = loc; return loc; } @@ -269,123 +272,179 @@ namespace mongo { } } - void NamespaceDetails::cappedDumpDelInfo() { + void NamespaceDetails::cappedDumpDelInfo() { cout << "dl[0]: " << deletedList[0].toString() << endl; - for( DiskLoc z = deletedList[0]; !z.isNull(); z = z.drec()->nextDeleted ) { - cout << " drec:" << z.toString() << " dreclen:" << hex << z.drec()->lengthWithHeaders << - " ext:" << z.drec()->myExtent(z)->myLoc.toString() << endl; + for( DiskLoc z = deletedList[0]; !z.isNull(); z = z.drec()->nextDeleted ) { + cout << " drec:" << z.toString() << " dreclen:" << hex << z.drec()->lengthWithHeaders << + " ext:" << z.drec()->myExtent(z)->myLoc.toString() << endl; } cout << "dl[1]: " << deletedList[1].toString() << endl; } - /* everything from end on, eliminate from the capped collection. - @param inclusive if true, deletes end (i.e. closed or open range) - */ + void NamespaceDetails::cappedTruncateLastDelUpdate() { + if ( capExtent == firstExtent ) { + // Only one extent of the collection is in use, so there + // is no deleted record in a previous extent, so nullify + // cappedLastDelRecLastExtent(). + cappedLastDelRecLastExtent().writing() = DiskLoc(); + } + else { + // Scan through all deleted records in the collection + // until the last deleted record for the extent prior + // to the new capExtent is found. Then set + // cappedLastDelRecLastExtent() to that deleted record. + DiskLoc i = cappedListOfAllDeletedRecords(); + for( ; + !i.drec()->nextDeleted.isNull() && + !inCapExtent( i.drec()->nextDeleted ); + i = i.drec()->nextDeleted ); + // In our capped storage model, every extent must have at least one + // deleted record. Here we check that 'i' is not the last deleted + // record. (We expect that there will be deleted records in the new + // capExtent as well.) + assert( !i.drec()->nextDeleted.isNull() ); + cappedLastDelRecLastExtent().writing() = i; + } + } + void NamespaceDetails::cappedTruncateAfter(const char *ns, DiskLoc end, bool inclusive) { DEV assert( this == nsdetails(ns) ); assert( cappedLastDelRecLastExtent().isValid() ); - + + // We iteratively remove the newest document until the newest document + // is 'end', then we remove 'end' if requested. bool foundLast = false; while( 1 ) { if ( foundLast ) { + // 'end' has been found and removed, so break. break; } + // 'curr' will point to the newest document in the collection. DiskLoc curr = theCapExtent()->lastRecord; assert( !curr.isNull() ); if ( curr == end ) { if ( inclusive ) { + // 'end' has been found, so break next iteration. foundLast = true; - } else { + } + else { + // 'end' has been found, so break. break; } } - - uassert( 13415, "emptying the collection is not allowed", nrecords > 1 ); - + + // TODO The algorithm used in this function cannot generate an + // empty collection, but we could call emptyCappedCollection() in + // this case instead of asserting. + uassert( 13415, "emptying the collection is not allowed", stats.nrecords > 1 ); + + // Delete the newest record, and coalesce the new deleted + // record with existing deleted records. + theDataFileMgr.deleteRecord(ns, curr.rec(), curr, true); + compact(); + + // This is the case where we have not yet had to remove any + // documents to make room for other documents, and we are allocating + // documents from free space in fresh extents instead of reusing + // space from familiar extents. if ( !capLooped() ) { - theDataFileMgr.deleteRecord(ns, curr.rec(), curr, true); - compact(); + + // We just removed the last record from the 'capExtent', and + // the 'capExtent' can't be empty, so we set 'capExtent' to + // capExtent's prev extent. if ( theCapExtent()->lastRecord.isNull() ) { assert( !theCapExtent()->xprev.isNull() ); - capExtent = theCapExtent()->xprev; + // NOTE Because we didn't delete the last document, and + // capLooped() is false, capExtent is not the first extent + // so xprev will be nonnull. + capExtent.writing() = theCapExtent()->xprev; theCapExtent()->assertOk(); - if ( capExtent == firstExtent ) { - cappedLastDelRecLastExtent() = DiskLoc(); - } else { - // slow - there's no prev ptr for deleted rec - DiskLoc i = cappedListOfAllDeletedRecords(); - for( ; - !i.drec()->nextDeleted.isNull() && - !inCapExtent( i.drec()->nextDeleted ); - i = i.drec()->nextDeleted ); - assert( !i.drec()->nextDeleted.isNull() ); // I believe there is always at least one drec per extent - cappedLastDelRecLastExtent() = i; - } + + // update cappedLastDelRecLastExtent() + cappedTruncateLastDelUpdate(); } continue; } - theDataFileMgr.deleteRecord(ns, curr.rec(), curr, true); - compact(); - if ( curr == capFirstNewRecord ) { // invalid, but can compare locations - capExtent = ( capExtent == firstExtent ) ? lastExtent : theCapExtent()->xprev; - theCapExtent()->assertOk(); - assert( !theCapExtent()->firstRecord.isNull() ); - capFirstNewRecord = theCapExtent()->firstRecord; - if ( capExtent == firstExtent ) { - cappedLastDelRecLastExtent() = DiskLoc(); - } else { - // slow - there's no prev ptr for deleted rec - DiskLoc i = cappedListOfAllDeletedRecords(); - for( ; - !i.drec()->nextDeleted.isNull() && - !inCapExtent( i.drec()->nextDeleted ); - i = i.drec()->nextDeleted ); - assert( !i.drec()->nextDeleted.isNull() ); // I believe there is always at least one drec per extent - cappedLastDelRecLastExtent() = i; + // This is the case where capLooped() is true, and we just deleted + // from capExtent, and we just deleted capFirstNewRecord, which was + // the last record on the fresh side of capExtent. + // NOTE In this comparison, curr and potentially capFirstNewRecord + // may point to invalid data, but we can still compare the + // references themselves. + if ( curr == capFirstNewRecord ) { + + // Set 'capExtent' to the first nonempty extent prior to the + // initial capExtent. There must be such an extent because we + // have not deleted the last document in the collection. It is + // possible that all extents other than the capExtent are empty. + // In this case we will keep the initial capExtent and specify + // that all records contained within are on the fresh rather than + // stale side of the extent. + DiskLoc newCapExtent = capExtent; + do { + // Find the previous extent, looping if necessary. + newCapExtent = ( newCapExtent == firstExtent ) ? lastExtent : newCapExtent.ext()->xprev; + newCapExtent.ext()->assertOk(); } + while ( newCapExtent.ext()->firstRecord.isNull() ); + capExtent.writing() = newCapExtent; + + // Place all documents in the new capExtent on the fresh side + // of the capExtent by setting capFirstNewRecord to the first + // document in the new capExtent. + capFirstNewRecord.writing() = theCapExtent()->firstRecord; + + // update cappedLastDelRecLastExtent() + cappedTruncateLastDelUpdate(); } } } - + void NamespaceDetails::emptyCappedCollection( const char *ns ) { DEV assert( this == nsdetails(ns) ); massert( 13424, "collection must be capped", capped ); - massert( 13425, "background index build in progress", !backgroundIndexBuildInProgress ); + massert( 13425, "background index build in progress", !indexBuildInProgress ); massert( 13426, "indexes present", nIndexes == 0 ); + // Clear all references to this namespace. ClientCursor::invalidate( ns ); - NamespaceDetailsTransient::clearForPrefix( ns ); + NamespaceDetailsTransient::clearForPrefix( ns ); + + // Get a writeable reference to 'this' and reset all pertinent + // attributes. + NamespaceDetails *t = writingWithoutExtra(); + + t->cappedLastDelRecLastExtent() = DiskLoc(); + t->cappedListOfAllDeletedRecords() = DiskLoc(); - cappedLastDelRecLastExtent() = DiskLoc(); - cappedListOfAllDeletedRecords() = DiskLoc(); - // preserve firstExtent/lastExtent - capExtent = firstExtent; - datasize = nrecords = 0; + t->capExtent = firstExtent; + t->stats.datasize = stats.nrecords = 0; // lastExtentSize preserve // nIndexes preserve 0 // capped preserve true // max preserve - paddingFactor = 1.0; - flags = 0; - capFirstNewRecord = DiskLoc(); - capFirstNewRecord.setInvalid(); - cappedLastDelRecLastExtent().setInvalid(); + t->paddingFactor = 1.0; + t->flags = 0; + t->capFirstNewRecord = DiskLoc(); + t->capFirstNewRecord.setInvalid(); + t->cappedLastDelRecLastExtent().setInvalid(); // dataFileVersion preserve // indexFileVersion preserve - multiKeyIndexBits = 0; - reservedA = 0; - extraOffset = 0; - // backgroundIndexBuildInProgress preserve 0 - memset(reserved, 0, sizeof(reserved)); + t->multiKeyIndexBits = 0; + t->reservedA = 0; + t->extraOffset = 0; + // indexBuildInProgress preserve 0 + memset(t->reserved, 0, sizeof(t->reserved)); + // Reset all existing extents and recreate the deleted list. for( DiskLoc ext = firstExtent; !ext.isNull(); ext = ext.ext()->xnext ) { DiskLoc prev = ext.ext()->xprev; DiskLoc next = ext.ext()->xnext; DiskLoc empty = ext.ext()->reuse( ns ); - ext.ext()->xprev = prev; - ext.ext()->xnext = next; + ext.ext()->xprev.writing() = prev; + ext.ext()->xnext.writing() = next; addDeletedRec( empty.drec(), empty ); } } |