Diffstat (limited to 'db/cap.cpp')
-rw-r--r--  db/cap.cpp  239
1 file changed, 149 insertions(+), 90 deletions(-)
diff --git a/db/cap.cpp b/db/cap.cpp
index c676429..198bd54 100644
--- a/db/cap.cpp
+++ b/db/cap.cpp
@@ -1,4 +1,5 @@
-// @file cap.cpp capped collection related
+// @file cap.cpp capped collection related
+// the "old" version (<= v1.6)
/**
* Copyright (C) 2008 10gen Inc.
@@ -49,7 +50,7 @@
namespace mongo {
/* combine adjacent deleted records *for the current extent* of the capped collection
-
+
this is O(n^2) but we call it for capped tables where typically n==1 or 2!
(or 3...there will be a little unused sliver at the end of the extent.)
*/
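
The coalescing described above reduces, after the sort, to one pass over the ordered free list, merging records that touch. A self-contained sketch of that idea (not part of this commit) follows; Block, offset, and len are hypothetical stand-ins for DiskLoc, DeletedRecord, and lengthWithHeaders.

#include <algorithm>
#include <iostream>
#include <vector>

// Hypothetical stand-in for a DeletedRecord: extent offset plus length.
struct Block { int offset; int len; };

int main() {
    std::vector<Block> drecs = { {0, 16}, {16, 8}, {40, 8}, {48, 4} };
    // Mirrors drecs.sort() in compact(): order free records by position.
    std::sort(drecs.begin(), drecs.end(),
              [](const Block& a, const Block& b) { return a.offset < b.offset; });
    std::vector<Block> merged;
    for (const Block& b : drecs) {
        if (!merged.empty() &&
            merged.back().offset + merged.back().len == b.offset)
            merged.back().len += b.len;   // adjacent: fold b into its predecessor
        else
            merged.push_back(b);
    }
    for (const Block& b : merged)
        std::cout << b.offset << "+" << b.len << "\n";   // prints 0+24 and 40+12
}
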
@@ -62,7 +63,8 @@ namespace mongo {
DiskLoc i = cappedFirstDeletedInCurExtent();
for (; !i.isNull() && inCapExtent( i ); i = i.drec()->nextDeleted )
drecs.push_back( i );
- cappedFirstDeletedInCurExtent() = i;
+
+ getDur().writingDiskLoc( cappedFirstDeletedInCurExtent() ) = i;
// This is the O(n^2) part.
drecs.sort();
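
The getDur().writingDiskLoc(...) and getDur().writingInt(...) calls added throughout this commit declare a write intent against memory-mapped data before mutating it, so the change can reach the journal first. Below is a minimal sketch of that declare-then-mutate pattern; the Journal type and its writing() helper are hypothetical simplifications, not MongoDB's actual dur.h interface.

#include <cstring>
#include <iostream>
#include <vector>

// Hypothetical journal: not MongoDB's dur.h, just the shape of the idea.
struct Journal {
    struct Entry { void* addr; std::vector<char> before; };
    std::vector<Entry> entries;

    // Declare intent to write sizeof(T) bytes at 'v', snapshotting the old
    // value, then hand back a reference the caller assigns through.
    template <typename T>
    T& writing(T& v) {
        Entry e;
        e.addr = &v;
        e.before.assign(reinterpret_cast<char*>(&v),
                        reinterpret_cast<char*>(&v) + sizeof(T));
        entries.push_back(std::move(e));
        return v;
    }
};

int main() {
    Journal dur;
    int lengthWithHeaders = 64;
    dur.writing(lengthWithHeaders) += 32;   // same shape as writingInt(...) += ...
    std::cout << lengthWithHeaders << " (" << dur.entries.size()
              << " journal entry)\n";       // 96 (1 journal entry)
}
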
@@ -80,7 +82,7 @@ namespace mongo {
DiskLoc b = *j;
while ( a.a() == b.a() && a.getOfs() + a.drec()->lengthWithHeaders == b.getOfs() ) {
// a & b are adjacent. merge.
- a.drec()->lengthWithHeaders += b.drec()->lengthWithHeaders;
+ getDur().writingInt( a.drec()->lengthWithHeaders ) += b.drec()->lengthWithHeaders;
j++;
if ( j == drecs.end() ) {
DEBUGGING out() << "temp: compact adddelrec2\n";
@@ -106,8 +108,8 @@ namespace mongo {
// migrate old NamespaceDetails format
assert( capped );
if ( capExtent.a() == 0 && capExtent.getOfs() == 0 ) {
- capFirstNewRecord = DiskLoc();
- capFirstNewRecord.setInvalid();
+ //capFirstNewRecord = DiskLoc();
+ capFirstNewRecord.writing().setInvalid();
// put all the DeletedRecords in cappedListOfAllDeletedRecords()
for ( int i = 1; i < Buckets; ++i ) {
DiskLoc first = deletedList[ i ];
@@ -115,14 +117,14 @@ namespace mongo {
continue;
DiskLoc last = first;
for (; !last.drec()->nextDeleted.isNull(); last = last.drec()->nextDeleted );
- last.drec()->nextDeleted = cappedListOfAllDeletedRecords();
- cappedListOfAllDeletedRecords() = first;
- deletedList[ i ] = DiskLoc();
+ last.drec()->nextDeleted.writing() = cappedListOfAllDeletedRecords();
+ cappedListOfAllDeletedRecords().writing() = first;
+ deletedList[i].writing() = DiskLoc();
}
// NOTE cappedLastDelRecLastExtent() set to DiskLoc() in above
// Last, in case we're killed before getting here
- capExtent = firstExtent;
+ capExtent.writing() = firstExtent;
}
}
@@ -144,20 +146,20 @@ namespace mongo {
// We want cappedLastDelRecLastExtent() to be the last DeletedRecord of the prev cap extent
// (or DiskLoc() if new capExtent == firstExtent)
if ( capExtent == lastExtent )
- cappedLastDelRecLastExtent() = DiskLoc();
+ getDur().writingDiskLoc( cappedLastDelRecLastExtent() ) = DiskLoc();
else {
DiskLoc i = cappedFirstDeletedInCurExtent();
for (; !i.isNull() && nextIsInCapExtent( i ); i = i.drec()->nextDeleted );
- cappedLastDelRecLastExtent() = i;
+ getDur().writingDiskLoc( cappedLastDelRecLastExtent() ) = i;
}
- capExtent = theCapExtent()->xnext.isNull() ? firstExtent : theCapExtent()->xnext;
+ getDur().writingDiskLoc( capExtent ) = theCapExtent()->xnext.isNull() ? firstExtent : theCapExtent()->xnext;
/* this isn't true if a collection has been renamed...that is ok just used for diagnostics */
//dassert( theCapExtent()->ns == ns );
theCapExtent()->assertOk();
- capFirstNewRecord = DiskLoc();
+ getDur().writingDiskLoc( capFirstNewRecord ) = DiskLoc();
}
DiskLoc NamespaceDetails::__capAlloc( int len ) {
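
advanceCapExtent() above steps capExtent forward through the extent chain and wraps back to firstExtent at the end, which is what makes the collection circular. A toy version of that wrap-around advance, assuming a hypothetical in-memory Extent with an xnext pointer:

#include <iostream>
#include <vector>

// Hypothetical in-memory extent; the real one lives in a memory-mapped file.
struct Extent { int id = 0; Extent* xnext = nullptr; };

int main() {
    std::vector<Extent> extents(3);
    for (int i = 0; i < 3; ++i) extents[i].id = i;
    extents[0].xnext = &extents[1];
    extents[1].xnext = &extents[2];   // extents[2] is last: xnext stays null

    Extent* first = &extents[0];
    Extent* cap = &extents[1];
    for (int step = 0; step < 4; ++step) {
        // Mirrors: capExtent = xnext.isNull() ? firstExtent : xnext
        cap = cap->xnext ? cap->xnext : first;
        std::cout << cap->id << ' ';   // prints 2 0 1 2
    }
    std::cout << '\n';
}
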
@@ -176,25 +178,25 @@ namespace mongo {
/* unlink ourself from the deleted list */
if ( !ret.isNull() ) {
if ( prev.isNull() )
- cappedListOfAllDeletedRecords() = ret.drec()->nextDeleted;
+ cappedListOfAllDeletedRecords().writing() = ret.drec()->nextDeleted;
else
- prev.drec()->nextDeleted = ret.drec()->nextDeleted;
- ret.drec()->nextDeleted.setInvalid(); // defensive.
+ prev.drec()->nextDeleted.writing() = ret.drec()->nextDeleted;
+ ret.drec()->nextDeleted.writing().setInvalid(); // defensive.
assert( ret.drec()->extentOfs < ret.getOfs() );
}
return ret;
}
- DiskLoc NamespaceDetails::cappedAlloc(const char *ns, int len) {
+ DiskLoc NamespaceDetails::cappedAlloc(const char *ns, int len) {
// signal done allocating new extents.
if ( !cappedLastDelRecLastExtent().isValid() )
- cappedLastDelRecLastExtent() = DiskLoc();
-
+ getDur().writingDiskLoc( cappedLastDelRecLastExtent() ) = DiskLoc();
+
assert( len < 400000000 );
int passes = 0;
int maxPasses = ( len / 30 ) + 2; // 30 is about the smallest entry that could go in the oplog
- if ( maxPasses < 5000 ){
+ if ( maxPasses < 5000 ) {
// this is for backwards safety since 5000 was the old value
maxPasses = 5000;
}
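
The retry bound just above works out to roughly one pass per 30-byte record that might have to be freed, with a floor of 5000 retained from the older code. A small worked example, with maxPassesFor as a hypothetical helper name:

#include <iostream>

// Hypothetical helper name; the computation is the one in cappedAlloc().
int maxPassesFor(int len) {
    int maxPasses = (len / 30) + 2;   // ~30 bytes: smallest likely oplog entry
    if (maxPasses < 5000)
        maxPasses = 5000;             // backwards-safe floor (the old value)
    return maxPasses;
}

int main() {
    std::cout << maxPassesFor(100) << '\n';      // 5000 (floor applies)
    std::cout << maxPassesFor(300000) << '\n';   // 10002
}
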
@@ -208,7 +210,7 @@ namespace mongo {
theCapExtent()->assertOk();
DiskLoc firstEmptyExtent;
while ( 1 ) {
- if ( nrecords < max ) {
+ if ( stats.nrecords < max ) {
loc = __capAlloc( len );
if ( !loc.isNull() )
break;
@@ -217,8 +219,9 @@ namespace mongo {
// If on first iteration through extents, don't delete anything.
if ( !capFirstNewRecord.isValid() ) {
advanceCapExtent( ns );
+
if ( capExtent != firstExtent )
- capFirstNewRecord.setInvalid();
+ capFirstNewRecord.writing().setInvalid();
// else signal done with first iteration through extents.
continue;
}
@@ -247,14 +250,14 @@ namespace mongo {
compact();
if( ++passes > maxPasses ) {
log() << "passes ns:" << ns << " len:" << len << " maxPasses: " << maxPasses << '\n';
- log() << "passes max:" << max << " nrecords:" << nrecords << " datasize: " << datasize << endl;
+ log() << "passes max:" << max << " nrecords:" << stats.nrecords << " datasize: " << stats.datasize << endl;
massert( 10345 , "passes >= maxPasses in capped collection alloc", false );
}
}
// Remember first record allocated on this iteration through capExtent.
if ( capFirstNewRecord.isValid() && capFirstNewRecord.isNull() )
- capFirstNewRecord = loc;
+ getDur().writingDiskLoc(capFirstNewRecord) = loc;
return loc;
}
@@ -269,123 +272,179 @@ namespace mongo {
}
}
- void NamespaceDetails::cappedDumpDelInfo() {
+ void NamespaceDetails::cappedDumpDelInfo() {
cout << "dl[0]: " << deletedList[0].toString() << endl;
- for( DiskLoc z = deletedList[0]; !z.isNull(); z = z.drec()->nextDeleted ) {
- cout << " drec:" << z.toString() << " dreclen:" << hex << z.drec()->lengthWithHeaders <<
- " ext:" << z.drec()->myExtent(z)->myLoc.toString() << endl;
+ for( DiskLoc z = deletedList[0]; !z.isNull(); z = z.drec()->nextDeleted ) {
+ cout << " drec:" << z.toString() << " dreclen:" << hex << z.drec()->lengthWithHeaders <<
+ " ext:" << z.drec()->myExtent(z)->myLoc.toString() << endl;
}
cout << "dl[1]: " << deletedList[1].toString() << endl;
}
- /* everything from end on, eliminate from the capped collection.
- @param inclusive if true, deletes end (i.e. closed or open range)
- */
+ void NamespaceDetails::cappedTruncateLastDelUpdate() {
+ if ( capExtent == firstExtent ) {
+ // Only one extent of the collection is in use, so there
+ // is no deleted record in a previous extent, so nullify
+ // cappedLastDelRecLastExtent().
+ cappedLastDelRecLastExtent().writing() = DiskLoc();
+ }
+ else {
+ // Scan through all deleted records in the collection
+ // until the last deleted record for the extent prior
+ // to the new capExtent is found. Then set
+ // cappedLastDelRecLastExtent() to that deleted record.
+ DiskLoc i = cappedListOfAllDeletedRecords();
+ for( ;
+ !i.drec()->nextDeleted.isNull() &&
+ !inCapExtent( i.drec()->nextDeleted );
+ i = i.drec()->nextDeleted );
+ // In our capped storage model, every extent must have at least one
+ // deleted record. Here we check that 'i' is not the last deleted
+ // record. (We expect that there will be deleted records in the new
+ // capExtent as well.)
+ assert( !i.drec()->nextDeleted.isNull() );
+ cappedLastDelRecLastExtent().writing() = i;
+ }
+ }
+
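
The new cappedTruncateLastDelUpdate() factors out the two copies of the "slow scan" removed further down: it walks the singly linked deleted-record list until the next entry would fall inside the cap extent. A standalone sketch of that stop-before-the-predicate walk, with DRec and an integer extent tag as hypothetical stand-ins:

#include <iostream>
#include <vector>

// Hypothetical deleted record: which extent it lives in, plus the list link.
struct DRec { int extent; DRec* nextDeleted; };

int main() {
    // Deleted records in extents 0, 0, 1, 1; the cap extent is extent 1.
    std::vector<DRec> recs = {
        {0, nullptr}, {0, nullptr}, {1, nullptr}, {1, nullptr} };
    for (int i = 0; i < 3; ++i) recs[i].nextDeleted = &recs[i + 1];
    const int capExtent = 1;

    DRec* i = &recs[0];
    while (i->nextDeleted && i->nextDeleted->extent != capExtent)
        i = i->nextDeleted;
    // 'i' is now the last deleted record before the cap extent, i.e. the
    // value stored into cappedLastDelRecLastExtent().
    std::cout << "stopped in extent " << i->extent << '\n';   // extent 0
}
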
void NamespaceDetails::cappedTruncateAfter(const char *ns, DiskLoc end, bool inclusive) {
DEV assert( this == nsdetails(ns) );
assert( cappedLastDelRecLastExtent().isValid() );
-
+
+ // We iteratively remove the newest document until the newest document
+ // is 'end', then we remove 'end' if requested.
bool foundLast = false;
while( 1 ) {
if ( foundLast ) {
+ // 'end' has been found and removed, so break.
break;
}
+ // 'curr' will point to the newest document in the collection.
DiskLoc curr = theCapExtent()->lastRecord;
assert( !curr.isNull() );
if ( curr == end ) {
if ( inclusive ) {
+ // 'end' has been found, so break next iteration.
foundLast = true;
- } else {
+ }
+ else {
+ // 'end' has been found, so break.
break;
}
}
-
- uassert( 13415, "emptying the collection is not allowed", nrecords > 1 );
-
+
+ // TODO The algorithm used in this function cannot generate an
+ // empty collection, but we could call emptyCappedCollection() in
+ // this case instead of asserting.
+ uassert( 13415, "emptying the collection is not allowed", stats.nrecords > 1 );
+
+ // Delete the newest record, and coalesce the new deleted
+ // record with existing deleted records.
+ theDataFileMgr.deleteRecord(ns, curr.rec(), curr, true);
+ compact();
+
+ // This is the case where we have not yet had to remove any
+ // documents to make room for other documents, and we are allocating
+ // documents from free space in fresh extents instead of reusing
+ // space from familiar extents.
if ( !capLooped() ) {
- theDataFileMgr.deleteRecord(ns, curr.rec(), curr, true);
- compact();
+
+ // We just removed the last record from the 'capExtent', and
+ // the 'capExtent' can't be empty, so we set 'capExtent' to
+ // capExtent's prev extent.
if ( theCapExtent()->lastRecord.isNull() ) {
assert( !theCapExtent()->xprev.isNull() );
- capExtent = theCapExtent()->xprev;
+ // NOTE Because we didn't delete the last document, and
+ // capLooped() is false, capExtent is not the first extent
+ // so xprev will be nonnull.
+ capExtent.writing() = theCapExtent()->xprev;
theCapExtent()->assertOk();
- if ( capExtent == firstExtent ) {
- cappedLastDelRecLastExtent() = DiskLoc();
- } else {
- // slow - there's no prev ptr for deleted rec
- DiskLoc i = cappedListOfAllDeletedRecords();
- for( ;
- !i.drec()->nextDeleted.isNull() &&
- !inCapExtent( i.drec()->nextDeleted );
- i = i.drec()->nextDeleted );
- assert( !i.drec()->nextDeleted.isNull() ); // I believe there is always at least one drec per extent
- cappedLastDelRecLastExtent() = i;
- }
+
+ // update cappedLastDelRecLastExtent()
+ cappedTruncateLastDelUpdate();
}
continue;
}
- theDataFileMgr.deleteRecord(ns, curr.rec(), curr, true);
- compact();
- if ( curr == capFirstNewRecord ) { // invalid, but can compare locations
- capExtent = ( capExtent == firstExtent ) ? lastExtent : theCapExtent()->xprev;
- theCapExtent()->assertOk();
- assert( !theCapExtent()->firstRecord.isNull() );
- capFirstNewRecord = theCapExtent()->firstRecord;
- if ( capExtent == firstExtent ) {
- cappedLastDelRecLastExtent() = DiskLoc();
- } else {
- // slow - there's no prev ptr for deleted rec
- DiskLoc i = cappedListOfAllDeletedRecords();
- for( ;
- !i.drec()->nextDeleted.isNull() &&
- !inCapExtent( i.drec()->nextDeleted );
- i = i.drec()->nextDeleted );
- assert( !i.drec()->nextDeleted.isNull() ); // I believe there is always at least one drec per extent
- cappedLastDelRecLastExtent() = i;
+ // This is the case where capLooped() is true, and we just deleted
+ // from capExtent, and we just deleted capFirstNewRecord, which was
+ // the last record on the fresh side of capExtent.
+ // NOTE In this comparison, curr and potentially capFirstNewRecord
+ // may point to invalid data, but we can still compare the
+ // references themselves.
+ if ( curr == capFirstNewRecord ) {
+
+ // Set 'capExtent' to the first nonempty extent prior to the
+ // initial capExtent. There must be such an extent because we
+ // have not deleted the last document in the collection. It is
+ // possible that all extents other than the capExtent are empty.
+ // In this case we will keep the initial capExtent and specify
+ // that all records contained within are on the fresh rather than
+ // stale side of the extent.
+ DiskLoc newCapExtent = capExtent;
+ do {
+ // Find the previous extent, looping if necessary.
+ newCapExtent = ( newCapExtent == firstExtent ) ? lastExtent : newCapExtent.ext()->xprev;
+ newCapExtent.ext()->assertOk();
}
+ while ( newCapExtent.ext()->firstRecord.isNull() );
+ capExtent.writing() = newCapExtent;
+
+ // Place all documents in the new capExtent on the fresh side
+ // of the capExtent by setting capFirstNewRecord to the first
+ // document in the new capExtent.
+ capFirstNewRecord.writing() = theCapExtent()->firstRecord;
+
+ // update cappedLastDelRecLastExtent()
+ cappedTruncateLastDelUpdate();
}
}
}
-
+
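
The loop in cappedTruncateAfter() peels off the newest record until 'end' is reached, deleting 'end' itself only when 'inclusive' is set. Stripped of the extent bookkeeping, the control flow looks like the sketch below, where a std::deque of ints stands in for the record chain:

#include <deque>
#include <iostream>

// A deque of ints stands in for the record chain; back() is the newest.
void truncateAfter(std::deque<int>& records, int end, bool inclusive) {
    bool foundLast = false;
    while (!foundLast) {
        int curr = records.back();   // newest record
        if (curr == end) {
            if (inclusive)
                foundLast = true;    // delete 'end' too, then stop
            else
                break;               // keep 'end': stop before deleting
        }
        records.pop_back();          // the "deleteRecord + compact" step
    }
}

int main() {
    std::deque<int> r = {1, 2, 3, 4, 5};
    truncateAfter(r, 3, /*inclusive=*/false);
    std::cout << r.back() << '\n';   // 3
    truncateAfter(r, 2, /*inclusive=*/true);
    std::cout << r.back() << '\n';   // 1
}
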
void NamespaceDetails::emptyCappedCollection( const char *ns ) {
DEV assert( this == nsdetails(ns) );
massert( 13424, "collection must be capped", capped );
- massert( 13425, "background index build in progress", !backgroundIndexBuildInProgress );
+ massert( 13425, "background index build in progress", !indexBuildInProgress );
massert( 13426, "indexes present", nIndexes == 0 );
+ // Clear all references to this namespace.
ClientCursor::invalidate( ns );
- NamespaceDetailsTransient::clearForPrefix( ns );
+ NamespaceDetailsTransient::clearForPrefix( ns );
+
+ // Get a writeable reference to 'this' and reset all pertinent
+ // attributes.
+ NamespaceDetails *t = writingWithoutExtra();
+
+ t->cappedLastDelRecLastExtent() = DiskLoc();
+ t->cappedListOfAllDeletedRecords() = DiskLoc();
- cappedLastDelRecLastExtent() = DiskLoc();
- cappedListOfAllDeletedRecords() = DiskLoc();
-
// preserve firstExtent/lastExtent
- capExtent = firstExtent;
- datasize = nrecords = 0;
+ t->capExtent = firstExtent;
+ t->stats.datasize = stats.nrecords = 0;
// lastExtentSize preserve
// nIndexes preserve 0
// capped preserve true
// max preserve
- paddingFactor = 1.0;
- flags = 0;
- capFirstNewRecord = DiskLoc();
- capFirstNewRecord.setInvalid();
- cappedLastDelRecLastExtent().setInvalid();
+ t->paddingFactor = 1.0;
+ t->flags = 0;
+ t->capFirstNewRecord = DiskLoc();
+ t->capFirstNewRecord.setInvalid();
+ t->cappedLastDelRecLastExtent().setInvalid();
// dataFileVersion preserve
// indexFileVersion preserve
- multiKeyIndexBits = 0;
- reservedA = 0;
- extraOffset = 0;
- // backgroundIndexBuildInProgress preserve 0
- memset(reserved, 0, sizeof(reserved));
+ t->multiKeyIndexBits = 0;
+ t->reservedA = 0;
+ t->extraOffset = 0;
+ // indexBuildInProgress preserve 0
+ memset(t->reserved, 0, sizeof(t->reserved));
+ // Reset all existing extents and recreate the deleted list.
for( DiskLoc ext = firstExtent; !ext.isNull(); ext = ext.ext()->xnext ) {
DiskLoc prev = ext.ext()->xprev;
DiskLoc next = ext.ext()->xnext;
DiskLoc empty = ext.ext()->reuse( ns );
- ext.ext()->xprev = prev;
- ext.ext()->xnext = next;
+ ext.ext()->xprev.writing() = prev;
+ ext.ext()->xnext.writing() = next;
addDeletedRec( empty.drec(), empty );
}
}
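
The final loop preserves the extent chain while wiping extent contents: reuse() clears an extent's header, so xprev and xnext are saved first and written back through the journal. A toy illustration, with Extent and reuse() as hypothetical stand-ins:

#include <iostream>

// Hypothetical extent whose reuse() wipes everything, links included.
struct Extent {
    Extent* xprev = nullptr;
    Extent* xnext = nullptr;
    int firstRecord = -1;
    void reuse() { *this = Extent(); }
};

int main() {
    Extent a, b, c;
    a.xnext = &b; b.xprev = &a;
    b.xnext = &c; c.xprev = &b;
    b.firstRecord = 42;

    // Mirror the loop: save the links, reuse the extent, restore the links.
    Extent* prev = b.xprev;
    Extent* next = b.xnext;
    b.reuse();
    b.xprev = prev;
    b.xnext = next;

    std::cout << (b.xprev == &a && b.xnext == &c)          // 1: chain preserved
              << " firstRecord=" << b.firstRecord << '\n'; // wiped back to -1
}
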