diff options
Diffstat (limited to 'db/cap.cpp')
-rw-r--r-- | db/cap.cpp | 393 |
1 files changed, 393 insertions, 0 deletions
diff --git a/db/cap.cpp b/db/cap.cpp new file mode 100644 index 0000000..c676429 --- /dev/null +++ b/db/cap.cpp @@ -0,0 +1,393 @@ +// @file cap.cpp capped collection related + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "pch.h" +#include "pdfile.h" +#include "db.h" +#include "../util/mmap.h" +#include "../util/hashtab.h" +#include "../scripting/engine.h" +#include "btree.h" +#include <algorithm> +#include <list> +#include "query.h" +#include "queryutil.h" +#include "json.h" + +/* + capped collection layout + + d's below won't exist if things align perfectly: + + extent1 -> extent2 -> extent3 + ------------------- ----------------------- --------------------- + d r r r r r r r r d d r r r r d r r r r r d d r r r r r r r r r d + ^ ^ + oldest newest + + ^cappedFirstDeletedInCurExtent() + ^cappedLastDelRecLastExtent() + ^cappedListOfAllDeletedRecords() +*/ + + +namespace mongo { + + /* combine adjacent deleted records *for the current extent* of the capped collection + + this is O(n^2) but we call it for capped tables where typically n==1 or 2! + (or 3...there will be a little unused sliver at the end of the extent.) + */ + void NamespaceDetails::compact() { + assert(capped); + + list<DiskLoc> drecs; + + // Pull out capExtent's DRs from deletedList + DiskLoc i = cappedFirstDeletedInCurExtent(); + for (; !i.isNull() && inCapExtent( i ); i = i.drec()->nextDeleted ) + drecs.push_back( i ); + cappedFirstDeletedInCurExtent() = i; + + // This is the O(n^2) part. + drecs.sort(); + + list<DiskLoc>::iterator j = drecs.begin(); + assert( j != drecs.end() ); + DiskLoc a = *j; + while ( 1 ) { + j++; + if ( j == drecs.end() ) { + DEBUGGING out() << "TEMP: compact adddelrec\n"; + addDeletedRec(a.drec(), a); + break; + } + DiskLoc b = *j; + while ( a.a() == b.a() && a.getOfs() + a.drec()->lengthWithHeaders == b.getOfs() ) { + // a & b are adjacent. merge. + a.drec()->lengthWithHeaders += b.drec()->lengthWithHeaders; + j++; + if ( j == drecs.end() ) { + DEBUGGING out() << "temp: compact adddelrec2\n"; + addDeletedRec(a.drec(), a); + return; + } + b = *j; + } + DEBUGGING out() << "temp: compact adddelrec3\n"; + addDeletedRec(a.drec(), a); + a = b; + } + } + + DiskLoc &NamespaceDetails::cappedFirstDeletedInCurExtent() { + if ( cappedLastDelRecLastExtent().isNull() ) + return cappedListOfAllDeletedRecords(); + else + return cappedLastDelRecLastExtent().drec()->nextDeleted; + } + + void NamespaceDetails::cappedCheckMigrate() { + // migrate old NamespaceDetails format + assert( capped ); + if ( capExtent.a() == 0 && capExtent.getOfs() == 0 ) { + capFirstNewRecord = DiskLoc(); + capFirstNewRecord.setInvalid(); + // put all the DeletedRecords in cappedListOfAllDeletedRecords() + for ( int i = 1; i < Buckets; ++i ) { + DiskLoc first = deletedList[ i ]; + if ( first.isNull() ) + continue; + DiskLoc last = first; + for (; !last.drec()->nextDeleted.isNull(); last = last.drec()->nextDeleted ); + last.drec()->nextDeleted = cappedListOfAllDeletedRecords(); + cappedListOfAllDeletedRecords() = first; + deletedList[ i ] = DiskLoc(); + } + // NOTE cappedLastDelRecLastExtent() set to DiskLoc() in above + + // Last, in case we're killed before getting here + capExtent = firstExtent; + } + } + + bool NamespaceDetails::inCapExtent( const DiskLoc &dl ) const { + assert( !dl.isNull() ); + // We could have a rec or drec, doesn't matter. + return dl.drec()->myExtent( dl ) == capExtent.ext(); + } + + bool NamespaceDetails::nextIsInCapExtent( const DiskLoc &dl ) const { + assert( !dl.isNull() ); + DiskLoc next = dl.drec()->nextDeleted; + if ( next.isNull() ) + return false; + return inCapExtent( next ); + } + + void NamespaceDetails::advanceCapExtent( const char *ns ) { + // We want cappedLastDelRecLastExtent() to be the last DeletedRecord of the prev cap extent + // (or DiskLoc() if new capExtent == firstExtent) + if ( capExtent == lastExtent ) + cappedLastDelRecLastExtent() = DiskLoc(); + else { + DiskLoc i = cappedFirstDeletedInCurExtent(); + for (; !i.isNull() && nextIsInCapExtent( i ); i = i.drec()->nextDeleted ); + cappedLastDelRecLastExtent() = i; + } + + capExtent = theCapExtent()->xnext.isNull() ? firstExtent : theCapExtent()->xnext; + + /* this isn't true if a collection has been renamed...that is ok just used for diagnostics */ + //dassert( theCapExtent()->ns == ns ); + + theCapExtent()->assertOk(); + capFirstNewRecord = DiskLoc(); + } + + DiskLoc NamespaceDetails::__capAlloc( int len ) { + DiskLoc prev = cappedLastDelRecLastExtent(); + DiskLoc i = cappedFirstDeletedInCurExtent(); + DiskLoc ret; + for (; !i.isNull() && inCapExtent( i ); prev = i, i = i.drec()->nextDeleted ) { + // We need to keep at least one DR per extent in cappedListOfAllDeletedRecords(), + // so make sure there's space to create a DR at the end. + if ( i.drec()->lengthWithHeaders >= len + 24 ) { + ret = i; + break; + } + } + + /* unlink ourself from the deleted list */ + if ( !ret.isNull() ) { + if ( prev.isNull() ) + cappedListOfAllDeletedRecords() = ret.drec()->nextDeleted; + else + prev.drec()->nextDeleted = ret.drec()->nextDeleted; + ret.drec()->nextDeleted.setInvalid(); // defensive. + assert( ret.drec()->extentOfs < ret.getOfs() ); + } + + return ret; + } + + DiskLoc NamespaceDetails::cappedAlloc(const char *ns, int len) { + // signal done allocating new extents. + if ( !cappedLastDelRecLastExtent().isValid() ) + cappedLastDelRecLastExtent() = DiskLoc(); + + assert( len < 400000000 ); + int passes = 0; + int maxPasses = ( len / 30 ) + 2; // 30 is about the smallest entry that could go in the oplog + if ( maxPasses < 5000 ){ + // this is for bacwards safety since 5000 was the old value + maxPasses = 5000; + } + DiskLoc loc; + + // delete records until we have room and the max # objects limit achieved. + + /* this fails on a rename -- that is ok but must keep commented out */ + //assert( theCapExtent()->ns == ns ); + + theCapExtent()->assertOk(); + DiskLoc firstEmptyExtent; + while ( 1 ) { + if ( nrecords < max ) { + loc = __capAlloc( len ); + if ( !loc.isNull() ) + break; + } + + // If on first iteration through extents, don't delete anything. + if ( !capFirstNewRecord.isValid() ) { + advanceCapExtent( ns ); + if ( capExtent != firstExtent ) + capFirstNewRecord.setInvalid(); + // else signal done with first iteration through extents. + continue; + } + + if ( !capFirstNewRecord.isNull() && + theCapExtent()->firstRecord == capFirstNewRecord ) { + // We've deleted all records that were allocated on the previous + // iteration through this extent. + advanceCapExtent( ns ); + continue; + } + + if ( theCapExtent()->firstRecord.isNull() ) { + if ( firstEmptyExtent.isNull() ) + firstEmptyExtent = capExtent; + advanceCapExtent( ns ); + if ( firstEmptyExtent == capExtent ) { + maybeComplain( ns, len ); + return DiskLoc(); + } + continue; + } + + DiskLoc fr = theCapExtent()->firstRecord; + theDataFileMgr.deleteRecord(ns, fr.rec(), fr, true); // ZZZZZZZZZZZZ + compact(); + if( ++passes > maxPasses ) { + log() << "passes ns:" << ns << " len:" << len << " maxPasses: " << maxPasses << '\n'; + log() << "passes max:" << max << " nrecords:" << nrecords << " datasize: " << datasize << endl; + massert( 10345 , "passes >= maxPasses in capped collection alloc", false ); + } + } + + // Remember first record allocated on this iteration through capExtent. + if ( capFirstNewRecord.isValid() && capFirstNewRecord.isNull() ) + capFirstNewRecord = loc; + + return loc; + } + + void NamespaceDetails::dumpExtents() { + cout << "dumpExtents:" << endl; + for ( DiskLoc i = firstExtent; !i.isNull(); i = i.ext()->xnext ) { + Extent *e = i.ext(); + stringstream ss; + e->dump(ss); + cout << ss.str() << endl; + } + } + + void NamespaceDetails::cappedDumpDelInfo() { + cout << "dl[0]: " << deletedList[0].toString() << endl; + for( DiskLoc z = deletedList[0]; !z.isNull(); z = z.drec()->nextDeleted ) { + cout << " drec:" << z.toString() << " dreclen:" << hex << z.drec()->lengthWithHeaders << + " ext:" << z.drec()->myExtent(z)->myLoc.toString() << endl; + } + cout << "dl[1]: " << deletedList[1].toString() << endl; + } + + /* everything from end on, eliminate from the capped collection. + @param inclusive if true, deletes end (i.e. closed or open range) + */ + void NamespaceDetails::cappedTruncateAfter(const char *ns, DiskLoc end, bool inclusive) { + DEV assert( this == nsdetails(ns) ); + assert( cappedLastDelRecLastExtent().isValid() ); + + bool foundLast = false; + while( 1 ) { + if ( foundLast ) { + break; + } + DiskLoc curr = theCapExtent()->lastRecord; + assert( !curr.isNull() ); + if ( curr == end ) { + if ( inclusive ) { + foundLast = true; + } else { + break; + } + } + + uassert( 13415, "emptying the collection is not allowed", nrecords > 1 ); + + if ( !capLooped() ) { + theDataFileMgr.deleteRecord(ns, curr.rec(), curr, true); + compact(); + if ( theCapExtent()->lastRecord.isNull() ) { + assert( !theCapExtent()->xprev.isNull() ); + capExtent = theCapExtent()->xprev; + theCapExtent()->assertOk(); + if ( capExtent == firstExtent ) { + cappedLastDelRecLastExtent() = DiskLoc(); + } else { + // slow - there's no prev ptr for deleted rec + DiskLoc i = cappedListOfAllDeletedRecords(); + for( ; + !i.drec()->nextDeleted.isNull() && + !inCapExtent( i.drec()->nextDeleted ); + i = i.drec()->nextDeleted ); + assert( !i.drec()->nextDeleted.isNull() ); // I believe there is always at least one drec per extent + cappedLastDelRecLastExtent() = i; + } + } + continue; + } + + theDataFileMgr.deleteRecord(ns, curr.rec(), curr, true); + compact(); + if ( curr == capFirstNewRecord ) { // invalid, but can compare locations + capExtent = ( capExtent == firstExtent ) ? lastExtent : theCapExtent()->xprev; + theCapExtent()->assertOk(); + assert( !theCapExtent()->firstRecord.isNull() ); + capFirstNewRecord = theCapExtent()->firstRecord; + if ( capExtent == firstExtent ) { + cappedLastDelRecLastExtent() = DiskLoc(); + } else { + // slow - there's no prev ptr for deleted rec + DiskLoc i = cappedListOfAllDeletedRecords(); + for( ; + !i.drec()->nextDeleted.isNull() && + !inCapExtent( i.drec()->nextDeleted ); + i = i.drec()->nextDeleted ); + assert( !i.drec()->nextDeleted.isNull() ); // I believe there is always at least one drec per extent + cappedLastDelRecLastExtent() = i; + } + } + } + } + + void NamespaceDetails::emptyCappedCollection( const char *ns ) { + DEV assert( this == nsdetails(ns) ); + massert( 13424, "collection must be capped", capped ); + massert( 13425, "background index build in progress", !backgroundIndexBuildInProgress ); + massert( 13426, "indexes present", nIndexes == 0 ); + + ClientCursor::invalidate( ns ); + NamespaceDetailsTransient::clearForPrefix( ns ); + + cappedLastDelRecLastExtent() = DiskLoc(); + cappedListOfAllDeletedRecords() = DiskLoc(); + + // preserve firstExtent/lastExtent + capExtent = firstExtent; + datasize = nrecords = 0; + // lastExtentSize preserve + // nIndexes preserve 0 + // capped preserve true + // max preserve + paddingFactor = 1.0; + flags = 0; + capFirstNewRecord = DiskLoc(); + capFirstNewRecord.setInvalid(); + cappedLastDelRecLastExtent().setInvalid(); + // dataFileVersion preserve + // indexFileVersion preserve + multiKeyIndexBits = 0; + reservedA = 0; + extraOffset = 0; + // backgroundIndexBuildInProgress preserve 0 + memset(reserved, 0, sizeof(reserved)); + + for( DiskLoc ext = firstExtent; !ext.isNull(); ext = ext.ext()->xnext ) { + DiskLoc prev = ext.ext()->xprev; + DiskLoc next = ext.ext()->xnext; + DiskLoc empty = ext.ext()->reuse( ns ); + ext.ext()->xprev = prev; + ext.ext()->xnext = next; + addDeletedRec( empty.drec(), empty ); + } + } + +} |