summaryrefslogtreecommitdiff
path: root/db/btree.h
diff options
context:
space:
mode:
Diffstat (limited to 'db/btree.h')
-rw-r--r--db/btree.h585
1 files changed, 376 insertions, 209 deletions
diff --git a/db/btree.h b/db/btree.h
index 233b4dc..bced95e 100644
--- a/db/btree.h
+++ b/db/btree.h
@@ -25,8 +25,12 @@
namespace mongo {
+ const int BucketSize = 8192;
+
#pragma pack(1)
struct _KeyNode {
+ /** Signals that we are writing this _KeyNode and casts away const */
+ _KeyNode& writing() const;
DiskLoc prevChildBucket; // the lchild
DiskLoc recordLoc; // location of the record associated with the key
short keyDataOfs() const {
@@ -41,15 +45,12 @@ namespace mongo {
_kdo = s;
assert(s>=0);
}
- void setUsed() {
- recordLoc.GETOFS() &= ~1;
- }
+ void setUsed() { recordLoc.GETOFS() &= ~1; }
void setUnused() {
- /* Setting ofs to odd is the sentinel for unused, as real recordLoc's are always
- even numbers.
- Note we need to keep its value basically the same as we use the recordLoc
- as part of the key in the index (to handle duplicate keys efficiently).
- */
+ // Setting ofs to odd is the sentinel for unused, as real recordLoc's are always
+ // even numbers.
+ // Note we need to keep its value basically the same as we use the recordLoc
+ // as part of the key in the index (to handle duplicate keys efficiently).
recordLoc.GETOFS() |= 1;
}
int isUnused() const {
@@ -63,7 +64,12 @@ namespace mongo {
class BucketBasics;
- /* wrapper - this is our in memory representation of the key. _KeyNode is the disk representation. */
+ /**
+ * wrapper - this is our in memory representation of the key.
+ * _KeyNode is the disk representation.
+ *
+ * This object and its bson key will become invalid if the key is moved.
+ */
class KeyNode {
public:
KeyNode(const BucketBasics& bb, const _KeyNode &k);
@@ -73,51 +79,111 @@ namespace mongo {
};
#pragma pack(1)
- /* this class is all about the storage management */
- class BucketBasics {
+ class BtreeData {
+ protected:
+ DiskLoc parent;
+ DiskLoc nextChild; // child bucket off and to the right of the highest key.
+ unsigned short _wasSize; // can be reused, value is 8192 in current pdfile version Apr2010
+ unsigned short _reserved1; // zero
+ int flags;
+
+ // basicInsert() assumes these three are together and in this order:
+ int emptySize; // size of the empty region
+ int topSize; // size of the data at the top of the bucket (keys are at the beginning or 'bottom')
+ int n; // # of keys so far.
+
+ int reserved;
+ char data[4];
+ };
+
+ /**
+ * This class is all about the storage management
+ *
+ * Const member functions of this class are those which may be called on
+ * an object for which writing has not been signaled. Non const member
+ * functions may only be called on objects for which writing has been
+ * signaled. Note that currently some const functions write to the
+ * underlying memory representation of this bucket using optimized methods
+ * to signal write operations.
+ *
+ * DiskLoc parameters that may shadow references within the btree should
+ * be passed by value rather than by reference to non const member
+ * functions or const member functions which may perform writes. This way
+ * a callee need not worry that write operations will change or invalidate
+ * its arguments.
+ *
+ * The current policy for dealing with bson arguments is the opposite of
+ * what is described above for DiskLoc arguments. We do
+ not want to copy bson into memory as an intermediate step for
+ * btree changes, so if bson is to be moved it must be copied to the new
+ * location before the old location is invalidated.
+ */
+ class BucketBasics : public BtreeData {
friend class BtreeBuilder;
friend class KeyNode;
public:
- void dumpTree(DiskLoc thisLoc, const BSONObj &order);
- bool isHead() { return parent.isNull(); }
- void assertValid(const Ordering &order, bool force = false);
- void assertValid(const BSONObj &orderObj, bool force = false) {
- return assertValid(Ordering::make(orderObj),force);
- }
- int fullValidate(const DiskLoc& thisLoc, const BSONObj &order, int *unusedCount = 0); /* traverses everything */
+ /** assert write intent declared for this bucket already */
+ void assertWritable();
- KeyNode keyNode(int i) const {
- if ( i >= n ){
+ void assertValid(const Ordering &order, bool force = false) const;
+ void assertValid(const BSONObj &orderObj, bool force = false) const { return assertValid(Ordering::make(orderObj),force); }
+
+ /**
+ * @return KeyNode for key at index i. The KeyNode will become invalid
+ * if the key is moved or reassigned, or if the node is packed.
+ */
+ const KeyNode keyNode(int i) const {
+ if ( i >= n ) {
massert( 13000 , (string)"invalid keyNode: " + BSON( "i" << i << "n" << n ).jsonString() , i < n );
}
return KeyNode(*this, k(i));
}
- protected:
+ static int headerSize() {
+ const BucketBasics *d = 0;
+ return (char*)&(d->data) - (char*)&(d->parent);
+ }
+ static int bodySize() { return BucketSize - headerSize(); }
- void modified(const DiskLoc& thisLoc);
+ // for testing
+ int nKeys() const { return n; }
+ const DiskLoc getNextChild() const { return nextChild; }
- char * dataAt(short ofs) {
- return data + ofs;
- }
+ protected:
+ char * dataAt(short ofs) { return data + ofs; }
void init(); // initialize a new node
- /* returns false if node is full and must be split
- keypos is where to insert -- inserted after that key #. so keypos=0 is the leftmost one.
- */
- bool basicInsert(const DiskLoc& thisLoc, int &keypos, const DiskLoc& recordLoc, const BSONObj& key, const Ordering &order);
-
/**
- * @return true if works, false if not enough space
+ * @return false if node is full and must be split
+ * @keypos is where to insert -- inserted before that key #. so keypos=0 is the leftmost one.
+ * keypos will be updated if keys are moved as a result of pack()
+ * This function will modify the btree bucket memory representation even
+ * though it is marked const.
*/
- bool _pushBack(const DiskLoc& recordLoc, BSONObj& key, const Ordering &order, DiskLoc prevChild);
- void pushBack(const DiskLoc& recordLoc, BSONObj& key, const Ordering &order, DiskLoc prevChild){
+ bool basicInsert(const DiskLoc thisLoc, int &keypos, const DiskLoc recordLoc, const BSONObj& key, const Ordering &order) const;
+
+ /** @return true if works, false if not enough space */
+ bool _pushBack(const DiskLoc recordLoc, const BSONObj& key, const Ordering &order, const DiskLoc prevChild);
+ void pushBack(const DiskLoc recordLoc, const BSONObj& key, const Ordering &order, const DiskLoc prevChild) {
bool ok = _pushBack( recordLoc , key , order , prevChild );
assert(ok);
}
+
+ /**
+ * This is a special purpose function used by BtreeBuilder. The
+ * interface is quite dangerous if you're not careful. The bson key
+ * returned here points to bucket memory that has been invalidated but
+ * not yet reclaimed.
+ *
+ * TODO Maybe this could be replaced with two functions, one which
+ * returns the last key without deleting it and another which simply
+ * deletes the last key. Then the caller would have enough control to
+ * ensure proper memory integrity.
+ */
void popBack(DiskLoc& recLoc, BSONObj& key);
- void _delKeyAtPos(int keypos); // low level version that doesn't deal with child ptrs.
+
+ void _delKeyAtPos(int keypos, bool mayEmpty = false); // low level version that doesn't deal with child ptrs.
/* !Packed means there is deleted fragment space within the bucket.
We "repack" when we run out of space before considering the node
@@ -125,145 +191,257 @@ namespace mongo {
*/
enum Flags { Packed=1 };
- DiskLoc& childForPos(int p) {
- return p == n ? nextChild : k(p).prevChildBucket;
- }
+ const DiskLoc& childForPos(int p) const { return p == n ? nextChild : k(p).prevChildBucket; }
+ DiskLoc& childForPos(int p) { return p == n ? nextChild : k(p).prevChildBucket; }
int totalDataSize() const;
- void pack( const Ordering &order, int &refPos);
- void setNotPacked();
- void setPacked();
+ /** @return true if the key may be dropped by pack() */
+ bool mayDropKey( int index, int refPos ) const;
+
+ /**
+ * Pack the bucket to reclaim space from invalidated memory.
+ * @refPos is an index in the bucket which may be updated if we
+ * delete keys from the bucket
+ * This function may cast away const and perform a write.
+ */
+ void _pack(const DiskLoc thisLoc, const Ordering &order, int &refPos) const;
+ /** Pack when already writable */
+ void _packReadyForMod(const Ordering &order, int &refPos);
+
+ /**
+ * @return the size of non header data in this bucket if we were to
+ * call pack().
+ */
+ int packedDataSize( int refPos ) const;
+ void setNotPacked() { flags &= ~Packed; }
+ void setPacked() { flags |= Packed; }
int _alloc(int bytes);
void _unalloc(int bytes);
void truncateTo(int N, const Ordering &order, int &refPos);
+ /** drop specified number of keys from beginning of key array, and pack */
+ void dropFront(int nDrop, const Ordering &order, int &refPos);
void markUnused(int keypos);
- /* BtreeBuilder uses the parent var as a temp place to maintain a linked list chain.
- we use tempNext() when we do that to be less confusing. (one might have written a union in C)
- */
+ /**
+ * BtreeBuilder uses the parent var as a temp place to maintain a linked list chain.
+ * we use tempNext() when we do that to be less confusing. (one might have written a union in C)
+ */
+ const DiskLoc& tempNext() const { return parent; }
DiskLoc& tempNext() { return parent; }
- public:
- DiskLoc parent;
-
- string bucketSummary() const {
- stringstream ss;
- ss << " Bucket info:" << endl;
- ss << " n: " << n << endl;
- ss << " parent: " << parent.toString() << endl;
- ss << " nextChild: " << parent.toString() << endl;
- ss << " flags:" << flags << endl;
- ss << " emptySize: " << emptySize << " topSize: " << topSize << endl;
- return ss.str();
- }
-
- bool isUsed( int i ) const {
- return k(i).isUsed();
- }
+ void _shape(int level, stringstream&) const;
+ int Size() const;
+ const _KeyNode& k(int i) const { return ((const _KeyNode*)data)[i]; }
+ _KeyNode& k(int i) { return ((_KeyNode*)data)[i]; }
- protected:
- void _shape(int level, stringstream&);
- DiskLoc nextChild; // child bucket off and to the right of the highest key.
+ /** @return the key position where a split should occur on insert */
+ int splitPos( int keypos ) const;
- private:
- unsigned short _wasSize; // can be reused, value is 8192 in current pdfile version Apr2010
- unsigned short _reserved1; // zero
+ /**
+ * Adds new entries to beginning of key array, shifting existing
+ * entries to the right. After this is called, setKey() must be called
+ * on all the newly created entries in the key array.
+ */
+ void reserveKeysFront( int nAdd );
- protected:
- int Size() const;
- int flags;
- int emptySize; // size of the empty region
- int topSize; // size of the data at the top of the bucket (keys are at the beginning or 'bottom')
- int n; // # of keys so far.
- int reserved;
- const _KeyNode& k(int i) const {
- return ((_KeyNode*)data)[i];
- }
- _KeyNode& k(int i) {
- return ((_KeyNode*)data)[i];
- }
- char data[4];
+ /**
+ * Sets an existing key using the given parameters.
+ * @i index of key to set
+ */
+ void setKey( int i, const DiskLoc recordLoc, const BSONObj &key, const DiskLoc prevChildBucket );
};
-#pragma pack()
-#pragma pack(1)
+ /**
+ * This class adds functionality for manipulating buckets that are assembled
+ * in a tree. The requirements for const and non const functions and
+ * arguments are generally the same as in BtreeBucket. Because this class
+ * deals with tree structure, some functions that are marked const may
+ * trigger modification of another node in the btree or potentially of the
+ * current node. In such cases, the function's implementation explicitly
+ * casts away const when indicating an intent to write to the durability
+ * layer. The DiskLocs provided to such functions should be passed by
+ * value if they shadow pointers within the btree.
+ *
+ * To clarify enforcement of referential integrity in this implementation,
+ * we use the following pattern when deleting data we have a persistent
+ * pointer to. The pointer is cleared or removed explicitly, then the data
+ * it pointed to is cleaned up with a helper function.
+ *
+ * TODO It might make sense to put some of these functions in a class
+ * representing a full btree instead of a single btree bucket. That would
+ * allow us to use the const qualifier in a manner more consistent with
+ * standard usage. Right now the interface is for both a node and a tree,
+ * so assignment of const is sometimes nonideal.
+ *
+ * TODO There are several cases in which the this pointer is invalidated
+ * as a result of deallocation. A separate class representing a btree would
+ * alleviate some fragile cases where the implementation must currently
+ * behave correctly if the this pointer is suddenly invalidated by a
+ * callee.
+ */
class BtreeBucket : public BucketBasics {
friend class BtreeCursor;
public:
- void dump();
+ bool isHead() const { return parent.isNull(); }
+ void dumpTree(const DiskLoc &thisLoc, const BSONObj &order) const;
+ int fullValidate(const DiskLoc& thisLoc, const BSONObj &order, int *unusedCount = 0, bool strict = false) const; /* traverses everything */
- /* @return true if key exists in index
+ bool isUsed( int i ) const { return k(i).isUsed(); }
+ string bucketSummary() const;
+ void dump() const;
- order - indicates order of keys in the index. this is basically the index's key pattern, e.g.:
- BSONObj order = ((IndexDetails&)idx).keyPattern();
- likewise below in bt_insert() etc.
- */
- bool exists(const IndexDetails& idx, DiskLoc thisLoc, const BSONObj& key, const Ordering& order);
+ /**
+ * @return true if key exists in index
+ *
+ * @order - indicates order of keys in the index. this is basically the index's key pattern, e.g.:
+ * BSONObj order = ((IndexDetails&)idx).keyPattern();
+ * likewise below in bt_insert() etc.
+ */
+ bool exists(const IndexDetails& idx, const DiskLoc &thisLoc, const BSONObj& key, const Ordering& order) const;
bool wouldCreateDup(
- const IndexDetails& idx, DiskLoc thisLoc,
+ const IndexDetails& idx, const DiskLoc &thisLoc,
const BSONObj& key, const Ordering& order,
- DiskLoc self);
+ const DiskLoc &self) const;
+
+ static DiskLoc addBucket(const IndexDetails&); /* start a new index off, empty */
+ /** invalidates 'this' and thisLoc */
+ void deallocBucket(const DiskLoc thisLoc, const IndexDetails &id);
- static DiskLoc addBucket(IndexDetails&); /* start a new index off, empty */
- void deallocBucket(const DiskLoc &thisLoc, IndexDetails &id);
-
static void renameIndexNamespace(const char *oldNs, const char *newNs);
- int bt_insert(DiskLoc thisLoc, DiskLoc recordLoc,
- const BSONObj& key, const Ordering &order, bool dupsAllowed,
- IndexDetails& idx, bool toplevel = true);
+ /** This function may change the btree root */
+ int bt_insert(const DiskLoc thisLoc, const DiskLoc recordLoc,
+ const BSONObj& key, const Ordering &order, bool dupsAllowed,
+ IndexDetails& idx, bool toplevel = true) const;
- bool unindex(const DiskLoc& thisLoc, IndexDetails& id, BSONObj& key, const DiskLoc& recordLoc);
+ /** This function may change the btree root */
+ bool unindex(const DiskLoc thisLoc, IndexDetails& id, const BSONObj& key, const DiskLoc recordLoc) const;
- /* locate may return an "unused" key that is just a marker. so be careful.
- looks for a key:recordloc pair.
+ /**
+ * locate may return an "unused" key that is just a marker. so be careful.
+ * looks for a key:recordloc pair.
+ *
+ * @found - returns true if exact match found. note you can get back a position
+ * result even if found is false.
+ */
+ DiskLoc locate(const IndexDetails &idx , const DiskLoc& thisLoc, const BSONObj& key, const Ordering &order,
+ int& pos, bool& found, const DiskLoc &recordLoc, int direction=1) const;
- found - returns true if exact match found. note you can get back a position
- result even if found is false.
- */
- DiskLoc locate(const IndexDetails& , const DiskLoc& thisLoc, const BSONObj& key, const Ordering &order,
- int& pos, bool& found, DiskLoc recordLoc, int direction=1);
-
/**
* find the first instance of the key
* does not handle dups
- * returned DiskLock isNull if can't find anything with that
+ * returned DiskLoc isNull if can't find anything with that
+ * @return the record location of the first match
*/
- DiskLoc findSingle( const IndexDetails& , const DiskLoc& thisLoc, const BSONObj& key );
+ DiskLoc findSingle( const IndexDetails &indexdetails , const DiskLoc& thisLoc, const BSONObj& key ) const;
+
+ /** advance one key position in the index: */
+ DiskLoc advance(const DiskLoc& thisLoc, int& keyOfs, int direction, const char *caller) const;
- /* advance one key position in the index: */
- DiskLoc advance(const DiskLoc& thisLoc, int& keyOfs, int direction, const char *caller);
-
- void advanceTo(const IndexDetails &id, DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, const vector< const BSONElement * > &keyEnd, const Ordering &order, int direction );
-
- DiskLoc getHead(const DiskLoc& thisLoc);
+ void advanceTo(DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction ) const;
+ void customLocate(DiskLoc &thisLoc, int &keyOfs, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction, pair< DiskLoc, int > &bestParent ) const;
- /* get tree shape */
- void shape(stringstream&);
+ const DiskLoc getHead(const DiskLoc& thisLoc) const;
+
+ /** get tree shape */
+ void shape(stringstream&) const;
static void a_test(IndexDetails&);
- private:
- void fixParentPtrs(const DiskLoc& thisLoc);
- void delBucket(const DiskLoc& thisLoc, IndexDetails&);
- void delKeyAtPos(const DiskLoc& thisLoc, IndexDetails& id, int p);
- BSONObj keyAt(int keyOfs) {
+ static int getLowWaterMark();
+ static int getKeyMax();
+
+ protected:
+ /**
+ * Fix parent pointers for children
+ * @firstIndex first index to modify
+ * @lastIndex last index to modify (-1 means last index is n)
+ */
+ void fixParentPtrs(const DiskLoc thisLoc, int firstIndex = 0, int lastIndex = -1) const;
+
+ /** invalidates this and thisLoc */
+ void delBucket(const DiskLoc thisLoc, const IndexDetails&);
+ /** may invalidate this and thisLoc */
+ void delKeyAtPos(const DiskLoc thisLoc, IndexDetails& id, int p, const Ordering &order);
+
+ /**
+ * May balance utilization of this bucket with a neighbor, either by
+ * merging the buckets or shifting nodes.
+ * @return true iff balancing was performed.
+ * NOTE This function may invalidate thisLoc.
+ */
+ bool mayBalanceWithNeighbors(const DiskLoc thisLoc, IndexDetails &id, const Ordering &order) const;
+
+ /** @return true if balance succeeded */
+ bool tryBalanceChildren( const DiskLoc thisLoc, int leftIndex, IndexDetails &id, const Ordering &order ) const;
+ void doBalanceChildren( const DiskLoc thisLoc, int leftIndex, IndexDetails &id, const Ordering &order );
+ void doBalanceLeftToRight( const DiskLoc thisLoc, int leftIndex, int split,
+ BtreeBucket *l, const DiskLoc lchild,
+ BtreeBucket *r, const DiskLoc rchild,
+ IndexDetails &id, const Ordering &order );
+ void doBalanceRightToLeft( const DiskLoc thisLoc, int leftIndex, int split,
+ BtreeBucket *l, const DiskLoc lchild,
+ BtreeBucket *r, const DiskLoc rchild,
+ IndexDetails &id, const Ordering &order );
+
+ /** may invalidate this and thisLoc */
+ void doMergeChildren( const DiskLoc thisLoc, int leftIndex, IndexDetails &id, const Ordering &order);
+
+ /** will invalidate this and thisLoc */
+ void replaceWithNextChild( const DiskLoc thisLoc, IndexDetails &id );
+
+ /** @return true iff left and right child can be merged into one node */
+ bool canMergeChildren( const DiskLoc &thisLoc, int leftIndex ) const;
+
+ /**
+ * @return index of the rebalanced separator; the index value is
+ * determined as if we had an array
+ * <left bucket keys array>.push( <old separator> ).concat( <right bucket keys array> )
+ * This is only expected to be called if the left and right child
+ * cannot be merged.
+ * This function is expected to be called on packed buckets, see also
+ * comments for splitPos().
+ */
+ int rebalancedSeparatorPos( const DiskLoc &thisLoc, int leftIndex ) const;
+
+ int indexInParent( const DiskLoc &thisLoc ) const;
+ BSONObj keyAt(int keyOfs) const {
return keyOfs >= n ? BSONObj() : keyNode(keyOfs).key;
}
static BtreeBucket* allocTemp(); /* caller must release with free() */
- void insertHere(DiskLoc thisLoc, int keypos,
- DiskLoc recordLoc, const BSONObj& key, const Ordering &order,
- DiskLoc lchild, DiskLoc rchild, IndexDetails&);
- int _insert(DiskLoc thisLoc, DiskLoc recordLoc,
+
+ /** split bucket */
+ void split(const DiskLoc thisLoc, int keypos,
+ const DiskLoc recordLoc, const BSONObj& key,
+ const Ordering& order, const DiskLoc lchild, const DiskLoc rchild, IndexDetails& idx);
+
+ void insertHere(const DiskLoc thisLoc, int keypos,
+ const DiskLoc recordLoc, const BSONObj& key, const Ordering &order,
+ const DiskLoc lchild, const DiskLoc rchild, IndexDetails &idx) const;
+
+ int _insert(const DiskLoc thisLoc, const DiskLoc recordLoc,
const BSONObj& key, const Ordering &order, bool dupsAllowed,
- DiskLoc lChild, DiskLoc rChild, IndexDetails&);
- bool find(const IndexDetails& idx, const BSONObj& key, DiskLoc recordLoc, const Ordering &order, int& pos, bool assertIfDup);
- bool customFind( int l, int h, const BSONObj &keyBegin, int keyBeginLen, const vector< const BSONElement * > &keyEnd, const Ordering &order, int direction, DiskLoc &thisLoc, int &keyOfs, pair< DiskLoc, int > &bestParent );
+ const DiskLoc lChild, const DiskLoc rChild, IndexDetails &idx) const;
+ bool find(const IndexDetails& idx, const BSONObj& key, const DiskLoc &recordLoc, const Ordering &order, int& pos, bool assertIfDup) const;
+ bool customFind( int l, int h, const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive, const Ordering &order, int direction, DiskLoc &thisLoc, int &keyOfs, pair< DiskLoc, int > &bestParent ) const;
static void findLargestKey(const DiskLoc& thisLoc, DiskLoc& largestLoc, int& largestKey);
- static int customBSONCmp( const BSONObj &l, const BSONObj &rBegin, int rBeginLen, const vector< const BSONElement * > &rEnd, const Ordering &o );
+ static int customBSONCmp( const BSONObj &l, const BSONObj &rBegin, int rBeginLen, bool rSup, const vector< const BSONElement * > &rEnd, const vector< bool > &rEndInclusive, const Ordering &o, int direction );
+ static void fix(const DiskLoc thisLoc, const DiskLoc child);
+
+ /** Replaces an existing key with the new specified key, splitting if necessary */
+ void setInternalKey( const DiskLoc thisLoc, int keypos,
+ const DiskLoc recordLoc, const BSONObj &key, const Ordering &order,
+ const DiskLoc lchild, const DiskLoc rchild, IndexDetails &idx);
+
+ /**
+ * Deletes the specified key, replacing it with the key immediately
+ * preceding or succeeding it in the btree. Either the left or right
+ * child of the specified key must be non null.
+ */
+ void deleteInternalKey( const DiskLoc thisLoc, int keypos, IndexDetails &id, const Ordering &order );
public:
- // simply builds and returns a dup key error message string
+ /** simply builds and returns a dup key error message string */
static string dupKeyError( const IndexDetails& idx , const BSONObj& key );
};
#pragma pack()
@@ -271,76 +449,59 @@ namespace mongo {
class BtreeCursor : public Cursor {
public:
BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails&, const BSONObj &startKey, const BSONObj &endKey, bool endKeyInclusive, int direction );
-
BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails& _id, const shared_ptr< FieldRangeVector > &_bounds, int _direction );
- ~BtreeCursor(){
- }
- virtual bool ok() {
- return !bucket.isNull();
- }
- bool eof() {
- return !ok();
- }
+ virtual bool ok() { return !bucket.isNull(); }
virtual bool advance();
-
virtual void noteLocation(); // updates keyAtKeyOfs...
virtual void checkLocation();
virtual bool supportGetMore() { return true; }
virtual bool supportYields() { return true; }
- /* used for multikey index traversal to avoid sending back dups. see Matcher::matches().
- if a multikey index traversal:
- if loc has already been sent, returns true.
- otherwise, marks loc as sent.
- @return true if the loc has not been seen
- */
+ /**
+ * used for multikey index traversal to avoid sending back dups. see Matcher::matches().
+ * if a multikey index traversal:
+ * if loc has already been sent, returns true.
+ * otherwise, marks loc as sent.
+ * @return true if the loc has not been seen
+ */
virtual bool getsetdup(DiskLoc loc) {
- if( multikey ) {
- pair<set<DiskLoc>::iterator, bool> p = dups.insert(loc);
+ if( _multikey ) {
+ pair<set<DiskLoc>::iterator, bool> p = _dups.insert(loc);
return !p.second;
}
return false;
}
- _KeyNode& _currKeyNode() {
+ virtual bool modifiedKeys() const { return _multikey; }
+ virtual bool isMultiKey() const { return _multikey; }
+
+ const _KeyNode& _currKeyNode() const {
assert( !bucket.isNull() );
- _KeyNode& kn = bucket.btree()->k(keyOfs);
+ const _KeyNode& kn = bucket.btree()->k(keyOfs);
assert( kn.isUsed() );
return kn;
}
- KeyNode currKeyNode() const {
+ const KeyNode currKeyNode() const {
assert( !bucket.isNull() );
return bucket.btree()->keyNode(keyOfs);
}
- virtual BSONObj currKey() const {
- return currKeyNode().key;
- }
- virtual BSONObj indexKeyPattern() {
- return indexDetails.keyPattern();
- }
+ virtual BSONObj currKey() const { return currKeyNode().key; }
+ virtual BSONObj indexKeyPattern() { return indexDetails.keyPattern(); }
virtual void aboutToDeleteBucket(const DiskLoc& b) {
if ( bucket == b )
keyOfs = -1;
}
- virtual DiskLoc currLoc() {
- return !bucket.isNull() ? _currKeyNode().recordLoc : DiskLoc();
- }
- virtual DiskLoc refLoc() {
- return currLoc();
- }
- virtual Record* _current() {
- return currLoc().rec();
- }
- virtual BSONObj current() {
- return BSONObj(_current());
- }
+ virtual DiskLoc currLoc() { return !bucket.isNull() ? _currKeyNode().recordLoc : DiskLoc(); }
+ virtual DiskLoc refLoc() { return currLoc(); }
+ virtual Record* _current() { return currLoc().rec(); }
+ virtual BSONObj current() { return BSONObj(_current()); }
virtual string toString() {
string s = string("BtreeCursor ") + indexDetails.indexName();
- if ( direction < 0 ) s += " reverse";
- if ( bounds_.get() && bounds_->size() > 1 ) s += " multi";
+ if ( _direction < 0 ) s += " reverse";
+ if ( _bounds.get() && _bounds->size() > 1 ) s += " multi";
return s;
}
@@ -351,77 +512,81 @@ namespace mongo {
virtual BSONObj prettyIndexBounds() const {
if ( !_independentFieldRanges ) {
return BSON( "start" << prettyKey( startKey ) << "end" << prettyKey( endKey ) );
- } else {
- return bounds_->obj();
+ }
+ else {
+ return _bounds->obj();
}
}
-
+
void forgetEndKey() { endKey = BSONObj(); }
virtual CoveredIndexMatcher *matcher() const { return _matcher.get(); }
-
- virtual void setMatcher( shared_ptr< CoveredIndexMatcher > matcher ) {
- _matcher = matcher;
- }
- // for debugging only
- DiskLoc getBucket() const { return bucket; }
-
+ virtual void setMatcher( shared_ptr< CoveredIndexMatcher > matcher ) { _matcher = matcher; }
+
+ virtual long long nscanned() { return _nscanned; }
+
+ /** for debugging only */
+ const DiskLoc getBucket() const { return bucket; }
+
private:
- /* Our btrees may (rarely) have "unused" keys when items are deleted.
- Skip past them.
- */
+ /**
+ * Our btrees may (rarely) have "unused" keys when items are deleted.
+ * Skip past them.
+ */
bool skipUnusedKeys( bool mayJump );
bool skipOutOfRangeKeysAndCheckEnd();
void skipAndCheck();
void checkEnd();
- // selective audits on construction
+ /** selective audits on construction */
void audit();
- // set initial bucket
+ /** set initial bucket */
void init();
- void advanceTo( const BSONObj &keyBegin, int keyBeginLen, const vector< const BSONElement * > &keyEnd);
-
+ /** if afterKey is true, we want the first key with values of the keyBegin fields greater than keyBegin */
+ void advanceTo( const BSONObj &keyBegin, int keyBeginLen, bool afterKey, const vector< const BSONElement * > &keyEnd, const vector< bool > &keyEndInclusive );
+
friend class BtreeBucket;
- set<DiskLoc> dups;
- NamespaceDetails *d;
- int idxNo;
-
+
+ set<DiskLoc> _dups;
+ NamespaceDetails * const d;
+ const int idxNo;
BSONObj startKey;
BSONObj endKey;
- bool endKeyInclusive_;
-
- bool multikey; // note this must be updated every getmore batch in case someone added a multikey...
-
+ bool _endKeyInclusive;
+ bool _multikey; // this must be updated every getmore batch in case someone added a multikey
const IndexDetails& indexDetails;
- BSONObj order;
- Ordering _ordering;
+ const BSONObj _order;
+ const Ordering _ordering;
DiskLoc bucket;
int keyOfs;
- int direction; // 1=fwd,-1=reverse
+ const int _direction; // 1=fwd,-1=reverse
BSONObj keyAtKeyOfs; // so we can tell if things moved around on us between the query and the getMore call
DiskLoc locAtKeyOfs;
- shared_ptr< FieldRangeVector > bounds_;
+ const shared_ptr< FieldRangeVector > _bounds;
auto_ptr< FieldRangeVector::Iterator > _boundsIterator;
const IndexSpec& _spec;
shared_ptr< CoveredIndexMatcher > _matcher;
bool _independentFieldRanges;
+ long long _nscanned;
};
- inline bool IndexDetails::hasKey(const BSONObj& key) {
+ inline bool IndexDetails::hasKey(const BSONObj& key) {
return head.btree()->exists(*this, head, key, Ordering::make(keyPattern()));
}
- inline bool IndexDetails::wouldCreateDup(const BSONObj& key, DiskLoc self) {
+ inline bool IndexDetails::wouldCreateDup(const BSONObj& key, DiskLoc self) {
return head.btree()->wouldCreateDup(*this, head, key, Ordering::make(keyPattern()), self);
}
- /* build btree from the bottom up */
- /* _ TODO dropDups */
+ /**
+ * build btree from the bottom up
+ * _ TODO dropDups
+ */
class BtreeBuilder {
- bool dupsAllowed;
+ bool dupsAllowed;
IndexDetails& idx;
unsigned long long n;
BSONObj keyLast;
@@ -434,18 +599,20 @@ namespace mongo {
void newBucket();
void buildNextLevel(DiskLoc);
+ void mayCommitProgressDurably();
public:
~BtreeBuilder();
BtreeBuilder(bool _dupsAllowed, IndexDetails& _idx);
- /* keys must be added in order */
+ /** keys must be added in order */
void addKey(BSONObj& key, DiskLoc loc);
- /* commit work. if not called, destructor will clean up partially completed work
- (in case exception has happened).
- */
+ /**
+ * commit work. if not called, destructor will clean up partially completed work
+ * (in case exception has happened).
+ */
void commit();
unsigned long long getn() { return n; }