diff options
Diffstat (limited to 'db/queryutil.h')
-rw-r--r-- | db/queryutil.h | 441 |
1 files changed, 395 insertions, 46 deletions
diff --git a/db/queryutil.h b/db/queryutil.h index 7d8be78..37dfa2a 100644 --- a/db/queryutil.h +++ b/db/queryutil.h @@ -22,26 +22,34 @@ namespace mongo { struct FieldBound { - BSONElement bound_; - bool inclusive_; + BSONElement _bound; + bool _inclusive; bool operator==( const FieldBound &other ) const { - return bound_.woCompare( other.bound_ ) == 0 && - inclusive_ == other.inclusive_; + return _bound.woCompare( other._bound ) == 0 && + _inclusive == other._inclusive; } + void flipInclusive() { _inclusive = !_inclusive; } }; struct FieldInterval { - FieldInterval(){} - FieldInterval( const BSONElement& e ){ - lower_.bound_ = upper_.bound_ = e; - lower_.inclusive_ = upper_.inclusive_ = true; + FieldInterval() : _cachedEquality( -1 ) {} + FieldInterval( const BSONElement& e ) : _cachedEquality( -1 ) { + _lower._bound = _upper._bound = e; + _lower._inclusive = _upper._inclusive = true; } - FieldBound lower_; - FieldBound upper_; - bool valid() const { - int cmp = lower_.bound_.woCompare( upper_.bound_, false ); - return ( cmp < 0 || ( cmp == 0 && lower_.inclusive_ && upper_.inclusive_ ) ); + FieldBound _lower; + FieldBound _upper; + bool strictValid() const { + int cmp = _lower._bound.woCompare( _upper._bound, false ); + return ( cmp < 0 || ( cmp == 0 && _lower._inclusive && _upper._inclusive ) ); } + bool equality() const { + if ( _cachedEquality == -1 ) { + _cachedEquality = ( _lower._inclusive && _upper._inclusive && _lower._bound.woCompare( _upper._bound, false ) == 0 ); + } + return _cachedEquality; + } + mutable int _cachedEquality; }; // range of a field's value that may be determined from query -- used to @@ -50,11 +58,16 @@ namespace mongo { public: FieldRange( const BSONElement &e = BSONObj().firstElement() , bool isNot=false , bool optimize=true ); const FieldRange &operator&=( const FieldRange &other ); - const FieldRange &operator|=( const FieldRange &other ); - BSONElement min() const { assert( !empty() ); return intervals_[ 0 ].lower_.bound_; } - BSONElement max() const { assert( !empty() ); return intervals_[ intervals_.size() - 1 ].upper_.bound_; } - bool minInclusive() const { assert( !empty() ); return intervals_[ 0 ].lower_.inclusive_; } - bool maxInclusive() const { assert( !empty() ); return intervals_[ intervals_.size() - 1 ].upper_.inclusive_; } + const FieldRange &operator|=( const FieldRange &other ); + // does not remove fully contained ranges (eg [1,3] - [2,2] doesn't remove anything) + // in future we can change so that an or on $in:[3] combined with $in:{$gt:2} doesn't scan 3 a second time + const FieldRange &operator-=( const FieldRange &other ); + // true iff other includes this + bool operator<=( const FieldRange &other ); + BSONElement min() const { assert( !empty() ); return _intervals[ 0 ]._lower._bound; } + BSONElement max() const { assert( !empty() ); return _intervals[ _intervals.size() - 1 ]._upper._bound; } + bool minInclusive() const { assert( !empty() ); return _intervals[ 0 ]._lower._inclusive; } + bool maxInclusive() const { assert( !empty() ); return _intervals[ _intervals.size() - 1 ]._upper._inclusive; } bool equality() const { return !empty() && @@ -62,20 +75,51 @@ namespace mongo { maxInclusive() && minInclusive(); } + bool inQuery() const { + if ( equality() ) { + return true; + } + for( vector< FieldInterval >::const_iterator i = _intervals.begin(); i != _intervals.end(); ++i ) { + if ( !i->equality() ) { + return false; + } + } + return true; + } bool nontrivial() const { return ! empty() && ( minKey.firstElement().woCompare( min(), false ) != 0 || maxKey.firstElement().woCompare( max(), false ) != 0 ); } - bool empty() const { return intervals_.empty(); } - const vector< FieldInterval > &intervals() const { return intervals_; } + bool empty() const { return _intervals.empty(); } + void makeEmpty() { _intervals.clear(); } + const vector< FieldInterval > &intervals() const { return _intervals; } string getSpecial() const { return _special; } - + void setExclusiveBounds() { + for( vector< FieldInterval >::iterator i = _intervals.begin(); i != _intervals.end(); ++i ) { + i->_lower._inclusive = false; + i->_upper._inclusive = false; + } + } + // constructs a range which is the reverse of the current one + // note - the resulting intervals may not be strictValid() + void reverse( FieldRange &ret ) const { + assert( _special.empty() ); + ret._intervals.clear(); + ret._objData = _objData; + for( vector< FieldInterval >::const_reverse_iterator i = _intervals.rbegin(); i != _intervals.rend(); ++i ) { + FieldInterval fi; + fi._lower = i->_upper; + fi._upper = i->_lower; + ret._intervals.push_back( fi ); + } + } private: BSONObj addObj( const BSONObj &o ); - vector< FieldInterval > intervals_; - vector< BSONObj > objData_; + void finishOperation( const vector< FieldInterval > &newIntervals, const FieldRange &other ); + vector< FieldInterval > _intervals; + vector< BSONObj > _objData; string _special; }; @@ -101,10 +145,10 @@ namespace mongo { return !operator==( other ); } bool operator<( const QueryPattern &other ) const { - map< string, Type >::const_iterator i = fieldTypes_.begin(); - map< string, Type >::const_iterator j = other.fieldTypes_.begin(); - while( i != fieldTypes_.end() ) { - if ( j == other.fieldTypes_.end() ) + map< string, Type >::const_iterator i = _fieldTypes.begin(); + map< string, Type >::const_iterator j = other._fieldTypes.begin(); + while( i != _fieldTypes.end() ) { + if ( j == other._fieldTypes.end() ) return false; if ( i->first < j->first ) return true; @@ -117,14 +161,14 @@ namespace mongo { ++i; ++j; } - if ( j != other.fieldTypes_.end() ) + if ( j != other._fieldTypes.end() ) return true; - return sort_.woCompare( other.sort_ ) < 0; + return _sort.woCompare( other._sort ) < 0; } private: QueryPattern() {} void setSort( const BSONObj sort ) { - sort_ = normalizeSort( sort ); + _sort = normalizeSort( sort ); } BSONObj static normalizeSort( const BSONObj &spec ) { if ( spec.isEmpty() ) @@ -140,73 +184,376 @@ namespace mongo { } return b.obj(); } - map< string, Type > fieldTypes_; - BSONObj sort_; + map< string, Type > _fieldTypes; + BSONObj _sort; }; + + // a BoundList contains intervals specified by inclusive start + // and end bounds. The intervals should be nonoverlapping and occur in + // the specified direction of traversal. For example, given a simple index {i:1} + // and direction +1, one valid BoundList is: (1, 2); (4, 6). The same BoundList + // would be valid for index {i:-1} with direction -1. + typedef vector< pair< BSONObj, BSONObj > > BoundList; // ranges of fields' value that may be determined from query -- used to // determine index limits class FieldRangeSet { public: + friend class FieldRangeOrSet; + friend class FieldRangeVector; FieldRangeSet( const char *ns, const BSONObj &query , bool optimize=true ); + bool hasRange( const char *fieldName ) const { + map< string, FieldRange >::const_iterator f = _ranges.find( fieldName ); + return f != _ranges.end(); + } const FieldRange &range( const char *fieldName ) const { - map< string, FieldRange >::const_iterator f = ranges_.find( fieldName ); - if ( f == ranges_.end() ) + map< string, FieldRange >::const_iterator f = _ranges.find( fieldName ); + if ( f == _ranges.end() ) + return trivialRange(); + return f->second; + } + FieldRange &range( const char *fieldName ) { + map< string, FieldRange >::iterator f = _ranges.find( fieldName ); + if ( f == _ranges.end() ) return trivialRange(); - return f->second; + return f->second; } int nNontrivialRanges() const { int count = 0; - for( map< string, FieldRange >::const_iterator i = ranges_.begin(); i != ranges_.end(); ++i ) + for( map< string, FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i ) if ( i->second.nontrivial() ) ++count; return count; } - const char *ns() const { return ns_; } - BSONObj query() const { return query_; } + const char *ns() const { return _ns; } // if fields is specified, order fields of returned object to match those of 'fields' BSONObj simplifiedQuery( const BSONObj &fields = BSONObj() ) const; bool matchPossible() const { - for( map< string, FieldRange >::const_iterator i = ranges_.begin(); i != ranges_.end(); ++i ) + for( map< string, FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i ) if ( i->second.empty() ) return false; return true; } QueryPattern pattern( const BSONObj &sort = BSONObj() ) const; - BoundList indexBounds( const BSONObj &keyPattern, int direction ) const; string getSpecial() const; + const FieldRangeSet &operator-=( const FieldRangeSet &other ) { + int nUnincluded = 0; + string unincludedKey; + map< string, FieldRange >::iterator i = _ranges.begin(); + map< string, FieldRange >::const_iterator j = other._ranges.begin(); + while( nUnincluded < 2 && i != _ranges.end() && j != other._ranges.end() ) { + int cmp = i->first.compare( j->first ); + if ( cmp == 0 ) { + if ( i->second <= j->second ) { + // nothing + } else { + ++nUnincluded; + unincludedKey = i->first; + } + ++i; + ++j; + } else if ( cmp < 0 ) { + ++i; + } else { + // other has a bound we don't, nothing can be done + return *this; + } + } + if ( j != other._ranges.end() ) { + // other has a bound we don't, nothing can be done + return *this; + } + if ( nUnincluded > 1 ) { + return *this; + } + if ( nUnincluded == 0 ) { + makeEmpty(); + return *this; + } + // nUnincluded == 1 + _ranges[ unincludedKey ] -= other._ranges[ unincludedKey ]; + appendQueries( other ); + return *this; + } + const FieldRangeSet &operator&=( const FieldRangeSet &other ) { + map< string, FieldRange >::iterator i = _ranges.begin(); + map< string, FieldRange >::const_iterator j = other._ranges.begin(); + while( i != _ranges.end() && j != other._ranges.end() ) { + int cmp = i->first.compare( j->first ); + if ( cmp == 0 ) { + i->second &= j->second; + ++i; + ++j; + } else if ( cmp < 0 ) { + ++i; + } else { + _ranges[ j->first ] = j->second; + ++j; + } + } + while( j != other._ranges.end() ) { + _ranges[ j->first ] = j->second; + ++j; + } + appendQueries( other ); + return *this; + } + // TODO get rid of this + BoundList indexBounds( const BSONObj &keyPattern, int direction ) const; private: + void appendQueries( const FieldRangeSet &other ) { + for( vector< BSONObj >::const_iterator i = other._queries.begin(); i != other._queries.end(); ++i ) { + _queries.push_back( *i ); + } + } + void makeEmpty() { + for( map< string, FieldRange >::iterator i = _ranges.begin(); i != _ranges.end(); ++i ) { + i->second.makeEmpty(); + } + } + void processQueryField( const BSONElement &e, bool optimize ); void processOpElement( const char *fieldName, const BSONElement &f, bool isNot, bool optimize ); static FieldRange *trivialRange_; static FieldRange &trivialRange(); - mutable map< string, FieldRange > ranges_; - const char *ns_; - BSONObj query_; + mutable map< string, FieldRange > _ranges; + const char *_ns; + // make sure memory for FieldRange BSONElements is owned + vector< BSONObj > _queries; }; + class FieldRangeVector { + public: + FieldRangeVector( const FieldRangeSet &frs, const BSONObj &keyPattern, int direction ) + :_keyPattern( keyPattern ), _direction( direction >= 0 ? 1 : -1 ) + { + _queries = frs._queries; + BSONObjIterator i( _keyPattern ); + while( i.more() ) { + BSONElement e = i.next(); + int number = (int) e.number(); // returns 0.0 if not numeric + bool forward = ( ( number >= 0 ? 1 : -1 ) * ( direction >= 0 ? 1 : -1 ) > 0 ); + if ( forward ) { + _ranges.push_back( frs.range( e.fieldName() ) ); + } else { + _ranges.push_back( FieldRange() ); + frs.range( e.fieldName() ).reverse( _ranges.back() ); + } + assert( !_ranges.back().empty() ); + } + uassert( 13385, "combinatorial limit of $in partitioning of result set exceeded", size() < 1000000 ); + } + long long size() { + long long ret = 1; + for( vector< FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i ) { + ret *= i->intervals().size(); + } + return ret; + } + BSONObj startKey() const { + BSONObjBuilder b; + for( vector< FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i ) { + const FieldInterval &fi = i->intervals().front(); + b.appendAs( fi._lower._bound, "" ); + } + return b.obj(); + } + BSONObj endKey() const { + BSONObjBuilder b; + for( vector< FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i ) { + const FieldInterval &fi = i->intervals().back(); + b.appendAs( fi._upper._bound, "" ); + } + return b.obj(); + } + BSONObj obj() const { + BSONObjBuilder b; + BSONObjIterator k( _keyPattern ); + for( int i = 0; i < (int)_ranges.size(); ++i ) { + BSONArrayBuilder a( b.subarrayStart( k.next().fieldName() ) ); + for( vector< FieldInterval >::const_iterator j = _ranges[ i ].intervals().begin(); + j != _ranges[ i ].intervals().end(); ++j ) { + a << BSONArray( BSON_ARRAY( j->_lower._bound << j->_upper._bound ).clientReadable() ); + } + a.done(); + } + return b.obj(); + } + bool matches( const BSONObj &obj ) const; + class Iterator { + public: + Iterator( const FieldRangeVector &v ) : _v( v ), _i( _v._ranges.size(), -1 ), _cmp( _v._ranges.size(), 0 ), _superlative( _v._ranges.size(), 0 ) { + static BSONObj minObj = minObject(); + static BSONElement minElt = minObj.firstElement(); + static BSONObj maxObj = maxObject(); + static BSONElement maxElt = maxObj.firstElement(); + BSONObjIterator i( _v._keyPattern ); + for( int j = 0; j < (int)_superlative.size(); ++j ) { + int number = (int) i.next().number(); + bool forward = ( ( number >= 0 ? 1 : -1 ) * ( _v._direction >= 0 ? 1 : -1 ) > 0 ); + _superlative[ j ] = forward ? &maxElt : &minElt; + } + } + static BSONObj minObject() { + BSONObjBuilder b; + b.appendMinKey( "" ); + return b.obj(); + } + static BSONObj maxObject() { + BSONObjBuilder b; + b.appendMaxKey( "" ); + return b.obj(); + } + bool advance() { + int i = _i.size() - 1; + while( i >= 0 && _i[ i ] >= ( (int)_v._ranges[ i ].intervals().size() - 1 ) ) { + --i; + } + if( i >= 0 ) { + _i[ i ]++; + for( unsigned j = i + 1; j < _i.size(); ++j ) { + _i[ j ] = 0; + } + } else { + _i[ 0 ] = _v._ranges[ 0 ].intervals().size(); + } + return ok(); + } + // return value + // -2 end of iteration + // -1 no skipping + // >= 0 skip parameter + int advance( const BSONObj &curr ); + const vector< const BSONElement * > &cmp() const { return _cmp; } + void setZero( int i ) { + for( int j = i; j < (int)_i.size(); ++j ) { + _i[ j ] = 0; + } + } + void setMinus( int i ) { + for( int j = i; j < (int)_i.size(); ++j ) { + _i[ j ] = -1; + } + } + bool ok() { + return _i[ 0 ] < (int)_v._ranges[ 0 ].intervals().size(); + } + BSONObj startKey() { + BSONObjBuilder b; + for( int unsigned i = 0; i < _i.size(); ++i ) { + const FieldInterval &fi = _v._ranges[ i ].intervals()[ _i[ i ] ]; + b.appendAs( fi._lower._bound, "" ); + } + return b.obj(); + } + // temp + BSONObj endKey() { + BSONObjBuilder b; + for( int unsigned i = 0; i < _i.size(); ++i ) { + const FieldInterval &fi = _v._ranges[ i ].intervals()[ _i[ i ] ]; + b.appendAs( fi._upper._bound, "" ); + } + return b.obj(); + } + // check + private: + const FieldRangeVector &_v; + vector< int > _i; + vector< const BSONElement* > _cmp; + vector< const BSONElement* > _superlative; + }; + private: + int matchingLowElement( const BSONElement &e, int i, bool direction ) const; + bool matchesElement( const BSONElement &e, int i, bool direction ) const; + vector< FieldRange > _ranges; + BSONObj _keyPattern; + int _direction; + vector< BSONObj > _queries; // make sure mem owned + }; + + // generages FieldRangeSet objects, accounting for or clauses + class FieldRangeOrSet { + public: + FieldRangeOrSet( const char *ns, const BSONObj &query , bool optimize=true ); + // if there's a useless or clause, we won't use or ranges to help with scanning + bool orFinished() const { return _orFound && _orSets.empty(); } + // removes first or clause, and removes the field ranges it covers from all subsequent or clauses + // this could invalidate the result of the last topFrs() + void popOrClause() { + massert( 13274, "no or clause to pop", !orFinished() ); + const FieldRangeSet &toPop = _orSets.front(); + list< FieldRangeSet >::iterator i = _orSets.begin(); + ++i; + while( i != _orSets.end() ) { + *i -= toPop; + if( !i->matchPossible() ) { + i = _orSets.erase( i ); + } else { + ++i; + } + } + _oldOrSets.push_front( toPop ); + _orSets.pop_front(); + } + FieldRangeSet *topFrs() const { + FieldRangeSet *ret = new FieldRangeSet( _baseSet ); + if (_orSets.size()){ + *ret &= _orSets.front(); + } + return ret; + } + void allClausesSimplified( vector< BSONObj > &ret ) const { + for( list< FieldRangeSet >::const_iterator i = _orSets.begin(); i != _orSets.end(); ++i ) { + if ( i->matchPossible() ) { + ret.push_back( i->simplifiedQuery() ); + } + } + } + string getSpecial() const { return _baseSet.getSpecial(); } + + bool moreOrClauses() const { return !_orSets.empty(); } + private: + FieldRangeSet _baseSet; + list< FieldRangeSet > _orSets; + list< FieldRangeSet > _oldOrSets; // make sure memory is owned + bool _orFound; + }; + /** used for doing field limiting */ class FieldMatcher { public: - - FieldMatcher(bool include=false) : _include(include){} + FieldMatcher() + : _include(true) + , _special(false) + , _includeID(true) + , _skip(0) + , _limit(-1) + {} void add( const BSONObj& o ); void append( BSONObjBuilder& b , const BSONElement& e ) const; BSONObj getSpec() const; + bool includeID() { return _includeID; } private: void add( const string& field, bool include ); - void appendArray( BSONObjBuilder& b , const BSONObj& a ) const; + void add( const string& field, int skip, int limit ); + void appendArray( BSONObjBuilder& b , const BSONObj& a , bool nested=false) const; bool _include; // true if default at this level is to include + bool _special; // true if this level can't be skipped or included without recursing //TODO: benchmark vector<pair> vs map typedef map<string, boost::shared_ptr<FieldMatcher> > FieldMap; FieldMap _fields; BSONObj _source; + bool _includeID; + + // used for $slice operator + int _skip; + int _limit; }; /** returns a string that when used as a matcher, would match a super set of regex() @@ -220,4 +567,6 @@ namespace mongo { /** returns the upper bound of a query that matches prefix */ string simpleRegexEnd( string prefix ); + long long applySkipLimit( long long num , const BSONObj& cmd ); + } // namespace mongo |