summaryrefslogtreecommitdiff
path: root/db/queryutil.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'db/queryutil.cpp')
-rw-r--r--db/queryutil.cpp840
1 files changed, 407 insertions, 433 deletions
diff --git a/db/queryutil.cpp b/db/queryutil.cpp
index 2153046..1cd750b 100644
--- a/db/queryutil.cpp
+++ b/db/queryutil.cpp
@@ -23,111 +23,119 @@
#include "queryoptimizer.h"
#include "../util/unittest.h"
#include "dbmessage.h"
+#include "indexkey.h"
namespace mongo {
extern BSONObj staticNull;
-
+
/** returns a string that when used as a matcher, would match a super set of regex()
returns "" for complex regular expressions
used to optimize queries in some simple regex cases that start with '^'
if purePrefix != NULL, sets it to whether the regex can be converted to a range query
*/
- string simpleRegex(const char* regex, const char* flags, bool* purePrefix){
+ string simpleRegex(const char* regex, const char* flags, bool* purePrefix) {
string r = "";
if (purePrefix) *purePrefix = false;
bool multilineOK;
- if ( regex[0] == '\\' && regex[1] == 'A'){
+ if ( regex[0] == '\\' && regex[1] == 'A') {
multilineOK = true;
regex += 2;
- } else if (regex[0] == '^') {
+ }
+ else if (regex[0] == '^') {
multilineOK = false;
regex += 1;
- } else {
+ }
+ else {
return r;
}
bool extended = false;
- while (*flags){
- switch (*(flags++)){
- case 'm': // multiline
- if (multilineOK)
- continue;
- else
- return r;
- case 'x': // extended
- extended = true;
- break;
- default:
- return r; // cant use index
+ while (*flags) {
+ switch (*(flags++)) {
+ case 'm': // multiline
+ if (multilineOK)
+ continue;
+ else
+ return r;
+ case 'x': // extended
+ extended = true;
+ break;
+ default:
+ return r; // cant use index
}
}
stringstream ss;
- while(*regex){
+ while(*regex) {
char c = *(regex++);
- if ( c == '*' || c == '?' ){
+ if ( c == '*' || c == '?' ) {
// These are the only two symbols that make the last char optional
r = ss.str();
r = r.substr( 0 , r.size() - 1 );
return r; //breaking here fails with /^a?/
- } else if (c == '\\'){
+ }
+ else if (c == '\\') {
// slash followed by non-alphanumeric represents the following char
c = *(regex++);
if ((c >= 'A' && c <= 'Z') ||
- (c >= 'a' && c <= 'z') ||
- (c >= '0' && c <= '0') ||
- (c == '\0'))
- {
+ (c >= 'a' && c <= 'z') ||
+ (c >= '0' && c <= '0') ||
+ (c == '\0')) {
r = ss.str();
break;
- } else {
+ }
+ else {
ss << c;
}
- } else if (strchr("^$.[|()+{", c)){
+ }
+ else if (strchr("^$.[|()+{", c)) {
// list of "metacharacters" from man pcrepattern
r = ss.str();
break;
- } else if (extended && c == '#'){
+ }
+ else if (extended && c == '#') {
// comment
r = ss.str();
break;
- } else if (extended && isspace(c)){
+ }
+ else if (extended && isspace(c)) {
continue;
- } else {
+ }
+ else {
// self-matching char
ss << c;
}
}
- if ( r.empty() && *regex == 0 ){
+ if ( r.empty() && *regex == 0 ) {
r = ss.str();
if (purePrefix) *purePrefix = !r.empty();
}
return r;
}
- inline string simpleRegex(const BSONElement& e){
- switch(e.type()){
- case RegEx:
- return simpleRegex(e.regex(), e.regexFlags());
- case Object:{
- BSONObj o = e.embeddedObject();
- return simpleRegex(o["$regex"].valuestrsafe(), o["$options"].valuestrsafe());
- }
- default: assert(false); return ""; //return squashes compiler warning
+ inline string simpleRegex(const BSONElement& e) {
+ switch(e.type()) {
+ case RegEx:
+ return simpleRegex(e.regex(), e.regexFlags());
+ case Object: {
+ BSONObj o = e.embeddedObject();
+ return simpleRegex(o["$regex"].valuestrsafe(), o["$options"].valuestrsafe());
+ }
+ default: assert(false); return ""; //return squashes compiler warning
}
}
string simpleRegexEnd( string regex ) {
++regex[ regex.length() - 1 ];
return regex;
- }
-
-
+ }
+
+
FieldRange::FieldRange( const BSONElement &e, bool isNot, bool optimize ) {
// NOTE with $not, we could potentially form a complementary set of intervals.
if ( !isNot && !e.eoo() && e.type() != RegEx && e.getGtLtOp() == BSONObj::opIN ) {
@@ -139,7 +147,8 @@ namespace mongo {
BSONElement ie = i.next();
if ( ie.type() == RegEx ) {
regexes.push_back( FieldRange( ie, false, optimize ) );
- } else {
+ }
+ else {
vals.insert( ie );
}
}
@@ -149,22 +158,22 @@ namespace mongo {
for( vector< FieldRange >::const_iterator i = regexes.begin(); i != regexes.end(); ++i )
*this |= *i;
-
+
return;
}
-
- if ( e.type() == Array && e.getGtLtOp() == BSONObj::Equality ){
-
+
+ if ( e.type() == Array && e.getGtLtOp() == BSONObj::Equality ) {
+
_intervals.push_back( FieldInterval(e) );
-
+
const BSONElement& temp = e.embeddedObject().firstElement();
- if ( ! temp.eoo() ){
+ if ( ! temp.eoo() ) {
if ( temp < e )
_intervals.insert( _intervals.begin() , temp );
else
_intervals.push_back( FieldInterval(temp) );
}
-
+
return;
}
@@ -181,17 +190,19 @@ namespace mongo {
if ( e.eoo() )
return;
+ int op = e.getGtLtOp();
if ( e.type() == RegEx
- || (e.type() == Object && !e.embeddedObject()["$regex"].eoo())
- )
- {
+ || (e.type() == Object && !e.embeddedObject()["$regex"].eoo())
+ ) {
+ uassert( 13454, "invalid regular expression operator", op == BSONObj::Equality || op == BSONObj::opREGEX );
if ( !isNot ) { // no optimization for negated regex - we could consider creating 2 intervals comprising all nonmatching prefixes
const string r = simpleRegex(e);
if ( r.size() ) {
lower = addObj( BSON( "" << r ) ).firstElement();
upper = addObj( BSON( "" << simpleRegexEnd( r ) ) ).firstElement();
upperInclusive = false;
- } else {
+ }
+ else {
BSONObjBuilder b1(32), b2(32);
b1.appendMinForType( "" , String );
lower = addObj( b1.obj() ).firstElement();
@@ -202,10 +213,11 @@ namespace mongo {
}
// regex matches self - regex type > string type
- if (e.type() == RegEx){
+ if (e.type() == RegEx) {
BSONElement re = addObj( BSON( "" << e ) ).firstElement();
_intervals.push_back( FieldInterval(re) );
- } else {
+ }
+ else {
BSONObj orig = e.embeddedObject();
BSONObjBuilder b;
b.appendRegex("", orig["$regex"].valuestrsafe(), orig["$options"].valuestrsafe());
@@ -216,38 +228,53 @@ namespace mongo {
}
return;
}
- int op = e.getGtLtOp();
if ( isNot ) {
switch( op ) {
- case BSONObj::Equality:
- case BSONObj::opALL:
- case BSONObj::opMOD: // NOTE for mod and type, we could consider having 1-2 intervals comprising the complementary types (multiple intervals already possible with $in)
- case BSONObj::opTYPE:
- op = BSONObj::NE; // no bound calculation
- break;
- case BSONObj::NE:
- op = BSONObj::Equality;
- break;
- case BSONObj::LT:
- op = BSONObj::GTE;
- break;
- case BSONObj::LTE:
- op = BSONObj::GT;
- break;
- case BSONObj::GT:
- op = BSONObj::LTE;
- break;
- case BSONObj::GTE:
- op = BSONObj::LT;
- break;
- default: // otherwise doesn't matter
- break;
+ case BSONObj::Equality:
+ return;
+// op = BSONObj::NE;
+// break;
+ case BSONObj::opALL:
+ case BSONObj::opMOD: // NOTE for mod and type, we could consider having 1-2 intervals comprising the complementary types (multiple intervals already possible with $in)
+ case BSONObj::opTYPE:
+ // no bound calculation
+ return;
+ case BSONObj::NE:
+ op = BSONObj::Equality;
+ break;
+ case BSONObj::LT:
+ op = BSONObj::GTE;
+ break;
+ case BSONObj::LTE:
+ op = BSONObj::GT;
+ break;
+ case BSONObj::GT:
+ op = BSONObj::LTE;
+ break;
+ case BSONObj::GTE:
+ op = BSONObj::LT;
+ break;
+ default: // otherwise doesn't matter
+ break;
}
}
switch( op ) {
case BSONObj::Equality:
lower = upper = e;
break;
+ case BSONObj::NE: {
+ // this will invalidate the upper/lower references above
+ _intervals.push_back( FieldInterval() );
+ // optimize doesn't make sense for negative ranges
+ _intervals[ 0 ]._upper._bound = e;
+ _intervals[ 0 ]._upper._inclusive = false;
+ _intervals[ 1 ]._lower._bound = e;
+ _intervals[ 1 ]._lower._inclusive = false;
+ _intervals[ 1 ]._upper._bound = maxKey.firstElement();
+ _intervals[ 1 ]._upper._inclusive = true;
+ optimize = false; // don't run optimize code below
+ break;
+ }
case BSONObj::LT:
upperInclusive = false;
case BSONObj::LTE:
@@ -262,9 +289,9 @@ namespace mongo {
massert( 10370 , "$all requires array", e.type() == Array );
BSONObjIterator i( e.embeddedObject() );
bool bound = false;
- while ( i.more() ){
+ while ( i.more() ) {
BSONElement x = i.next();
- if ( x.type() == Object && x.embeddedObject().firstElement().getGtLtOp() == BSONObj::opELEM_MATCH ){
+ if ( x.type() == Object && x.embeddedObject().firstElement().getGtLtOp() == BSONObj::opELEM_MATCH ) {
// taken care of elsewhere
}
else if ( x.type() != RegEx ) {
@@ -299,7 +326,7 @@ namespace mongo {
BSONObjBuilder b;
b.appendMaxForType( "" , NumberDouble );
upper = addObj( b.obj() ).firstElement();
- }
+ }
break;
}
case BSONObj::opTYPE: {
@@ -314,7 +341,7 @@ namespace mongo {
b.appendMaxForType( "" , t );
upper = addObj( b.obj() ).firstElement();
}
-
+
break;
}
case BSONObj::opREGEX:
@@ -332,14 +359,14 @@ namespace mongo {
default:
break;
}
-
- if ( optimize ){
- if ( lower.type() != MinKey && upper.type() == MaxKey && lower.isSimpleType() ){ // TODO: get rid of isSimpleType
+
+ if ( optimize ) {
+ if ( lower.type() != MinKey && upper.type() == MaxKey && lower.isSimpleType() ) { // TODO: get rid of isSimpleType
BSONObjBuilder b;
b.appendMaxForType( lower.fieldName() , lower.type() );
upper = addObj( b.obj() ).firstElement();
}
- else if ( lower.type() == MinKey && upper.type() != MaxKey && upper.isSimpleType() ){ // TODO: get rid of isSimpleType
+ else if ( lower.type() == MinKey && upper.type() != MaxKey && upper.isSimpleType() ) { // TODO: get rid of isSimpleType
BSONObjBuilder b;
b.appendMinForType( upper.fieldName() , upper.type() );
lower = addObj( b.obj() ).firstElement();
@@ -355,7 +382,7 @@ namespace mongo {
if ( _special.size() == 0 && other._special.size() )
_special = other._special;
}
-
+
// as called, these functions find the max/min of a bound in the
// opposite direction, so inclusive bounds are considered less
// superlative
@@ -378,41 +405,46 @@ namespace mongo {
result._upper = minFieldBound( one._upper, two._upper );
return result.strictValid();
}
-
- // NOTE Not yet tested for complex $or bounds, just for simple bounds generated by $in
+
const FieldRange &FieldRange::operator&=( const FieldRange &other ) {
vector< FieldInterval > newIntervals;
vector< FieldInterval >::const_iterator i = _intervals.begin();
vector< FieldInterval >::const_iterator j = other._intervals.begin();
while( i != _intervals.end() && j != other._intervals.end() ) {
FieldInterval overlap;
- if ( fieldIntervalOverlap( *i, *j, overlap ) )
+ if ( fieldIntervalOverlap( *i, *j, overlap ) ) {
newIntervals.push_back( overlap );
- if ( i->_upper == minFieldBound( i->_upper, j->_upper ) )
+ }
+ if ( i->_upper == minFieldBound( i->_upper, j->_upper ) ) {
++i;
- else
- ++j;
+ }
+ else {
+ ++j;
+ }
}
finishOperation( newIntervals, other );
return *this;
}
-
+
void handleInterval( const FieldInterval &lower, FieldBound &low, FieldBound &high, vector< FieldInterval > &newIntervals ) {
if ( low._bound.eoo() ) {
low = lower._lower; high = lower._upper;
- } else {
- if ( high._bound.woCompare( lower._lower._bound, false ) < 0 ) { // when equal but neither inclusive, just assume they overlap, since current btree scanning code just as efficient either way
+ }
+ else {
+ int cmp = high._bound.woCompare( lower._lower._bound, false );
+ if ( ( cmp < 0 ) || ( cmp == 0 && !high._inclusive && !lower._lower._inclusive ) ) {
FieldInterval tmp;
tmp._lower = low;
tmp._upper = high;
newIntervals.push_back( tmp );
- low = lower._lower; high = lower._upper;
- } else {
+ low = lower._lower; high = lower._upper;
+ }
+ else {
high = lower._upper;
}
- }
+ }
}
-
+
const FieldRange &FieldRange::operator|=( const FieldRange &other ) {
vector< FieldInterval > newIntervals;
FieldBound low;
@@ -424,90 +456,107 @@ namespace mongo {
if ( ( cmp == 0 && i->_lower._inclusive ) || cmp < 0 ) {
handleInterval( *i, low, high, newIntervals );
++i;
- } else {
+ }
+ else {
handleInterval( *j, low, high, newIntervals );
++j;
- }
+ }
}
while( i != _intervals.end() ) {
handleInterval( *i, low, high, newIntervals );
- ++i;
+ ++i;
}
while( j != other._intervals.end() ) {
handleInterval( *j, low, high, newIntervals );
- ++j;
+ ++j;
}
FieldInterval tmp;
tmp._lower = low;
tmp._upper = high;
- newIntervals.push_back( tmp );
+ newIntervals.push_back( tmp );
finishOperation( newIntervals, other );
- return *this;
+ return *this;
}
-
+
const FieldRange &FieldRange::operator-=( const FieldRange &other ) {
+ vector< FieldInterval > newIntervals;
vector< FieldInterval >::iterator i = _intervals.begin();
vector< FieldInterval >::const_iterator j = other._intervals.begin();
while( i != _intervals.end() && j != other._intervals.end() ) {
int cmp = i->_lower._bound.woCompare( j->_lower._bound, false );
if ( cmp < 0 ||
- ( cmp == 0 && i->_lower._inclusive && !j->_lower._inclusive ) ) {
+ ( cmp == 0 && i->_lower._inclusive && !j->_lower._inclusive ) ) {
int cmp2 = i->_upper._bound.woCompare( j->_lower._bound, false );
if ( cmp2 < 0 ) {
+ newIntervals.push_back( *i );
++i;
- } else if ( cmp2 == 0 ) {
- if ( i->_upper._inclusive && j->_lower._inclusive ) {
- i->_upper._inclusive = false;
+ }
+ else if ( cmp2 == 0 ) {
+ newIntervals.push_back( *i );
+ if ( newIntervals.back()._upper._inclusive && j->_lower._inclusive ) {
+ newIntervals.back()._upper._inclusive = false;
}
++i;
- } else {
+ }
+ else {
+ newIntervals.push_back( *i );
+ newIntervals.back()._upper = j->_lower;
+ newIntervals.back()._upper.flipInclusive();
int cmp3 = i->_upper._bound.woCompare( j->_upper._bound, false );
if ( cmp3 < 0 ||
- ( cmp3 == 0 && ( !i->_upper._inclusive || j->_upper._inclusive ) ) ) {
- i->_upper = j->_lower;
- i->_upper.flipInclusive();
+ ( cmp3 == 0 && ( !i->_upper._inclusive || j->_upper._inclusive ) ) ) {
++i;
- } else {
+ }
+ else {
+ i->_lower = j->_upper;
+ i->_lower.flipInclusive();
++j;
}
}
- } else {
+ }
+ else {
int cmp2 = i->_lower._bound.woCompare( j->_upper._bound, false );
if ( cmp2 > 0 ||
- ( cmp2 == 0 && ( !i->_lower._inclusive || !j->_lower._inclusive ) ) ) {
+ ( cmp2 == 0 && ( !i->_lower._inclusive || !j->_upper._inclusive ) ) ) {
++j;
- } else {
+ }
+ else {
int cmp3 = i->_upper._bound.woCompare( j->_upper._bound, false );
if ( cmp3 < 0 ||
- ( cmp3 == 0 && ( !i->_upper._inclusive || j->_upper._inclusive ) ) ) {
- i = _intervals.erase( i );
- } else {
+ ( cmp3 == 0 && ( !i->_upper._inclusive || j->_upper._inclusive ) ) ) {
+ ++i;
+ }
+ else {
i->_lower = j->_upper;
- i->_lower.flipInclusive();
+ i->_lower.flipInclusive();
++j;
}
- }
+ }
}
}
- finishOperation( _intervals, other );
- return *this;
+ while( i != _intervals.end() ) {
+ newIntervals.push_back( *i );
+ ++i;
+ }
+ finishOperation( newIntervals, other );
+ return *this;
}
-
+
// TODO write a proper implementation that doesn't do a full copy
bool FieldRange::operator<=( const FieldRange &other ) {
FieldRange temp = *this;
temp -= other;
return temp.empty();
}
-
+
BSONObj FieldRange::addObj( const BSONObj &o ) {
_objData.push_back( o );
return o;
}
-
+
string FieldRangeSet::getSpecial() const {
string s = "";
- for ( map<string,FieldRange>::iterator i=_ranges.begin(); i!=_ranges.end(); i++ ){
+ for ( map<string,FieldRange>::iterator i=_ranges.begin(); i!=_ranges.end(); i++ ) {
if ( i->second.getSpecial().size() == 0 )
continue;
uassert( 13033 , "can't have 2 special fields" , s.size() == 0 );
@@ -533,34 +582,35 @@ namespace mongo {
}
if ( op2 == BSONObj::opELEM_MATCH ) {
BSONObjIterator k( g.embeddedObjectUserCheck() );
- while ( k.more() ){
+ while ( k.more() ) {
BSONElement h = k.next();
StringBuilder buf(32);
buf << fieldName << "." << h.fieldName();
string fullname = buf.str();
-
+
int op3 = getGtLtOp( h );
- if ( op3 == BSONObj::Equality ){
+ if ( op3 == BSONObj::Equality ) {
_ranges[ fullname ] &= FieldRange( h , isNot , optimize );
}
else {
BSONObjIterator l( h.embeddedObject() );
- while ( l.more() ){
+ while ( l.more() ) {
_ranges[ fullname ] &= FieldRange( l.next() , isNot , optimize );
}
}
- }
- } else {
+ }
+ }
+ else {
_ranges[ fieldName ] &= FieldRange( f , isNot , optimize );
- }
+ }
}
-
+
void FieldRangeSet::processQueryField( const BSONElement &e, bool optimize ) {
bool equality = ( getGtLtOp( e ) == BSONObj::Equality );
if ( equality && e.type() == Object ) {
equality = ( strcmp( e.embeddedObject().firstElement().fieldName(), "$not" ) != 0 );
}
-
+
if ( equality || ( e.type() == Object && !e.embeddedObject()[ "$regex" ].eoo() ) ) {
_ranges[ e.fieldName() ] &= FieldRange( e , false , optimize );
}
@@ -570,67 +620,69 @@ namespace mongo {
BSONElement f = j.next();
if ( strcmp( f.fieldName(), "$not" ) == 0 ) {
switch( f.type() ) {
- case Object: {
- BSONObjIterator k( f.embeddedObject() );
- while( k.more() ) {
- BSONElement g = k.next();
- uassert( 13034, "invalid use of $not", g.getGtLtOp() != BSONObj::Equality );
- processOpElement( e.fieldName(), g, true, optimize );
- }
- break;
+ case Object: {
+ BSONObjIterator k( f.embeddedObject() );
+ while( k.more() ) {
+ BSONElement g = k.next();
+ uassert( 13034, "invalid use of $not", g.getGtLtOp() != BSONObj::Equality );
+ processOpElement( e.fieldName(), g, true, optimize );
}
- case RegEx:
- processOpElement( e.fieldName(), f, true, optimize );
- break;
- default:
- uassert( 13041, "invalid use of $not", false );
+ break;
}
- } else {
+ case RegEx:
+ processOpElement( e.fieldName(), f, true, optimize );
+ break;
+ default:
+ uassert( 13041, "invalid use of $not", false );
+ }
+ }
+ else {
processOpElement( e.fieldName(), f, false, optimize );
}
- }
- }
+ }
+ }
}
-
+
FieldRangeSet::FieldRangeSet( const char *ns, const BSONObj &query , bool optimize )
: _ns( ns ), _queries( 1, query.getOwned() ) {
- BSONObjIterator i( _queries[ 0 ] );
-
- while( i.more() ) {
- BSONElement e = i.next();
- // e could be x:1 or x:{$gt:1}
-
- if ( strcmp( e.fieldName(), "$where" ) == 0 ) {
- continue;
- }
-
- if ( strcmp( e.fieldName(), "$or" ) == 0 ) {
- continue;
- }
-
- if ( strcmp( e.fieldName(), "$nor" ) == 0 ) {
- continue;
- }
-
- processQueryField( e, optimize );
- }
+ BSONObjIterator i( _queries[ 0 ] );
+
+ while( i.more() ) {
+ BSONElement e = i.next();
+ // e could be x:1 or x:{$gt:1}
+
+ if ( strcmp( e.fieldName(), "$where" ) == 0 ) {
+ continue;
+ }
+
+ if ( strcmp( e.fieldName(), "$or" ) == 0 ) {
+ continue;
+ }
+
+ if ( strcmp( e.fieldName(), "$nor" ) == 0 ) {
+ continue;
+ }
+
+ processQueryField( e, optimize );
}
+ }
FieldRangeOrSet::FieldRangeOrSet( const char *ns, const BSONObj &query , bool optimize )
: _baseSet( ns, query, optimize ), _orFound() {
BSONObjIterator i( _baseSet._queries[ 0 ] );
-
+
while( i.more() ) {
BSONElement e = i.next();
- if ( strcmp( e.fieldName(), "$or" ) == 0 ) {
- massert( 13262, "$or requires nonempty array", e.type() == Array && e.embeddedObject().nFields() > 0 );
- BSONObjIterator j( e.embeddedObject() );
- while( j.more() ) {
- BSONElement f = j.next();
- massert( 13263, "$or array must contain objects", f.type() == Object );
+ if ( strcmp( e.fieldName(), "$or" ) == 0 ) {
+ massert( 13262, "$or requires nonempty array", e.type() == Array && e.embeddedObject().nFields() > 0 );
+ BSONObjIterator j( e.embeddedObject() );
+ while( j.more() ) {
+ BSONElement f = j.next();
+ massert( 13263, "$or array must contain objects", f.type() == Object );
_orSets.push_back( FieldRangeSet( ns, f.embeddedObject(), optimize ) );
massert( 13291, "$or may not contain 'special' query", _orSets.back().getSpecial().empty() );
+ _originalOrSets.push_back( _orSets.back() );
}
_orFound = true;
continue;
@@ -638,13 +690,41 @@ namespace mongo {
}
}
+ void FieldRangeOrSet::popOrClause( const BSONObj &indexSpec ) {
+ massert( 13274, "no or clause to pop", !orFinished() );
+ auto_ptr< FieldRangeSet > holder;
+ FieldRangeSet *toDiff = &_originalOrSets.front();
+ if ( toDiff->matchPossible() && !indexSpec.isEmpty() ) {
+ holder.reset( toDiff->subset( indexSpec ) );
+ toDiff = holder.get();
+ }
+ list< FieldRangeSet >::iterator i = _orSets.begin();
+ list< FieldRangeSet >::iterator j = _originalOrSets.begin();
+ ++i;
+ ++j;
+ while( i != _orSets.end() ) {
+ *i -= *toDiff;
+ if( !i->matchPossible() ) {
+ i = _orSets.erase( i );
+ j = _originalOrSets.erase( j );
+ }
+ else {
+ ++i;
+ ++j;
+ }
+ }
+ _oldOrSets.push_front( _orSets.front() );
+ _orSets.pop_front();
+ _originalOrSets.pop_front();
+ }
+
FieldRange *FieldRangeSet::trivialRange_ = 0;
FieldRange &FieldRangeSet::trivialRange() {
if ( trivialRange_ == 0 )
trivialRange_ = new FieldRange();
return *trivialRange_;
}
-
+
BSONObj FieldRangeSet::simplifiedQuery( const BSONObj &_fields ) const {
BSONObj fields = _fields;
if ( fields.isEmpty() ) {
@@ -676,14 +756,15 @@ namespace mongo {
}
return b.obj();
}
-
+
QueryPattern FieldRangeSet::pattern( const BSONObj &sort ) const {
QueryPattern qp;
for( map< string, FieldRange >::const_iterator i = _ranges.begin(); i != _ranges.end(); ++i ) {
assert( !i->second.empty() );
if ( i->second.equality() ) {
qp._fieldTypes[ i->first ] = QueryPattern::Equality;
- } else if ( i->second.nontrivial() ) {
+ }
+ else if ( i->second.nontrivial() ) {
bool upper = i->second.max().type() != MaxKey;
bool lower = i->second.min().type() != MinKey;
if ( upper && lower )
@@ -691,18 +772,18 @@ namespace mongo {
else if ( upper )
qp._fieldTypes[ i->first ] = QueryPattern::UpperBound;
else if ( lower )
- qp._fieldTypes[ i->first ] = QueryPattern::LowerBound;
+ qp._fieldTypes[ i->first ] = QueryPattern::LowerBound;
}
}
qp.setSort( sort );
return qp;
}
-
+
// TODO get rid of this
BoundList FieldRangeSet::indexBounds( const BSONObj &keyPattern, int direction ) const {
typedef vector< pair< shared_ptr< BSONObjBuilder >, shared_ptr< BSONObjBuilder > > > BoundBuilders;
BoundBuilders builders;
- builders.push_back( make_pair( shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ), shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ) ) );
+ builders.push_back( make_pair( shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ), shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ) ) );
BSONObjIterator i( keyPattern );
bool ineq = false; // until ineq is true, we are just dealing with equality and $in bounds
while( i.more() ) {
@@ -716,7 +797,8 @@ namespace mongo {
j->first->appendAs( fr.min(), "" );
j->second->appendAs( fr.min(), "" );
}
- } else {
+ }
+ else {
if ( !fr.inQuery() ) {
ineq = true;
}
@@ -725,18 +807,21 @@ namespace mongo {
for( BoundBuilders::const_iterator i = builders.begin(); i != builders.end(); ++i ) {
BSONObj first = i->first->obj();
BSONObj second = i->second->obj();
+
+ const unsigned maxCombinations = 4000000;
if ( forward ) {
for( vector< FieldInterval >::const_iterator j = intervals.begin(); j != intervals.end(); ++j ) {
- uassert( 13303, "combinatorial limit of $in partitioning of result set exceeded", newBuilders.size() < 1000000 );
+ uassert( 13303, "combinatorial limit of $in partitioning of result set exceeded", newBuilders.size() < maxCombinations );
newBuilders.push_back( make_pair( shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ), shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ) ) );
newBuilders.back().first->appendElements( first );
newBuilders.back().second->appendElements( second );
newBuilders.back().first->appendAs( j->_lower._bound, "" );
newBuilders.back().second->appendAs( j->_upper._bound, "" );
}
- } else {
+ }
+ else {
for( vector< FieldInterval >::const_reverse_iterator j = intervals.rbegin(); j != intervals.rend(); ++j ) {
- uassert( 13304, "combinatorial limit of $in partitioning of result set exceeded", newBuilders.size() < 1000000 );
+ uassert( 13304, "combinatorial limit of $in partitioning of result set exceeded", newBuilders.size() < maxCombinations );
newBuilders.push_back( make_pair( shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ), shared_ptr< BSONObjBuilder >( new BSONObjBuilder() ) ) );
newBuilders.back().first->appendElements( first );
newBuilders.back().second->appendElements( second );
@@ -747,7 +832,8 @@ namespace mongo {
}
builders = newBuilders;
}
- } else {
+ }
+ else {
for( BoundBuilders::const_iterator j = builders.begin(); j != builders.end(); ++j ) {
j->first->appendAs( forward ? fr.min() : fr.max(), "" );
j->second->appendAs( forward ? fr.max() : fr.min(), "" );
@@ -758,204 +844,45 @@ namespace mongo {
for( BoundBuilders::const_iterator i = builders.begin(); i != builders.end(); ++i )
ret.push_back( make_pair( i->first->obj(), i->second->obj() ) );
return ret;
- }
-
- ///////////////////
- // FieldMatcher //
- ///////////////////
-
- void FieldMatcher::add( const BSONObj& o ){
- massert( 10371 , "can only add to FieldMatcher once", _source.isEmpty());
- _source = o;
-
- BSONObjIterator i( o );
- int true_false = -1;
- while ( i.more() ){
- BSONElement e = i.next();
-
- if (e.type() == Object){
- BSONObj obj = e.embeddedObject();
- BSONElement e2 = obj.firstElement();
- if ( strcmp(e2.fieldName(), "$slice") == 0 ){
- if (e2.isNumber()){
- int i = e2.numberInt();
- if (i < 0)
- add(e.fieldName(), i, -i); // limit is now positive
- else
- add(e.fieldName(), 0, i);
-
- } else if (e2.type() == Array) {
- BSONObj arr = e2.embeddedObject();
- uassert(13099, "$slice array wrong size", arr.nFields() == 2 );
-
- BSONObjIterator it(arr);
- int skip = it.next().numberInt();
- int limit = it.next().numberInt();
- uassert(13100, "$slice limit must be positive", limit > 0 );
- add(e.fieldName(), skip, limit);
-
- } else {
- uassert(13098, "$slice only supports numbers and [skip, limit] arrays", false);
- }
- } else {
- uassert(13097, string("Unsupported projection option: ") + obj.firstElement().fieldName(), false);
- }
-
- } else if (!strcmp(e.fieldName(), "_id") && !e.trueValue()){
- _includeID = false;
-
- } else {
-
- add (e.fieldName(), e.trueValue());
-
- // validate input
- if (true_false == -1){
- true_false = e.trueValue();
- _include = !e.trueValue();
- }
- else{
- uassert( 10053 , "You cannot currently mix including and excluding fields. Contact us if this is an issue." ,
- (bool)true_false == e.trueValue() );
- }
- }
- }
- }
-
- void FieldMatcher::add(const string& field, bool include){
- if (field.empty()){ // this is the field the user referred to
- _include = include;
- } else {
- _include = !include;
-
- const size_t dot = field.find('.');
- const string subfield = field.substr(0,dot);
- const string rest = (dot == string::npos ? "" : field.substr(dot+1,string::npos));
-
- boost::shared_ptr<FieldMatcher>& fm = _fields[subfield];
- if (!fm)
- fm.reset(new FieldMatcher());
-
- fm->add(rest, include);
- }
- }
-
- void FieldMatcher::add(const string& field, int skip, int limit){
- _special = true; // can't include or exclude whole object
-
- if (field.empty()){ // this is the field the user referred to
- _skip = skip;
- _limit = limit;
- } else {
- const size_t dot = field.find('.');
- const string subfield = field.substr(0,dot);
- const string rest = (dot == string::npos ? "" : field.substr(dot+1,string::npos));
-
- boost::shared_ptr<FieldMatcher>& fm = _fields[subfield];
- if (!fm)
- fm.reset(new FieldMatcher());
-
- fm->add(rest, skip, limit);
- }
}
- BSONObj FieldMatcher::getSpec() const{
- return _source;
- }
-
- //b will be the value part of an array-typed BSONElement
- void FieldMatcher::appendArray( BSONObjBuilder& b , const BSONObj& a , bool nested) const {
- int skip = nested ? 0 : _skip;
- int limit = nested ? -1 : _limit;
-
- if (skip < 0){
- skip = max(0, skip + a.nFields());
- }
-
- int i=0;
- BSONObjIterator it(a);
- while (it.more()){
- BSONElement e = it.next();
-
- if (skip){
- skip--;
- continue;
- }
-
- if (limit != -1 && (limit-- == 0)){
- break;
- }
-
- switch(e.type()){
- case Array:{
- BSONObjBuilder subb;
- appendArray(subb , e.embeddedObject(), true);
- b.appendArray(b.numStr(i++), subb.obj());
- break;
- }
- case Object:{
- BSONObjBuilder subb;
- BSONObjIterator jt(e.embeddedObject());
- while (jt.more()){
- append(subb , jt.next());
- }
- b.append(b.numStr(i++), subb.obj());
- break;
- }
- default:
- if (_include)
- b.appendAs(e, b.numStr(i++));
+ FieldRangeSet *FieldRangeSet::subset( const BSONObj &fields ) const {
+ FieldRangeSet *ret = new FieldRangeSet( _ns, BSONObj() );
+ BSONObjIterator i( fields );
+ while( i.more() ) {
+ BSONElement e = i.next();
+ if ( _ranges[ e.fieldName() ].nontrivial() ) {
+ ret->_ranges[ e.fieldName() ] = _ranges[ e.fieldName() ];
}
}
+ ret->_queries = _queries;
+ return ret;
}
- void FieldMatcher::append( BSONObjBuilder& b , const BSONElement& e ) const {
- FieldMap::const_iterator field = _fields.find( e.fieldName() );
-
- if (field == _fields.end()){
- if (_include)
- b.append(e);
- }
- else {
- FieldMatcher& subfm = *field->second;
-
- if ((subfm._fields.empty() && !subfm._special) || !(e.type()==Object || e.type()==Array) ){
- if (subfm._include)
- b.append(e);
- }
- else if (e.type() == Object){
- BSONObjBuilder subb;
- BSONObjIterator it(e.embeddedObject());
- while (it.more()){
- subfm.append(subb, it.next());
- }
- b.append(e.fieldName(), subb.obj());
-
- }
- else { //Array
- BSONObjBuilder subb;
- subfm.appendArray(subb, e.embeddedObject());
- b.appendArray(e.fieldName(), subb.obj());
- }
- }
- }
-
bool FieldRangeVector::matchesElement( const BSONElement &e, int i, bool forward ) const {
- int l = matchingLowElement( e, i, forward );
- return ( l % 2 == 0 ); // if we're inside an interval
+ bool eq;
+ int l = matchingLowElement( e, i, forward, eq );
+ return ( l % 2 == 0 ); // if we're inside an interval
}
-
+
// binary search for interval containing the specified element
// an even return value indicates that the element is contained within a valid interval
- int FieldRangeVector::matchingLowElement( const BSONElement &e, int i, bool forward ) const {
+ int FieldRangeVector::matchingLowElement( const BSONElement &e, int i, bool forward, bool &lowEquality ) const {
+ lowEquality = false;
int l = -1;
int h = _ranges[ i ].intervals().size() * 2;
while( l + 1 < h ) {
int m = ( l + h ) / 2;
BSONElement toCmp;
+ bool toCmpInclusive;
+ const FieldInterval &interval = _ranges[ i ].intervals()[ m / 2 ];
if ( m % 2 == 0 ) {
- toCmp = _ranges[ i ].intervals()[ m / 2 ]._lower._bound;
- } else {
- toCmp = _ranges[ i ].intervals()[ m / 2 ]._upper._bound;
+ toCmp = interval._lower._bound;
+ toCmpInclusive = interval._lower._inclusive;
+ }
+ else {
+ toCmp = interval._upper._bound;
+ toCmpInclusive = interval._upper._inclusive;
}
int cmp = toCmp.woCompare( e, false );
if ( !forward ) {
@@ -963,41 +890,60 @@ namespace mongo {
}
if ( cmp < 0 ) {
l = m;
- } else if ( cmp > 0 ) {
+ }
+ else if ( cmp > 0 ) {
h = m;
- } else {
- return ( m % 2 == 0 ) ? m : m - 1;
+ }
+ else {
+ if ( m % 2 == 0 ) {
+ lowEquality = true;
+ }
+ int ret = m;
+ // if left match and inclusive, all good
+ // if left match and not inclusive, return right before left bound
+ // if right match and inclusive, return left bound
+ // if right match and not inclusive, return right bound
+ if ( ( m % 2 == 0 && !toCmpInclusive ) || ( m % 2 == 1 && toCmpInclusive ) ) {
+ --ret;
+ }
+ return ret;
}
}
assert( l + 1 == h );
return l;
}
-
+
bool FieldRangeVector::matches( const BSONObj &obj ) const {
- BSONObjIterator k( _keyPattern );
- for( int i = 0; i < (int)_ranges.size(); ++i ) {
- if ( _ranges[ i ].empty() ) {
- return false;
- }
- BSONElement kk = k.next();
- int number = (int) kk.number();
- bool forward = ( number >= 0 ? 1 : -1 ) * ( _direction >= 0 ? 1 : -1 ) > 0;
- BSONElementSet keys;
- obj.getFieldsDotted( kk.fieldName(), keys );
- bool match = false;
- for( BSONElementSet::const_iterator j = keys.begin(); j != keys.end(); ++j ) {
- if ( matchesElement( *j, i, forward ) ) {
- match = true;
+ if ( !_indexSpec.get() ) {
+ _indexSpec.reset( new IndexSpec( _keyPattern ) );
+ }
+ // TODO The representation of matching keys could potentially be optimized
+ // more for the case at hand. (For example, we can potentially consider
+ // fields individually instead of constructing several bson objects using
+ // multikey arrays.) But getKeys() canonically defines the key set for a
+ // given object and for now we are using it as is.
+ BSONObjSetDefaultOrder keys;
+ _indexSpec->getKeys( obj, keys );
+ for( BSONObjSetDefaultOrder::const_iterator i = keys.begin(); i != keys.end(); ++i ) {
+ BSONObjIterator j( *i );
+ BSONObjIterator k( _keyPattern );
+ bool match = true;
+ for( int l = 0; l < (int)_ranges.size(); ++l ) {
+ int number = (int) k.next().number();
+ bool forward = ( number >= 0 ? 1 : -1 ) * ( _direction >= 0 ? 1 : -1 ) > 0;
+ if ( !matchesElement( j.next(), l, forward ) ) {
+ match = false;
break;
}
}
- if ( !match ) {
- return false;
+ if ( match ) {
+ // The *i key matched a valid range for every element.
+ return true;
}
}
- return true;
+ return false;
}
-
+
// TODO optimize more
int FieldRangeVector::Iterator::advance( const BSONObj &curr ) {
BSONObjIterator j( curr );
@@ -1009,7 +955,8 @@ namespace mongo {
for( int i = 0; i < (int)_i.size(); ++i ) {
if ( i > 0 && !_v._ranges[ i - 1 ].intervals()[ _i[ i - 1 ] ].equality() ) {
// if last bound was inequality, we don't know anything about where we are for this field
- // TODO if possible avoid this certain cases when field in prev key is the same
+                    // TODO if possible avoid this in certain cases when the value in the previous field of the
+                    // previous key is the same as the value of the previous field in the current key
setMinus( i );
}
bool eq = false;
@@ -1017,20 +964,23 @@ namespace mongo {
bool reverse = ( ( oo.number() < 0 ) ^ ( _v._direction < 0 ) );
BSONElement jj = j.next();
if ( _i[ i ] == -1 ) { // unknown position for this field, do binary search
- int l = _v.matchingLowElement( jj, i, !reverse );
+ bool lowEquality;
+ int l = _v.matchingLowElement( jj, i, !reverse, lowEquality );
if ( l % 2 == 0 ) { // we are in a valid range for this field
_i[ i ] = l / 2;
int diff = (int)_v._ranges[ i ].intervals().size() - _i[ i ];
if ( diff > 1 ) {
latestNonEndpoint = i;
- } else if ( diff == 1 ) {
+ }
+ else if ( diff == 1 ) {
int x = _v._ranges[ i ].intervals()[ _i[ i ] ]._upper._bound.woCompare( jj, false );
if ( x != 0 ) {
latestNonEndpoint = i;
}
}
continue;
- } else { // not in a valid range for this field - determine if and how to advance
+ }
+ else { // not in a valid range for this field - determine if and how to advance
// check if we're after the last interval for this field
if ( l == (int)_v._ranges[ i ].intervals().size() * 2 - 1 ) {
if ( latestNonEndpoint == -1 ) {
@@ -1038,18 +988,24 @@ namespace mongo {
}
setZero( latestNonEndpoint + 1 );
// skip to curr / latestNonEndpoint + 1 / superlative
- for( int j = latestNonEndpoint + 1; j < (int)_i.size(); ++j ) {
- _cmp[ j ] = _superlative[ j ];
- }
- return latestNonEndpoint + 1;
+ _after = true;
+ return latestNonEndpoint + 1;
}
_i[ i ] = ( l + 1 ) / 2;
+ if ( lowEquality ) {
+ // skip to curr / i + 1 / superlative
+ _after = true;
+ return i + 1;
+ }
// skip to curr / i / nextbounds
_cmp[ i ] = &_v._ranges[ i ].intervals()[ _i[ i ] ]._lower._bound;
+ _inc[ i ] = _v._ranges[ i ].intervals()[ _i[ i ] ]._lower._inclusive;
for( int j = i + 1; j < (int)_i.size(); ++j ) {
_cmp[ j ] = &_v._ranges[ j ].intervals().front()._lower._bound;
+ _inc[ j ] = _v._ranges[ j ].intervals().front()._lower._inclusive;
}
- return i;
+ _after = false;
+ return i;
}
}
bool first = true;
@@ -1062,7 +1018,7 @@ namespace mongo {
if ( reverse ) {
x = -x;
}
- if ( x == 0 ) {
+ if ( x == 0 && _v._ranges[ i ].intervals()[ _i[ i ] ]._upper._inclusive ) {
eq = true;
break;
}
@@ -1081,16 +1037,27 @@ namespace mongo {
x = -x;
}
}
+ // if we're equal to and not inclusive the lower bound, advance
+ if ( ( x == 0 && !_v._ranges[ i ].intervals()[ _i[ i ] ]._lower._inclusive ) ) {
+ setZero( i + 1 );
+ // skip to curr / i + 1 / superlative
+ _after = true;
+ return i + 1;
+ }
// if we're less than the lower bound, advance
if ( x > 0 ) {
setZero( i + 1 );
// skip to curr / i / nextbounds
_cmp[ i ] = &_v._ranges[ i ].intervals()[ _i[ i ] ]._lower._bound;
+ _inc[ i ] = _v._ranges[ i ].intervals()[ _i[ i ] ]._lower._inclusive;
for( int j = i + 1; j < (int)_i.size(); ++j ) {
_cmp[ j ] = &_v._ranges[ j ].intervals().front()._lower._bound;
+ _inc[ j ] = _v._ranges[ j ].intervals().front()._lower._inclusive;
}
+ _after = false;
return i;
- } else {
+ }
+ else {
break;
}
}
@@ -1101,26 +1068,32 @@ namespace mongo {
}
int diff = (int)_v._ranges[ i ].intervals().size() - _i[ i ];
if ( diff > 1 || ( !eq && diff == 1 ) ) {
- // check if we're not at the end of valid values for this field
+ // check if we're not at the end of valid values for this field
latestNonEndpoint = i;
- } else if ( diff == 0 ) { // check if we're past the last interval for this field
+ }
+ else if ( diff == 0 ) { // check if we're past the last interval for this field
if ( latestNonEndpoint == -1 ) {
return -2;
}
// more values possible, skip...
setZero( latestNonEndpoint + 1 );
// skip to curr / latestNonEndpoint + 1 / superlative
- for( int j = latestNonEndpoint + 1; j < (int)_i.size(); ++j ) {
- _cmp[ j ] = _superlative[ j ];
- }
+ _after = true;
return latestNonEndpoint + 1;
}
}
- return -1;
+ return -1;
}
-
+
+ void FieldRangeVector::Iterator::prepDive() {
+ for( int j = 0; j < (int)_i.size(); ++j ) {
+ _cmp[ j ] = &_v._ranges[ j ].intervals().front()._lower._bound;
+ _inc[ j ] = _v._ranges[ j ].intervals().front()._lower._inclusive;
+ }
+ }
+
struct SimpleRegexUnitTest : UnitTest {
- void run(){
+ void run() {
{
BSONObjBuilder b;
b.appendRegex("r", "^foo");
@@ -1179,38 +1152,39 @@ namespace mongo {
} simple_regex_unittest;
- long long applySkipLimit( long long num , const BSONObj& cmd ){
+ long long applySkipLimit( long long num , const BSONObj& cmd ) {
BSONElement s = cmd["skip"];
BSONElement l = cmd["limit"];
-
- if ( s.isNumber() ){
+
+ if ( s.isNumber() ) {
num = num - s.numberLong();
if ( num < 0 ) {
num = 0;
}
}
-
- if ( l.isNumber() ){
+
+ if ( l.isNumber() ) {
long long limit = l.numberLong();
- if ( limit < num ){
+ if ( limit < num ) {
num = limit;
}
}
- return num;
+ return num;
}
- string debugString( Message& m ){
+ string debugString( Message& m ) {
stringstream ss;
ss << "op: " << opToString( m.operation() ) << " len: " << m.size();
- if ( m.operation() >= 2000 && m.operation() < 2100 ){
+ if ( m.operation() >= 2000 && m.operation() < 2100 ) {
DbMessage d(m);
ss << " ns: " << d.getns();
- switch ( m.operation() ){
+ switch ( m.operation() ) {
case dbUpdate: {
int flags = d.pullInt();
BSONObj q = d.nextJsObj();
- ss << " flags: " << flags << " query: " << q;
+ BSONObj o = d.nextJsObj();
+ ss << " flags: " << flags << " query: " << q << " update: " << o;
break;
}
case dbInsert:
@@ -1225,10 +1199,10 @@ namespace mongo {
default:
ss << " CANNOT HANDLE YET";
}
-
-
+
+
}
return ss.str();
- }
+ }
} // namespace mongo