diff options
Diffstat (limited to 'db/queryutil.cpp')
-rw-r--r-- | db/queryutil.cpp | 464 |
1 files changed, 337 insertions, 127 deletions
diff --git a/db/queryutil.cpp b/db/queryutil.cpp index d8854be..c01b89e 100644 --- a/db/queryutil.cpp +++ b/db/queryutil.cpp @@ -24,96 +24,118 @@ #include "../util/unittest.h" namespace mongo { - namespace { - /** returns a string that when used as a matcher, would match a super set of regex() - returns "" for complex regular expressions - used to optimize queries in some simple regex cases that start with '^' - */ - inline string simpleRegexHelper(const char* regex, const char* flags){ - string r = ""; - - bool extended = false; - while (*flags){ - switch (*(flags++)){ - case 'm': // multiline - continue; - case 'x': // extended - extended = true; - break; - default: - return r; // cant use index - } - } + /** returns a string that when used as a matcher, would match a super set of regex() + returns "" for complex regular expressions + used to optimize queries in some simple regex cases that start with '^' - if ( *(regex++) != '^' ) - return r; + if purePrefix != NULL, sets it to whether the regex can be converted to a range query + */ + string simpleRegex(const char* regex, const char* flags, bool* purePrefix){ + string r = ""; - stringstream ss; + if (purePrefix) *purePrefix = false; - while(*regex){ - char c = *(regex++); - if ( c == '*' || c == '?' ){ - // These are the only two symbols that make the last char optional - r = ss.str(); - r = r.substr( 0 , r.size() - 1 ); - return r; //breaking here fails with /^a?/ - } else if (c == '\\'){ - // slash followed by non-alphanumeric represents the following char - c = *(regex++); - if ((c >= 'A' && c <= 'Z') || - (c >= 'a' && c <= 'z') || - (c >= '0' && c <= '0') || - (c == '\0')) - { - r = ss.str(); - break; - } else { - ss << c; - } - } else if (strchr("^$.[|()+{", c)){ - // list of "metacharacters" from man pcrepattern - r = ss.str(); + bool extended = false; + while (*flags){ + switch (*(flags++)){ + case 'm': // multiline + continue; + case 'x': // extended + extended = true; break; - } else if (extended && c == '#'){ - // comment + default: + return r; // cant use index + } + } + + if ( *(regex++) != '^' ) + return r; + + stringstream ss; + + while(*regex){ + char c = *(regex++); + if ( c == '*' || c == '?' ){ + // These are the only two symbols that make the last char optional + r = ss.str(); + r = r.substr( 0 , r.size() - 1 ); + return r; //breaking here fails with /^a?/ + } else if (c == '\\'){ + // slash followed by non-alphanumeric represents the following char + c = *(regex++); + if ((c >= 'A' && c <= 'Z') || + (c >= 'a' && c <= 'z') || + (c >= '0' && c <= '0') || + (c == '\0')) + { r = ss.str(); break; - } else if (extended && isspace(c)){ - continue; } else { - // self-matching char ss << c; } - } - - if ( r.size() == 0 && *regex == 0 ) + } else if (strchr("^$.[|()+{", c)){ + // list of "metacharacters" from man pcrepattern r = ss.str(); + break; + } else if (extended && c == '#'){ + // comment + r = ss.str(); + break; + } else if (extended && isspace(c)){ + continue; + } else { + // self-matching char + ss << c; + } + } - return r; + if ( r.empty() && *regex == 0 ){ + r = ss.str(); + if (purePrefix) *purePrefix = !r.empty(); } - inline string simpleRegex(const BSONElement& e){ - switch(e.type()){ - case RegEx: - return simpleRegexHelper(e.regex(), e.regexFlags()); - case Object:{ - BSONObj o = e.embeddedObject(); - return simpleRegexHelper(o["$regex"].valuestrsafe(), o["$options"].valuestrsafe()); - } - default: assert(false); return ""; //return squashes compiler warning + + return r; + } + inline string simpleRegex(const BSONElement& e){ + switch(e.type()){ + case RegEx: + return simpleRegex(e.regex(), e.regexFlags()); + case Object:{ + BSONObj o = e.embeddedObject(); + return simpleRegex(o["$regex"].valuestrsafe(), o["$options"].valuestrsafe()); } + default: assert(false); return ""; //return squashes compiler warning } } + + string simpleRegexEnd( string regex ) { + ++regex[ regex.length() - 1 ]; + return regex; + } - FieldRange::FieldRange( const BSONElement &e, bool optimize ) { - if ( !e.eoo() && e.type() != RegEx && e.getGtLtOp() == BSONObj::opIN ) { + + FieldRange::FieldRange( const BSONElement &e, bool isNot, bool optimize ) { + // NOTE with $not, we could potentially form a complementary set of intervals. + if ( !isNot && !e.eoo() && e.type() != RegEx && e.getGtLtOp() == BSONObj::opIN ) { set< BSONElement, element_lt > vals; + vector< FieldRange > regexes; + uassert( 12580 , "invalid query" , e.isABSONObj() ); BSONObjIterator i( e.embeddedObject() ); - while( i.more() ) - vals.insert( i.next() ); + while( i.more() ) { + BSONElement ie = i.next(); + if ( ie.type() == RegEx ) { + regexes.push_back( FieldRange( ie, false, optimize ) ); + } else { + vals.insert( ie ); + } + } for( set< BSONElement, element_lt >::const_iterator i = vals.begin(); i != vals.end(); ++i ) intervals_.push_back( FieldInterval(*i) ); + for( vector< FieldRange >::const_iterator i = regexes.begin(); i != regexes.end(); ++i ) + *this |= *i; + return; } @@ -149,15 +171,66 @@ namespace mongo { || (e.type() == Object && !e.embeddedObject()["$regex"].eoo()) ) { - const string r = simpleRegex(e); - if ( r.size() ) { - lower = addObj( BSON( "" << r ) ).firstElement(); - upper = addObj( BSON( "" << simpleRegexEnd( r ) ) ).firstElement(); - upperInclusive = false; - } + if ( !isNot ) { // no optimization for negated regex - we could consider creating 2 intervals comprising all nonmatching prefixes + const string r = simpleRegex(e); + if ( r.size() ) { + lower = addObj( BSON( "" << r ) ).firstElement(); + upper = addObj( BSON( "" << simpleRegexEnd( r ) ) ).firstElement(); + upperInclusive = false; + } else { + BSONObjBuilder b1(32), b2(32); + b1.appendMinForType( "" , String ); + lower = addObj( b1.obj() ).firstElement(); + + b2.appendMaxForType( "" , String ); + upper = addObj( b2.obj() ).firstElement(); + upperInclusive = false; //MaxForType String is an empty Object + } + + // regex matches self - regex type > string type + if (e.type() == RegEx){ + BSONElement re = addObj( BSON( "" << e ) ).firstElement(); + intervals_.push_back( FieldInterval(re) ); + } else { + BSONObj orig = e.embeddedObject(); + BSONObjBuilder b; + b.appendRegex("", orig["$regex"].valuestrsafe(), orig["$options"].valuestrsafe()); + BSONElement re = addObj( b.obj() ).firstElement(); + intervals_.push_back( FieldInterval(re) ); + } + + } return; } - switch( e.getGtLtOp() ) { + int op = e.getGtLtOp(); + if ( isNot ) { + switch( op ) { + case BSONObj::Equality: + case BSONObj::opALL: + case BSONObj::opMOD: // NOTE for mod and type, we could consider having 1-2 intervals comprising the complementary types (multiple intervals already possible with $in) + case BSONObj::opTYPE: + op = BSONObj::NE; // no bound calculation + break; + case BSONObj::NE: + op = BSONObj::Equality; + break; + case BSONObj::LT: + op = BSONObj::GTE; + break; + case BSONObj::LTE: + op = BSONObj::GT; + break; + case BSONObj::GT: + op = BSONObj::LTE; + break; + case BSONObj::GTE: + op = BSONObj::LT; + break; + default: // otherwise doesn't matter + break; + } + } + switch( op ) { case BSONObj::Equality: lower = upper = e; break; @@ -174,8 +247,32 @@ namespace mongo { case BSONObj::opALL: { massert( 10370 , "$all requires array", e.type() == Array ); BSONObjIterator i( e.embeddedObject() ); - if ( i.more() ) - lower = upper = i.next(); + bool bound = false; + while ( i.more() ){ + BSONElement x = i.next(); + if ( x.type() == Object && x.embeddedObject().firstElement().getGtLtOp() == BSONObj::opELEM_MATCH ){ + // taken care of elsewhere + } + else if ( x.type() != RegEx ) { + lower = upper = x; + bound = true; + break; + } + } + if ( !bound ) { // if no good non regex bound found, try regex bounds + BSONObjIterator i( e.embeddedObject() ); + while( i.more() ) { + BSONElement x = i.next(); + if ( x.type() != RegEx ) + continue; + string simple = simpleRegex( x.regex(), x.regexFlags() ); + if ( !simple.empty() ) { + lower = addObj( BSON( "" << simple ) ).firstElement(); + upper = addObj( BSON( "" << simpleRegexEnd( simple ) ) ).firstElement(); + break; + } + } + } break; } case BSONObj::opMOD: { @@ -206,10 +303,18 @@ namespace mongo { break; } + case BSONObj::opREGEX: + case BSONObj::opOPTIONS: + // do nothing + break; case BSONObj::opELEM_MATCH: { log() << "warning: shouldn't get here?" << endl; break; } + case BSONObj::opNEAR: + case BSONObj::opWITHIN: + _special = "2d"; + break; default: break; } @@ -269,19 +374,118 @@ namespace mongo { intervals_ = newIntervals; for( vector< BSONObj >::const_iterator i = other.objData_.begin(); i != other.objData_.end(); ++i ) objData_.push_back( *i ); + if ( _special.size() == 0 && other._special.size() ) + _special = other._special; return *this; } - string FieldRange::simpleRegexEnd( string regex ) { - ++regex[ regex.length() - 1 ]; - return regex; - } + void handleInterval( const FieldInterval &lower, FieldBound &low, FieldBound &high, vector< FieldInterval > &newIntervals ) { + if ( low.bound_.eoo() ) { + low = lower.lower_; high = lower.upper_; + } else { + if ( high.bound_.woCompare( lower.lower_.bound_, false ) < 0 ) { // when equal but neither inclusive, just assume they overlap, since current btree scanning code just as efficient either way + FieldInterval tmp; + tmp.lower_ = low; + tmp.upper_ = high; + newIntervals.push_back( tmp ); + low = lower.lower_; high = lower.upper_; + } else { + high = lower.upper_; + } + } + } + + const FieldRange &FieldRange::operator|=( const FieldRange &other ) { + vector< FieldInterval > newIntervals; + FieldBound low; + FieldBound high; + vector< FieldInterval >::const_iterator i = intervals_.begin(); + vector< FieldInterval >::const_iterator j = other.intervals_.begin(); + while( i != intervals_.end() && j != other.intervals_.end() ) { + int cmp = i->lower_.bound_.woCompare( j->lower_.bound_, false ); + if ( ( cmp == 0 && i->lower_.inclusive_ ) || cmp < 0 ) { + handleInterval( *i, low, high, newIntervals ); + ++i; + } else { + handleInterval( *j, low, high, newIntervals ); + ++j; + } + } + while( i != intervals_.end() ) { + handleInterval( *i, low, high, newIntervals ); + ++i; + } + while( j != other.intervals_.end() ) { + handleInterval( *j, low, high, newIntervals ); + ++j; + } + FieldInterval tmp; + tmp.lower_ = low; + tmp.upper_ = high; + newIntervals.push_back( tmp ); + intervals_ = newIntervals; + for( vector< BSONObj >::const_iterator i = other.objData_.begin(); i != other.objData_.end(); ++i ) + objData_.push_back( *i ); + if ( _special.size() == 0 && other._special.size() ) + _special = other._special; + return *this; + } BSONObj FieldRange::addObj( const BSONObj &o ) { objData_.push_back( o ); return o; } + string FieldRangeSet::getSpecial() const { + string s = ""; + for ( map<string,FieldRange>::iterator i=ranges_.begin(); i!=ranges_.end(); i++ ){ + if ( i->second.getSpecial().size() == 0 ) + continue; + uassert( 13033 , "can't have 2 special fields" , s.size() == 0 ); + s = i->second.getSpecial(); + } + return s; + } + + void FieldRangeSet::processOpElement( const char *fieldName, const BSONElement &f, bool isNot, bool optimize ) { + BSONElement g = f; + int op2 = g.getGtLtOp(); + if ( op2 == BSONObj::opALL ) { + BSONElement h = g; + massert( 13050 , "$all requires array", h.type() == Array ); + BSONObjIterator i( h.embeddedObject() ); + if( i.more() ) { + BSONElement x = i.next(); + if ( x.type() == Object && x.embeddedObject().firstElement().getGtLtOp() == BSONObj::opELEM_MATCH ) { + g = x.embeddedObject().firstElement(); + op2 = g.getGtLtOp(); + } + } + } + if ( op2 == BSONObj::opELEM_MATCH ) { + BSONObjIterator k( g.embeddedObjectUserCheck() ); + while ( k.more() ){ + BSONElement h = k.next(); + StringBuilder buf(32); + buf << fieldName << "." << h.fieldName(); + string fullname = buf.str(); + + int op3 = getGtLtOp( h ); + if ( op3 == BSONObj::Equality ){ + ranges_[ fullname ] &= FieldRange( h , isNot , optimize ); + } + else { + BSONObjIterator l( h.embeddedObject() ); + while ( l.more() ){ + ranges_[ fullname ] &= FieldRange( l.next() , isNot , optimize ); + } + } + } + } else { + ranges_[ fieldName ] &= FieldRange( f , isNot , optimize ); + } + } + FieldRangeSet::FieldRangeSet( const char *ns, const BSONObj &query , bool optimize ) : ns_( ns ), query_( query.getOwned() ) { BSONObjIterator i( query_ ); @@ -293,36 +497,38 @@ namespace mongo { if ( strcmp( e.fieldName(), "$where" ) == 0 ) continue; - int op = getGtLtOp( e ); + bool equality = ( getGtLtOp( e ) == BSONObj::Equality ); + if ( equality && e.type() == Object ) { + equality = ( strcmp( e.embeddedObject().firstElement().fieldName(), "$not" ) != 0 ); + } - if ( op == BSONObj::Equality || op == BSONObj::opREGEX || op == BSONObj::opOPTIONS ) { - ranges_[ e.fieldName() ] &= FieldRange( e , optimize ); - } - else if ( op == BSONObj::opELEM_MATCH ){ - BSONObjIterator i( e.embeddedObjectUserCheck().firstElement().embeddedObjectUserCheck() ); - while ( i.more() ){ - BSONElement f = i.next(); - StringBuilder buf(32); - buf << e.fieldName() << "." << f.fieldName(); - string fullname = buf.str(); - - int op2 = getGtLtOp( f ); - if ( op2 == BSONObj::Equality ){ - ranges_[ fullname ] &= FieldRange( f , optimize ); - } - else { - BSONObjIterator j( f.embeddedObject() ); - while ( j.more() ){ - ranges_[ fullname ] &= FieldRange( j.next() , optimize ); + if ( equality || ( e.type() == Object && !e.embeddedObject()[ "$regex" ].eoo() ) ) { + ranges_[ e.fieldName() ] &= FieldRange( e , false , optimize ); + } + if ( !equality ) { + BSONObjIterator j( e.embeddedObject() ); + while( j.more() ) { + BSONElement f = j.next(); + if ( strcmp( f.fieldName(), "$not" ) == 0 ) { + switch( f.type() ) { + case Object: { + BSONObjIterator k( f.embeddedObject() ); + while( k.more() ) { + BSONElement g = k.next(); + uassert( 13034, "invalid use of $not", g.getGtLtOp() != BSONObj::Equality ); + processOpElement( e.fieldName(), g, true, optimize ); + } + break; + } + case RegEx: + processOpElement( e.fieldName(), f, true, optimize ); + break; + default: + uassert( 13041, "invalid use of $not", false ); } + } else { + processOpElement( e.fieldName(), f, false, optimize ); } - } - } - else { - BSONObjIterator i( e.embeddedObject() ); - while( i.more() ) { - BSONElement f = i.next(); - ranges_[ e.fieldName() ] &= FieldRange( f , optimize ); } } } @@ -445,8 +651,8 @@ namespace mongo { /////////////////// void FieldMatcher::add( const BSONObj& o ){ - massert( 10371 , "can only add to FieldMatcher once", source_.isEmpty()); - source_ = o; + massert( 10371 , "can only add to FieldMatcher once", _source.isEmpty()); + _source = o; BSONObjIterator i( o ); int true_false = -1; @@ -457,23 +663,24 @@ namespace mongo { // validate input if (true_false == -1){ true_false = e.trueValue(); - include_ = !e.trueValue(); - }else{ - if((bool) true_false != e.trueValue()) - errmsg = "You cannot currently mix including and excluding fields. Contact us if this is an issue."; + _include = !e.trueValue(); + } + else{ + uassert( 10053 , "You cannot currently mix including and excluding fields. Contact us if this is an issue." , + (bool)true_false == e.trueValue() ); } } } void FieldMatcher::add(const string& field, bool include){ if (field.empty()){ // this is the field the user referred to - include_ = include; + _include = include; } else { const size_t dot = field.find('.'); const string subfield = field.substr(0,dot); const string rest = (dot == string::npos ? "" : field.substr(dot+1,string::npos)); - boost::shared_ptr<FieldMatcher>& fm = fields_[subfield]; + boost::shared_ptr<FieldMatcher>& fm = _fields[subfield]; if (!fm) fm.reset(new FieldMatcher(!include)); @@ -482,7 +689,7 @@ namespace mongo { } BSONObj FieldMatcher::getSpec() const{ - return source_; + return _source; } //b will be the value part of an array-typed BSONElement @@ -509,7 +716,7 @@ namespace mongo { break; } default: - if (include_) + if (_include) b.appendAs(e, b.numStr(i++).c_str()); } @@ -518,18 +725,20 @@ namespace mongo { } void FieldMatcher::append( BSONObjBuilder& b , const BSONElement& e ) const { - FieldMap::const_iterator field = fields_.find( e.fieldName() ); + FieldMap::const_iterator field = _fields.find( e.fieldName() ); - if (field == fields_.end()){ - if (include_) + if (field == _fields.end()){ + if (_include) b.append(e); - } else { + } + else { FieldMatcher& subfm = *field->second; - - if (subfm.fields_.empty() || !(e.type()==Object || e.type()==Array) ){ - if (subfm.include_) + + if (subfm._fields.empty() || !(e.type()==Object || e.type()==Array) ){ + if (subfm._include) b.append(e); - } else if (e.type() == Object){ + } + else if (e.type() == Object){ BSONObjBuilder subb; BSONObjIterator it(e.embeddedObject()); while (it.more()){ @@ -537,7 +746,8 @@ namespace mongo { } b.append(e.fieldName(), subb.obj()); - } else { //Array + } + else { //Array BSONObjBuilder subb; subfm.appendArray(subb, e.embeddedObject()); b.appendArray(e.fieldName(), subb.obj()); |