summaryrefslogtreecommitdiff
path: root/db/queryutil.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'db/queryutil.cpp')
-rw-r--r--db/queryutil.cpp464
1 files changed, 337 insertions, 127 deletions
diff --git a/db/queryutil.cpp b/db/queryutil.cpp
index d8854be..c01b89e 100644
--- a/db/queryutil.cpp
+++ b/db/queryutil.cpp
@@ -24,96 +24,118 @@
#include "../util/unittest.h"
namespace mongo {
- namespace {
- /** returns a string that when used as a matcher, would match a super set of regex()
- returns "" for complex regular expressions
- used to optimize queries in some simple regex cases that start with '^'
- */
- inline string simpleRegexHelper(const char* regex, const char* flags){
- string r = "";
-
- bool extended = false;
- while (*flags){
- switch (*(flags++)){
- case 'm': // multiline
- continue;
- case 'x': // extended
- extended = true;
- break;
- default:
- return r; // cant use index
- }
- }
+ /** returns a string that when used as a matcher, would match a super set of regex()
+ returns "" for complex regular expressions
+ used to optimize queries in some simple regex cases that start with '^'
- if ( *(regex++) != '^' )
- return r;
+ if purePrefix != NULL, sets it to whether the regex can be converted to a range query
+ */
+ string simpleRegex(const char* regex, const char* flags, bool* purePrefix){
+ string r = "";
- stringstream ss;
+ if (purePrefix) *purePrefix = false;
- while(*regex){
- char c = *(regex++);
- if ( c == '*' || c == '?' ){
- // These are the only two symbols that make the last char optional
- r = ss.str();
- r = r.substr( 0 , r.size() - 1 );
- return r; //breaking here fails with /^a?/
- } else if (c == '\\'){
- // slash followed by non-alphanumeric represents the following char
- c = *(regex++);
- if ((c >= 'A' && c <= 'Z') ||
- (c >= 'a' && c <= 'z') ||
- (c >= '0' && c <= '0') ||
- (c == '\0'))
- {
- r = ss.str();
- break;
- } else {
- ss << c;
- }
- } else if (strchr("^$.[|()+{", c)){
- // list of "metacharacters" from man pcrepattern
- r = ss.str();
+ bool extended = false;
+ while (*flags){
+ switch (*(flags++)){
+ case 'm': // multiline
+ continue;
+ case 'x': // extended
+ extended = true;
break;
- } else if (extended && c == '#'){
- // comment
+ default:
+ return r; // cant use index
+ }
+ }
+
+ if ( *(regex++) != '^' )
+ return r;
+
+ stringstream ss;
+
+ while(*regex){
+ char c = *(regex++);
+ if ( c == '*' || c == '?' ){
+ // These are the only two symbols that make the last char optional
+ r = ss.str();
+ r = r.substr( 0 , r.size() - 1 );
+ return r; //breaking here fails with /^a?/
+ } else if (c == '\\'){
+ // slash followed by non-alphanumeric represents the following char
+ c = *(regex++);
+ if ((c >= 'A' && c <= 'Z') ||
+ (c >= 'a' && c <= 'z') ||
+ (c >= '0' && c <= '0') ||
+ (c == '\0'))
+ {
r = ss.str();
break;
- } else if (extended && isspace(c)){
- continue;
} else {
- // self-matching char
ss << c;
}
- }
-
- if ( r.size() == 0 && *regex == 0 )
+ } else if (strchr("^$.[|()+{", c)){
+ // list of "metacharacters" from man pcrepattern
r = ss.str();
+ break;
+ } else if (extended && c == '#'){
+ // comment
+ r = ss.str();
+ break;
+ } else if (extended && isspace(c)){
+ continue;
+ } else {
+ // self-matching char
+ ss << c;
+ }
+ }
- return r;
+ if ( r.empty() && *regex == 0 ){
+ r = ss.str();
+ if (purePrefix) *purePrefix = !r.empty();
}
- inline string simpleRegex(const BSONElement& e){
- switch(e.type()){
- case RegEx:
- return simpleRegexHelper(e.regex(), e.regexFlags());
- case Object:{
- BSONObj o = e.embeddedObject();
- return simpleRegexHelper(o["$regex"].valuestrsafe(), o["$options"].valuestrsafe());
- }
- default: assert(false); return ""; //return squashes compiler warning
+
+ return r;
+ }
+ inline string simpleRegex(const BSONElement& e){
+ switch(e.type()){
+ case RegEx:
+ return simpleRegex(e.regex(), e.regexFlags());
+ case Object:{
+ BSONObj o = e.embeddedObject();
+ return simpleRegex(o["$regex"].valuestrsafe(), o["$options"].valuestrsafe());
}
+ default: assert(false); return ""; //return squashes compiler warning
}
}
+
+ string simpleRegexEnd( string regex ) {
+ ++regex[ regex.length() - 1 ];
+ return regex;
+ }
- FieldRange::FieldRange( const BSONElement &e, bool optimize ) {
- if ( !e.eoo() && e.type() != RegEx && e.getGtLtOp() == BSONObj::opIN ) {
+
+ FieldRange::FieldRange( const BSONElement &e, bool isNot, bool optimize ) {
+ // NOTE with $not, we could potentially form a complementary set of intervals.
+ if ( !isNot && !e.eoo() && e.type() != RegEx && e.getGtLtOp() == BSONObj::opIN ) {
set< BSONElement, element_lt > vals;
+ vector< FieldRange > regexes;
+ uassert( 12580 , "invalid query" , e.isABSONObj() );
BSONObjIterator i( e.embeddedObject() );
- while( i.more() )
- vals.insert( i.next() );
+ while( i.more() ) {
+ BSONElement ie = i.next();
+ if ( ie.type() == RegEx ) {
+ regexes.push_back( FieldRange( ie, false, optimize ) );
+ } else {
+ vals.insert( ie );
+ }
+ }
for( set< BSONElement, element_lt >::const_iterator i = vals.begin(); i != vals.end(); ++i )
intervals_.push_back( FieldInterval(*i) );
+ for( vector< FieldRange >::const_iterator i = regexes.begin(); i != regexes.end(); ++i )
+ *this |= *i;
+
return;
}
@@ -149,15 +171,66 @@ namespace mongo {
|| (e.type() == Object && !e.embeddedObject()["$regex"].eoo())
)
{
- const string r = simpleRegex(e);
- if ( r.size() ) {
- lower = addObj( BSON( "" << r ) ).firstElement();
- upper = addObj( BSON( "" << simpleRegexEnd( r ) ) ).firstElement();
- upperInclusive = false;
- }
+ if ( !isNot ) { // no optimization for negated regex - we could consider creating 2 intervals comprising all nonmatching prefixes
+ const string r = simpleRegex(e);
+ if ( r.size() ) {
+ lower = addObj( BSON( "" << r ) ).firstElement();
+ upper = addObj( BSON( "" << simpleRegexEnd( r ) ) ).firstElement();
+ upperInclusive = false;
+ } else {
+ BSONObjBuilder b1(32), b2(32);
+ b1.appendMinForType( "" , String );
+ lower = addObj( b1.obj() ).firstElement();
+
+ b2.appendMaxForType( "" , String );
+ upper = addObj( b2.obj() ).firstElement();
+ upperInclusive = false; //MaxForType String is an empty Object
+ }
+
+ // regex matches self - regex type > string type
+ if (e.type() == RegEx){
+ BSONElement re = addObj( BSON( "" << e ) ).firstElement();
+ intervals_.push_back( FieldInterval(re) );
+ } else {
+ BSONObj orig = e.embeddedObject();
+ BSONObjBuilder b;
+ b.appendRegex("", orig["$regex"].valuestrsafe(), orig["$options"].valuestrsafe());
+ BSONElement re = addObj( b.obj() ).firstElement();
+ intervals_.push_back( FieldInterval(re) );
+ }
+
+ }
return;
}
- switch( e.getGtLtOp() ) {
+ int op = e.getGtLtOp();
+ if ( isNot ) {
+ switch( op ) {
+ case BSONObj::Equality:
+ case BSONObj::opALL:
+ case BSONObj::opMOD: // NOTE for mod and type, we could consider having 1-2 intervals comprising the complementary types (multiple intervals already possible with $in)
+ case BSONObj::opTYPE:
+ op = BSONObj::NE; // no bound calculation
+ break;
+ case BSONObj::NE:
+ op = BSONObj::Equality;
+ break;
+ case BSONObj::LT:
+ op = BSONObj::GTE;
+ break;
+ case BSONObj::LTE:
+ op = BSONObj::GT;
+ break;
+ case BSONObj::GT:
+ op = BSONObj::LTE;
+ break;
+ case BSONObj::GTE:
+ op = BSONObj::LT;
+ break;
+ default: // otherwise doesn't matter
+ break;
+ }
+ }
+ switch( op ) {
case BSONObj::Equality:
lower = upper = e;
break;
@@ -174,8 +247,32 @@ namespace mongo {
case BSONObj::opALL: {
massert( 10370 , "$all requires array", e.type() == Array );
BSONObjIterator i( e.embeddedObject() );
- if ( i.more() )
- lower = upper = i.next();
+ bool bound = false;
+ while ( i.more() ){
+ BSONElement x = i.next();
+ if ( x.type() == Object && x.embeddedObject().firstElement().getGtLtOp() == BSONObj::opELEM_MATCH ){
+ // taken care of elsewhere
+ }
+ else if ( x.type() != RegEx ) {
+ lower = upper = x;
+ bound = true;
+ break;
+ }
+ }
+ if ( !bound ) { // if no good non regex bound found, try regex bounds
+ BSONObjIterator i( e.embeddedObject() );
+ while( i.more() ) {
+ BSONElement x = i.next();
+ if ( x.type() != RegEx )
+ continue;
+ string simple = simpleRegex( x.regex(), x.regexFlags() );
+ if ( !simple.empty() ) {
+ lower = addObj( BSON( "" << simple ) ).firstElement();
+ upper = addObj( BSON( "" << simpleRegexEnd( simple ) ) ).firstElement();
+ break;
+ }
+ }
+ }
break;
}
case BSONObj::opMOD: {
@@ -206,10 +303,18 @@ namespace mongo {
break;
}
+ case BSONObj::opREGEX:
+ case BSONObj::opOPTIONS:
+ // do nothing
+ break;
case BSONObj::opELEM_MATCH: {
log() << "warning: shouldn't get here?" << endl;
break;
}
+ case BSONObj::opNEAR:
+ case BSONObj::opWITHIN:
+ _special = "2d";
+ break;
default:
break;
}
@@ -269,19 +374,118 @@ namespace mongo {
intervals_ = newIntervals;
for( vector< BSONObj >::const_iterator i = other.objData_.begin(); i != other.objData_.end(); ++i )
objData_.push_back( *i );
+ if ( _special.size() == 0 && other._special.size() )
+ _special = other._special;
return *this;
}
- string FieldRange::simpleRegexEnd( string regex ) {
- ++regex[ regex.length() - 1 ];
- return regex;
- }
+ void handleInterval( const FieldInterval &lower, FieldBound &low, FieldBound &high, vector< FieldInterval > &newIntervals ) {
+ if ( low.bound_.eoo() ) {
+ low = lower.lower_; high = lower.upper_;
+ } else {
+ if ( high.bound_.woCompare( lower.lower_.bound_, false ) < 0 ) { // when equal but neither inclusive, just assume they overlap, since current btree scanning code just as efficient either way
+ FieldInterval tmp;
+ tmp.lower_ = low;
+ tmp.upper_ = high;
+ newIntervals.push_back( tmp );
+ low = lower.lower_; high = lower.upper_;
+ } else {
+ high = lower.upper_;
+ }
+ }
+ }
+
+ const FieldRange &FieldRange::operator|=( const FieldRange &other ) {
+ vector< FieldInterval > newIntervals;
+ FieldBound low;
+ FieldBound high;
+ vector< FieldInterval >::const_iterator i = intervals_.begin();
+ vector< FieldInterval >::const_iterator j = other.intervals_.begin();
+ while( i != intervals_.end() && j != other.intervals_.end() ) {
+ int cmp = i->lower_.bound_.woCompare( j->lower_.bound_, false );
+ if ( ( cmp == 0 && i->lower_.inclusive_ ) || cmp < 0 ) {
+ handleInterval( *i, low, high, newIntervals );
+ ++i;
+ } else {
+ handleInterval( *j, low, high, newIntervals );
+ ++j;
+ }
+ }
+ while( i != intervals_.end() ) {
+ handleInterval( *i, low, high, newIntervals );
+ ++i;
+ }
+ while( j != other.intervals_.end() ) {
+ handleInterval( *j, low, high, newIntervals );
+ ++j;
+ }
+ FieldInterval tmp;
+ tmp.lower_ = low;
+ tmp.upper_ = high;
+ newIntervals.push_back( tmp );
+ intervals_ = newIntervals;
+ for( vector< BSONObj >::const_iterator i = other.objData_.begin(); i != other.objData_.end(); ++i )
+ objData_.push_back( *i );
+ if ( _special.size() == 0 && other._special.size() )
+ _special = other._special;
+ return *this;
+ }
BSONObj FieldRange::addObj( const BSONObj &o ) {
objData_.push_back( o );
return o;
}
+ string FieldRangeSet::getSpecial() const {
+ string s = "";
+ for ( map<string,FieldRange>::iterator i=ranges_.begin(); i!=ranges_.end(); i++ ){
+ if ( i->second.getSpecial().size() == 0 )
+ continue;
+ uassert( 13033 , "can't have 2 special fields" , s.size() == 0 );
+ s = i->second.getSpecial();
+ }
+ return s;
+ }
+
+ void FieldRangeSet::processOpElement( const char *fieldName, const BSONElement &f, bool isNot, bool optimize ) {
+ BSONElement g = f;
+ int op2 = g.getGtLtOp();
+ if ( op2 == BSONObj::opALL ) {
+ BSONElement h = g;
+ massert( 13050 , "$all requires array", h.type() == Array );
+ BSONObjIterator i( h.embeddedObject() );
+ if( i.more() ) {
+ BSONElement x = i.next();
+ if ( x.type() == Object && x.embeddedObject().firstElement().getGtLtOp() == BSONObj::opELEM_MATCH ) {
+ g = x.embeddedObject().firstElement();
+ op2 = g.getGtLtOp();
+ }
+ }
+ }
+ if ( op2 == BSONObj::opELEM_MATCH ) {
+ BSONObjIterator k( g.embeddedObjectUserCheck() );
+ while ( k.more() ){
+ BSONElement h = k.next();
+ StringBuilder buf(32);
+ buf << fieldName << "." << h.fieldName();
+ string fullname = buf.str();
+
+ int op3 = getGtLtOp( h );
+ if ( op3 == BSONObj::Equality ){
+ ranges_[ fullname ] &= FieldRange( h , isNot , optimize );
+ }
+ else {
+ BSONObjIterator l( h.embeddedObject() );
+ while ( l.more() ){
+ ranges_[ fullname ] &= FieldRange( l.next() , isNot , optimize );
+ }
+ }
+ }
+ } else {
+ ranges_[ fieldName ] &= FieldRange( f , isNot , optimize );
+ }
+ }
+
FieldRangeSet::FieldRangeSet( const char *ns, const BSONObj &query , bool optimize )
: ns_( ns ), query_( query.getOwned() ) {
BSONObjIterator i( query_ );
@@ -293,36 +497,38 @@ namespace mongo {
if ( strcmp( e.fieldName(), "$where" ) == 0 )
continue;
- int op = getGtLtOp( e );
+ bool equality = ( getGtLtOp( e ) == BSONObj::Equality );
+ if ( equality && e.type() == Object ) {
+ equality = ( strcmp( e.embeddedObject().firstElement().fieldName(), "$not" ) != 0 );
+ }
- if ( op == BSONObj::Equality || op == BSONObj::opREGEX || op == BSONObj::opOPTIONS ) {
- ranges_[ e.fieldName() ] &= FieldRange( e , optimize );
- }
- else if ( op == BSONObj::opELEM_MATCH ){
- BSONObjIterator i( e.embeddedObjectUserCheck().firstElement().embeddedObjectUserCheck() );
- while ( i.more() ){
- BSONElement f = i.next();
- StringBuilder buf(32);
- buf << e.fieldName() << "." << f.fieldName();
- string fullname = buf.str();
-
- int op2 = getGtLtOp( f );
- if ( op2 == BSONObj::Equality ){
- ranges_[ fullname ] &= FieldRange( f , optimize );
- }
- else {
- BSONObjIterator j( f.embeddedObject() );
- while ( j.more() ){
- ranges_[ fullname ] &= FieldRange( j.next() , optimize );
+ if ( equality || ( e.type() == Object && !e.embeddedObject()[ "$regex" ].eoo() ) ) {
+ ranges_[ e.fieldName() ] &= FieldRange( e , false , optimize );
+ }
+ if ( !equality ) {
+ BSONObjIterator j( e.embeddedObject() );
+ while( j.more() ) {
+ BSONElement f = j.next();
+ if ( strcmp( f.fieldName(), "$not" ) == 0 ) {
+ switch( f.type() ) {
+ case Object: {
+ BSONObjIterator k( f.embeddedObject() );
+ while( k.more() ) {
+ BSONElement g = k.next();
+ uassert( 13034, "invalid use of $not", g.getGtLtOp() != BSONObj::Equality );
+ processOpElement( e.fieldName(), g, true, optimize );
+ }
+ break;
+ }
+ case RegEx:
+ processOpElement( e.fieldName(), f, true, optimize );
+ break;
+ default:
+ uassert( 13041, "invalid use of $not", false );
}
+ } else {
+ processOpElement( e.fieldName(), f, false, optimize );
}
- }
- }
- else {
- BSONObjIterator i( e.embeddedObject() );
- while( i.more() ) {
- BSONElement f = i.next();
- ranges_[ e.fieldName() ] &= FieldRange( f , optimize );
}
}
}
@@ -445,8 +651,8 @@ namespace mongo {
///////////////////
void FieldMatcher::add( const BSONObj& o ){
- massert( 10371 , "can only add to FieldMatcher once", source_.isEmpty());
- source_ = o;
+ massert( 10371 , "can only add to FieldMatcher once", _source.isEmpty());
+ _source = o;
BSONObjIterator i( o );
int true_false = -1;
@@ -457,23 +663,24 @@ namespace mongo {
// validate input
if (true_false == -1){
true_false = e.trueValue();
- include_ = !e.trueValue();
- }else{
- if((bool) true_false != e.trueValue())
- errmsg = "You cannot currently mix including and excluding fields. Contact us if this is an issue.";
+ _include = !e.trueValue();
+ }
+ else{
+ uassert( 10053 , "You cannot currently mix including and excluding fields. Contact us if this is an issue." ,
+ (bool)true_false == e.trueValue() );
}
}
}
void FieldMatcher::add(const string& field, bool include){
if (field.empty()){ // this is the field the user referred to
- include_ = include;
+ _include = include;
} else {
const size_t dot = field.find('.');
const string subfield = field.substr(0,dot);
const string rest = (dot == string::npos ? "" : field.substr(dot+1,string::npos));
- boost::shared_ptr<FieldMatcher>& fm = fields_[subfield];
+ boost::shared_ptr<FieldMatcher>& fm = _fields[subfield];
if (!fm)
fm.reset(new FieldMatcher(!include));
@@ -482,7 +689,7 @@ namespace mongo {
}
BSONObj FieldMatcher::getSpec() const{
- return source_;
+ return _source;
}
//b will be the value part of an array-typed BSONElement
@@ -509,7 +716,7 @@ namespace mongo {
break;
}
default:
- if (include_)
+ if (_include)
b.appendAs(e, b.numStr(i++).c_str());
}
@@ -518,18 +725,20 @@ namespace mongo {
}
void FieldMatcher::append( BSONObjBuilder& b , const BSONElement& e ) const {
- FieldMap::const_iterator field = fields_.find( e.fieldName() );
+ FieldMap::const_iterator field = _fields.find( e.fieldName() );
- if (field == fields_.end()){
- if (include_)
+ if (field == _fields.end()){
+ if (_include)
b.append(e);
- } else {
+ }
+ else {
FieldMatcher& subfm = *field->second;
-
- if (subfm.fields_.empty() || !(e.type()==Object || e.type()==Array) ){
- if (subfm.include_)
+
+ if (subfm._fields.empty() || !(e.type()==Object || e.type()==Array) ){
+ if (subfm._include)
b.append(e);
- } else if (e.type() == Object){
+ }
+ else if (e.type() == Object){
BSONObjBuilder subb;
BSONObjIterator it(e.embeddedObject());
while (it.more()){
@@ -537,7 +746,8 @@ namespace mongo {
}
b.append(e.fieldName(), subb.obj());
- } else { //Array
+ }
+ else { //Array
BSONObjBuilder subb;
subfm.appendArray(subb, e.embeddedObject());
b.appendArray(e.fieldName(), subb.obj());